Skip to content

Don't retry 410, move event processing into reusable method #125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v1
- name: Set up Python 3.7
uses: actions/setup-python@v1
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.7
python-version: 3.x
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
9 changes: 5 additions & 4 deletions legistar/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,10 @@ def pages(self, url, params=None, item_key=None):
page_num += 1

def accept_response(self, response, **kwargs):
'''
"""
This overrides a method that controls whether
the scraper should retry on an error. We don't
want to retry if the API returns a 400
'''
return response.status_code < 401
want to retry if the API returns a 400, or a
410, which means the record no longer exists.
"""
return response.status_code < 401 or response.status_code == 410
56 changes: 32 additions & 24 deletions legistar/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,36 +240,44 @@ def api_events(self, since_datetime=None):

def events(self, since_datetime=None):
for api_event in self.api_events(since_datetime=since_datetime):
if event := self.event(api_event):
yield event

time_str = api_event['EventTime']
if not time_str: # If we don't have an event time, skip it
continue

try:
# Start times are entered manually. Sometimes, they don't
# conform to this format. Log events with invalid start times,
# but don't interrupt the scrape for them.
start_time = time.strptime(time_str, self.time_string_format)
except ValueError:
event_url = '{0}/events/{1}'.format(self.BASE_URL, api_event['EventId'])
self.logger.error('API event has invalid start time "{0}": {1}'.format(time_str, event_url))
continue
def event(self, api_event):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

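Separated into its own method. Apart from reformatting (indentation, quoting, line wrapping), the logic is unchanged except that `continue` and `yield` are replaced by an explicit or implicit `return`.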

time_str = api_event["EventTime"]
if not time_str: # If we don't have an event time, skip it
return
try:
# Start times are entered manually. Sometimes, they don't
# conform to this format. Log events with invalid start times,
# but don't interrupt the scrape for them.
start_time = time.strptime(time_str, self.time_string_format)
except ValueError:
event_url = "{0}/events/{1}".format(self.BASE_URL, api_event["EventId"])
self.logger.error(
'API event has invalid start time "{0}": {1}'.format(
time_str, event_url
)
)
return

start = self.toTime(api_event['EventDate'])
api_event['start'] = start.replace(hour=start_time.tm_hour,
minute=start_time.tm_min)
start = self.toTime(api_event["EventDate"])
api_event["start"] = start.replace(
hour=start_time.tm_hour, minute=start_time.tm_min
)

api_event['status'] = self._event_status(api_event)
api_event["status"] = self._event_status(api_event)

web_event = self._get_web_event(api_event)
web_event = self._get_web_event(api_event)

if web_event:
yield api_event, web_event
if web_event:
return api_event, web_event

else:
event_url = '{0}/events/{1}'.format(self.BASE_URL, api_event['EventId'])
self.warning('API event could not be found in web interface: {0}'.format(event_url))
continue
else:
event_url = "{0}/events/{1}".format(self.BASE_URL, api_event["EventId"])
self.warning(
"API event could not be found in web interface: {0}".format(event_url)
)

def agenda(self, event):
agenda_url = (self.BASE_URL +
Expand Down