From 8781cbf85c4855d5e4cdd206e51cddf2b8ba6253 Mon Sep 17 00:00:00 2001
From: Hannah Cushman Garland <hannah.cushman@datamade.us>
Date: Thu, 30 Jan 2025 14:17:33 -0600
Subject: [PATCH 1/6] Don't retry 410, allow search results to be passed to
 APIEventScraper.events

---
 legistar/base.py   | 9 +++++----
 legistar/events.py | 5 +++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/legistar/base.py b/legistar/base.py
index 77ae289..e7f81b2 100644
--- a/legistar/base.py
+++ b/legistar/base.py
@@ -359,9 +359,10 @@ def pages(self, url, params=None, item_key=None):
             page_num += 1
 
     def accept_response(self, response, **kwargs):
-        '''
+        """
         This overrides a method that controls whether
         the scraper should retry on an error. We don't
-        want to retry if the API returns a 400
-        '''
-        return response.status_code < 401
+        want to retry if the API returns a 400 or a 410; the
+        latter means the record no longer exists.
+        """
+        return response.status_code < 401 or response.status_code == 410
diff --git a/legistar/events.py b/legistar/events.py
index e045130..42827d0 100644
--- a/legistar/events.py
+++ b/legistar/events.py
@@ -238,8 +238,9 @@ def api_events(self, since_datetime=None):
                               params=params,
                               item_key="EventId")
 
-    def events(self, since_datetime=None):
-        for api_event in self.api_events(since_datetime=since_datetime):
+    def events(self, since_datetime=None, api_events=None):
+
+        for api_event in api_events or self.api_events(since_datetime=since_datetime):
 
             time_str = api_event['EventTime']
             if not time_str:  # If we don't have an event time, skip it

From aa525d37126fdc56fc377ba4117dcfc6963b0572 Mon Sep 17 00:00:00 2001
From: Hannah Cushman Garland <hannah.cushman@datamade.us>
Date: Thu, 30 Jan 2025 14:46:32 -0600
Subject: [PATCH 2/6] Break event processing into its own method

---
 legistar/events.py | 63 +++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 28 deletions(-)

diff --git a/legistar/events.py b/legistar/events.py
index 42827d0..44e5011 100644
--- a/legistar/events.py
+++ b/legistar/events.py
@@ -238,39 +238,46 @@ def api_events(self, since_datetime=None):
                               params=params,
                               item_key="EventId")
 
-    def events(self, since_datetime=None, api_events=None):
-
-        for api_event in api_events or self.api_events(since_datetime=since_datetime):
-
-            time_str = api_event['EventTime']
-            if not time_str:  # If we don't have an event time, skip it
-                continue
-
-            try:
-                # Start times are entered manually. Sometimes, they don't
-                # conform to this format. Log events with invalid start times,
-                # but don't interrupt the scrape for them.
-                start_time = time.strptime(time_str, self.time_string_format)
-            except ValueError:
-                event_url = '{0}/events/{1}'.format(self.BASE_URL, api_event['EventId'])
-                self.logger.error('API event has invalid start time "{0}": {1}'.format(time_str, event_url))
-                continue
+    def events(self, since_datetime=None):
+        for api_event in self.api_events(since_datetime=since_datetime):
+            if event := self.event(api_event):
+                yield event
+
+    def event(self, api_event):
+        time_str = api_event["EventTime"]
+        if not time_str:  # If we don't have an event time, skip it
+            return
+        try:
+            # Start times are entered manually. Sometimes, they don't
+            # conform to this format. Log events with invalid start times,
+            # but don't interrupt the scrape for them.
+            start_time = time.strptime(time_str, self.time_string_format)
+        except ValueError:
+            event_url = "{0}/events/{1}".format(self.BASE_URL, api_event["EventId"])
+            self.logger.error(
+                'API event has invalid start time "{0}": {1}'.format(
+                    time_str, event_url
+                )
+            )
+            return
 
-            start = self.toTime(api_event['EventDate'])
-            api_event['start'] = start.replace(hour=start_time.tm_hour,
-                                               minute=start_time.tm_min)
+        start = self.toTime(api_event["EventDate"])
+        api_event["start"] = start.replace(
+            hour=start_time.tm_hour, minute=start_time.tm_min
+        )
 
-            api_event['status'] = self._event_status(api_event)
+        api_event["status"] = self._event_status(api_event)
 
-            web_event = self._get_web_event(api_event)
+        web_event = self._get_web_event(api_event)
 
-            if web_event:
-                yield api_event, web_event
+        if web_event:
+            return api_event, web_event
 
-            else:
-                event_url = '{0}/events/{1}'.format(self.BASE_URL, api_event['EventId'])
-                self.warning('API event could not be found in web interface: {0}'.format(event_url))
-                continue
+        else:
+            event_url = "{0}/events/{1}".format(self.BASE_URL, api_event["EventId"])
+            self.warning(
+                "API event could not be found in web interface: {0}".format(event_url)
+            )
 
     def agenda(self, event):
         agenda_url = (self.BASE_URL +

From 50466105d670b7148a98d1a91faf651bbc7240fb Mon Sep 17 00:00:00 2001
From: Hannah Cushman Garland <hannah.cushman@datamade.us>
Date: Thu, 30 Jan 2025 14:49:04 -0600
Subject: [PATCH 3/6] Update CI

---
 .github/workflows/pythonapp.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
index 92177f5..483e33b 100644
--- a/.github/workflows/pythonapp.yml
+++ b/.github/workflows/pythonapp.yml
@@ -8,11 +8,11 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v1
-    - name: Set up Python 3.7
-      uses: actions/setup-python@v1
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
       with:
-        python-version: 3.7
+        python-version: 3.x
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip

From e6d08f649ad73b0822cacdb42230e98c3bcc0c1e Mon Sep 17 00:00:00 2001
From: Hannah Cushman Garland <hannah.cushman@datamade.us>
Date: Thu, 6 Mar 2025 15:15:05 -0600
Subject: [PATCH 4/6] Don't retry web requests with 410 status code

---
 legistar/base.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/legistar/base.py b/legistar/base.py
index e7f81b2..77d637f 100644
--- a/legistar/base.py
+++ b/legistar/base.py
@@ -268,6 +268,11 @@ def sessionSecrets(self, page):
 
         return(payload)
 
+    def accept_response(self, response, **kwargs):
+        if response.status_code == 410:
+            return True
+        return super().accept_response(response, **kwargs)
+
 
 def fieldKey(x):
     field_id = x.attrib['id']

From a19e96759285801179c96e6596425f66e51a473c Mon Sep 17 00:00:00 2001
From: Hannah Cushman Garland <hannah.cushman@datamade.us>
Date: Thu, 6 Mar 2025 15:36:46 -0600
Subject: [PATCH 5/6] Only raise HTTP errors for unacceptable responses

---
 legistar/base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/legistar/base.py b/legistar/base.py
index 77d637f..8df5326 100644
--- a/legistar/base.py
+++ b/legistar/base.py
@@ -341,7 +341,8 @@ def search(self, route, item_key, search_conditions):
         except requests.HTTPError as e:
             if e.response.status_code == 400:
                 raise ValueError(e.response.json()['Message'])
-            raise
+            if not self.accept_response(e.response):
+                raise
 
     def pages(self, url, params=None, item_key=None):
         if params is None:

From 61c41ab57be8e95d7526768f4e431255d43061b1 Mon Sep 17 00:00:00 2001
From: msj <monkruman.st.jules@datamde.us>
Date: Mon, 14 Apr 2025 15:37:52 -0400
Subject: [PATCH 6/6] Handle 503 http errors when scraping event web details

---
 legistar/events.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/legistar/events.py b/legistar/events.py
index 44e5011..839a1e1 100644
--- a/legistar/events.py
+++ b/legistar/events.py
@@ -386,6 +386,13 @@ def web_detail(self, event):
         except scrapelib.HTTPError as e:
             if e.response.status_code == 410:
                 return None
+            elif e.response.status_code == 503:
+                # Events with draft agendas sometimes have an EventInSiteURL
+                # that resolves to a 503 status code
+                self.logger.error(
+                    f"Error while fetching event detail at {insite_url}: {e}"
+                )
+                return None
             else:
                 raise