From f746622dcb82b65bcf4f1beca823201c44593fec Mon Sep 17 00:00:00 2001
From: Hannah Cushman Garland <hannah.cushman@datamade.us>
Date: Tue, 5 Nov 2024 09:15:56 -0600
Subject: [PATCH] Handle reports that cannot be opened

---
 scrapers/office/scrape_search.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/scrapers/office/scrape_search.py b/scrapers/office/scrape_search.py
index 756d3c1..ef44456 100644
--- a/scrapers/office/scrape_search.py
+++ b/scrapers/office/scrape_search.py
@@ -116,12 +116,19 @@ def _parse_filing_pdf(self, version):
                 return {}
 
             else:
-                version_pdf = pdfplumber.open(io.BytesIO(pdf.content))
+                # Skip reports that can't be opened
+                try:
+                    version_pdf = pdfplumber.open(io.BytesIO(pdf.content))
+                except Exception as e:
+                    logger.error(
+                        f"Could not open report document at {report_url} due to the following exception:\n{str(e)}"
+                    )
+                    return {}
 
+                # Skip reports that can't be parsed
                 try:
                     version_content = parse_pdf(version_pdf)
                 except Exception as e:
-                    # Skip reports that can't be parsed
                     logger.error(
                         f"Could not parse report document at {report_url} due to the following exception:\n{str(e)}"
                     )