Handle reports that cannot be opened

datamade · Nov 5, 2024 · f746622
1 parent bc82cbe
commit f746622
Showing 1 changed file with 9 additions and 2 deletions.
diff --git a/scrapers/office/scrape_search.py b/scrapers/office/scrape_search.py
@@ -116,12 +116,19 @@ def _parse_filing_pdf(self, version):
                 return {}
 
             else:
-                version_pdf = pdfplumber.open(io.BytesIO(pdf.content))
+                # Skip reports that can't be opened
+                try:
+                    version_pdf = pdfplumber.open(io.BytesIO(pdf.content))
+                except Exception as e:
+                    logger.error(
+                        f"Could not open report document at {report_url} due to the following exception:\n{str(e)}"
+                    )
+                    return {}
 
+                # Skip reports that can't be parsed
                 try:
                     version_content = parse_pdf(version_pdf)
                 except Exception as e:
-                    # Skip reports that can't be parsed
                     logger.error(
                         f"Could not parse report document at {report_url} due to the following exception:\n{str(e)}"
                     )