From f746622dcb82b65bcf4f1beca823201c44593fec Mon Sep 17 00:00:00 2001 From: Hannah Cushman Garland Date: Tue, 5 Nov 2024 09:15:56 -0600 Subject: [PATCH] Handle reports that cannot be opened --- scrapers/office/scrape_search.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scrapers/office/scrape_search.py b/scrapers/office/scrape_search.py index 756d3c1..ef44456 100644 --- a/scrapers/office/scrape_search.py +++ b/scrapers/office/scrape_search.py @@ -116,12 +116,19 @@ def _parse_filing_pdf(self, version): return {} else: - version_pdf = pdfplumber.open(io.BytesIO(pdf.content)) + # Skip reports that can't be opened + try: + version_pdf = pdfplumber.open(io.BytesIO(pdf.content)) + except Exception as e: + logger.error( + f"Could not open report document at {report_url} due to the following exception:\n{str(e)}" + ) + return {} + # Skip reports that can't be parsed try: version_content = parse_pdf(version_pdf) except Exception as e: - # Skip reports that can't be parsed logger.error( f"Could not parse report document at {report_url} due to the following exception:\n{str(e)}" )