
Tests are halfway passing
adelavega committed Feb 24, 2023
1 parent 2b857de; commit 55ffe2f
Showing 3 changed files with 50 additions and 45 deletions.
ace/scrape.py (2 changes: 1 addition & 1 deletion)
@@ -284,7 +284,7 @@ def retrieve_journal_articles(self, journal, delay=None, mode='browser', search=
             outf = open(filename, 'w')
             # Still having encoding issues with some journals (e.g.,
             # PLoS ONE). Why???
-            outf.write(doc.encode('utf-8'))
+            outf.write(doc)
             outf.close()
             articles_found += 1

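Aside on the scrape.py change: open(filename, 'w') returns a text-mode handle, and in Python 3 a text-mode write() accepts str, not bytes, so the old outf.write(doc.encode('utf-8')) raises TypeError. Dropping the encode() call writes the already-decoded string directly. A minimal sketch of the two working options, with a hypothetical filename (to pin the on-disk encoding explicitly, pass it to open() or use binary mode):

    # Text mode: write() takes str; pass the encoding to open() instead.
    doc = 'coördinate activation'           # already-decoded text
    with open('article.html', 'w', encoding='utf-8') as outf:
        outf.write(doc)                     # OK: str into a text-mode file
        # outf.write(doc.encode('utf-8'))   # TypeError: write() argument must be str, not bytes

    # Binary mode: write() takes bytes, so encoding explicitly is correct here.
    with open('article.html', 'wb') as outf:
        outf.write(doc.encode('utf-8'))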
ace/sources.py (91 changes: 48 additions & 43 deletions)
@@ -239,21 +239,22 @@ def parse_article(self, html, pmid=None, **kwargs):
             url = '%s/T%d.expansion.html' % (content_url, t_num)
             table_soup = self._download_table(url)
             tc = table_soup.find(class_='table-expansion')
-            t = tc.find('table', {'id': 'table-%d' % (t_num)})
-            t = self.parse_table(t)
-            if t:
-                t.position = t_num
-                t.label = tc.find(class_='table-label').text
-                t.number = t.label.split(' ')[-1].strip()
-                try:
-                    t.caption = tc.find(class_='table-caption').get_text()
-                except:
-                    pass
-                try:
-                    t.notes = tc.find(class_='table-footnotes').get_text()
-                except:
-                    pass
-                tables.append(t)
+            if tc:
+                t = tc.find('table', {'id': 'table-%d' % (t_num)})
+                t = self.parse_table(t)
+                if t:
+                    t.position = t_num
+                    t.label = tc.find(class_='table-label').text
+                    t.number = t.label.split(' ')[-1].strip()
+                    try:
+                        t.caption = tc.find(class_='table-caption').get_text()
+                    except:
+                        pass
+                    try:
+                        t.notes = tc.find(class_='table-footnotes').get_text()
+                    except:
+                        pass
+                    tables.append(t)

         self.article.tables = tables
         return self.article
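The guard added here matters because BeautifulSoup's find() returns None when nothing matches, and attribute access on that None raises AttributeError before the inner try/except blocks (which only wrap the caption and footnote lookups) ever run. A minimal sketch of the failure mode and the fix, assuming bs4 and a page with no table-expansion element:

    from bs4 import BeautifulSoup

    html = '<html><body><p>no expansion markup</p></body></html>'
    soup = BeautifulSoup(html, 'html.parser')

    tc = soup.find(class_='table-expansion')  # None: no match on this page
    # tc.find('table')                        # AttributeError: 'NoneType' object has no attribute 'find'

    if tc:                                    # guard before dereferencing, as in the commit
        t = tc.find('table', {'id': 'table-1'})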
@@ -413,19 +414,22 @@ def parse_article(self, html, pmid=None, **kwargs):
             url = 'http://www.mitpressjournals.org/action/showPopup?citid=citart1&id=T%d&doi=%s' % (
                 num, doi)
             table_soup = self._download_table(url)
-            tc = table_soup.find('table').find('table')  # JCogNeuro nests tables 2-deep
-            t = self.parse_table(tc)
-            if t:
-                t.position = num
-                t.number = num
-                cap = tc.caption.find('span', class_='title')
-                t.label = cap.b.get_text()
-                t.caption = cap.get_text()
-                try:
-                    t.notes = table_soup.find('div', class_="footnote").p.get_text()
-                except:
-                    pass
-                tables.append(t)
+            tc = table_soup.find('table')  # JCogNeuro nests tables 2-deep
+            if tc:
+                tc = tc.find('table')
+                if tc:
+                    t = self.parse_table(tc)
+                    if t:
+                        t.position = num
+                        t.number = num
+                        cap = tc.caption.find('span', class_='title')
+                        t.label = cap.b.get_text()
+                        t.caption = cap.get_text()
+                        try:
+                            t.notes = table_soup.find('div', class_="footnote").p.get_text()
+                        except:
+                            pass
+                        tables.append(t)

         self.article.tables = tables
         return self.article
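The MIT Press branch needs two guards because the tables are nested two deep and either find('table') call can come back None. If this pattern keeps spreading, the nesting could be flattened with a small helper; a hypothetical sketch (find_chain is not part of the ace codebase):

    def find_chain(node, *names):
        """Apply successive .find() calls, bailing out as soon as one misses."""
        for name in names:
            if node is None:
                return None
            node = node.find(name)
        return node

    # Equivalent to the commit's nested guards:
    # tc = find_chain(table_soup, 'table', 'table')
    # if tc: ...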
@@ -511,21 +515,22 @@ def parse_article(self, html, pmid=None, **kwargs):
             url = '%s/T%d.expansion.html' % (content_url, t_num)
             table_soup = self._download_table(url)
             tc = table_soup.find(class_='table-expansion')
-            t = tc.find('table', {'id': 'table-%d' % (t_num)})
-            t = self.parse_table(t)
-            if t:
-                t.position = t_num
-                t.label = tc.find(class_='table-label').text
-                t.number = t.label.split(' ')[-1].strip()
-                try:
-                    t.caption = tc.find(class_='table-caption').get_text()
-                except:
-                    pass
-                try:
-                    t.notes = tc.find(class_='table-footnotes').get_text()
-                except:
-                    pass
-                tables.append(t)
+            if tc:
+                t = tc.find('table', {'id': 'table-%d' % (t_num)})
+                t = self.parse_table(t)
+                if t:
+                    t.position = t_num
+                    t.label = tc.find(class_='table-label').text
+                    t.number = t.label.split(' ')[-1].strip()
+                    try:
+                        t.caption = tc.find(class_='table-caption').get_text()
+                    except:
+                        pass
+                    try:
+                        t.notes = tc.find(class_='table-footnotes').get_text()
+                    except:
+                        pass
+                    tables.append(t)

         self.article.tables = tables
         return self.article
ace/tests/test_ace.py (2 changes: 1 addition & 1 deletion)
@@ -73,7 +73,7 @@ def testDatabaseProcessingStream(self):

     def testJournalScraping(self):
         scrape_path = join(get_test_data_path(), 'scrape_test')
-        os.mkdir(scrape_path, exists_ok=True)
+        os.makedirs(scrape_path, exist_ok=True)
         # Test with PLoS ONE because it's OA
         scraper = scrape.Scraper(scrape_path)
         scraper.retrieve_journal_articles('PLoS ONE', delay=5.0, mode='direct',
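The test fix corrects two bugs in one line: os.mkdir() takes no exists_ok keyword at all (the old call raised TypeError on every run), and the standard-library spelling is exist_ok, which os.makedirs() supports. A minimal sketch with a hypothetical path:

    import os
    from os.path import join

    scrape_path = join('/tmp', 'scrape_test')   # hypothetical path

    # os.mkdir(scrape_path, exists_ok=True)     # TypeError: unexpected keyword argument
    os.makedirs(scrape_path, exist_ok=True)     # idempotent: no error if the directory exists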
