-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathScraper_script.js
72 lines (60 loc) · 2.61 KB
/
Scraper_script.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
const puppeteer = require('puppeteer');
const createCsvWriter = require('csv-writer').createObjectCsvWriter;
/**
 * Scrapes quotes and their associated image URLs from the paginated
 * Goodreads quotes listing and writes them to `quotes_and_images.csv`.
 *
 * Flow: launch a browser, open the first listing page, collect every
 * `.quoteText` entry on the page, follow the `a.next_page` link, and repeat
 * until `maxPages` pages have been scraped or no next link exists. All
 * collected rows are written to the CSV in a single call at the end.
 *
 * Any error is logged to stderr; the browser is closed whether the run
 * succeeds or fails.
 */
(async () => {
  const navigationOptions = { waitUntil: 'networkidle2', timeout: 60000 };
  const maxPages = 100; // Adjust the limit for the number of pages you want to scrape

  try {
    const browser = await puppeteer.launch({ headless: false });

    // Inner try/finally guarantees the browser process is shut down even when
    // scraping, navigation, or the CSV write throws.
    try {
      const page = await browser.newPage();
      await page.goto('https://www.goodreads.com/quotes', navigationOptions);

      const allData = [];

      /**
       * Collects `{ quote, image }` records from the current page and appends
       * them to `allData`. `image` is an empty string when no image is found
       * for a quote.
       */
      async function scrapeData() {
        await page.waitForSelector('.quoteText');
        await page.waitForSelector('a.leftAlignedImage img[src]');

        // This callback runs inside the page, so it can only use DOM APIs.
        const data = await page.evaluate(() => {
          const imageSelector = 'a.leftAlignedImage img[src]';
          const textElements = Array.from(document.querySelectorAll('.quoteText'));
          const imgElements = document.querySelectorAll(imageSelector);

          return textElements.map((textElement, index) => {
            // Pair each quote with the image inside its own container so that
            // a quote without an image does not shift every later image onto
            // the wrong quote.
            // NOTE(review): assumes each quote and its image share a
            // `.quoteDetails` ancestor; if that ancestor is missing, fall back
            // to pairing by position in the two page-wide lists.
            const container = textElement.closest('.quoteDetails');
            const imgElement = container
              ? container.querySelector(imageSelector)
              : imgElements[index];

            return {
              quote: textElement.innerText.trim(),
              image: imgElement ? imgElement.src : '',
            };
          });
        });

        allData.push(...data);
      }

      /**
       * Follows the "next page" link if one exists.
       *
       * @returns {Promise<boolean>} `true` after navigating to the next page,
       *   `false` when the page has no `a.next_page` element.
       */
      async function goToNextPage() {
        const nextButton = await page.$('a.next_page');
        if (!nextButton) {
          return false;
        }

        // Start waiting for the navigation before clicking: if the click were
        // awaited first, a fast navigation could finish before the wait is
        // registered and the wait would then hang until it times out.
        await Promise.all([
          page.waitForNavigation(navigationOptions),
          nextButton.click(),
        ]);
        return true;
      }

      // Scrape data and navigate to the next page until the limit is reached
      let currentPage = 0;
      do {
        await scrapeData();
        currentPage++;
      } while (currentPage < maxPages && await goToNextPage());

      // Define the CSV writer
      const csvWriter = createCsvWriter({
        path: 'quotes_and_images.csv',
        header: [
          { id: 'quote', title: 'Quote' },
          { id: 'image', title: 'Image Source' },
        ],
      });

      // Write the data to the CSV file
      await csvWriter.writeRecords(allData);
      console.log('Quotes and images have been saved to quotes_and_images.csv');
    } finally {
      await browser.close();
    }
  } catch (error) {
    console.error('Error:', error);
  }
})();