import { Dataset, PlaywrightCrawler, ProxyConfiguration } from 'crawlee';
import { chromium } from 'playwright';

/**
 * Paginated listing scraper.
 *
 * Opens the first results page for ZIP_CODE, stores the title and link of
 * every `.results-row` entry in the default Dataset, then follows the "Next"
 * link until a page without one is reached.
 */

/** Shape of one scraped result row; a field is null when its element or attribute is absent. */
interface ProfileLink {
  title: string | null;
  href: string | null;
}

// NOTE(review): the proxy URL embeds credentials; consider loading it from
// configuration or the environment instead of committing it to source.
const proxyConfiguration = new ProxyConfiguration({
  proxyUrls: [
    // 'https://abcd:pwd@proxy.scrapeops.io:5353',
    'http://abcd:pwd@unblock.oxylabs.io:60000',
  ],
});

const ZIP_CODE = '79054';
const START_PAGE = 1;

const crawler = new PlaywrightCrawler({
  // A ProxyConfiguration instance is a crawler-level option; Playwright's own
  // `launchOptions.proxy` expects a plain `{ server, username, password }` object.
  proxyConfiguration,
  launchContext: {
    // The browser type is selected through `launcher`, not through launchOptions.
    launcher: chromium,
    launchOptions: {
      headless: true,
    },
  },
  // No maxRequestsPerCrawl: pagination ends on its own once the "Next" link disappears.

  /** Scrapes one results page and enqueues the following page when there is one. */
  async requestHandler({ request, page, enqueueLinks, log }) {
    log.info(`Processing ${request.url}...`);

    // The callback runs inside the browser, so it may only use DOM APIs.
    const data = await page.$$eval('.results-row', (rows): ProfileLink[] =>
      rows.map((row) => ({
        title: row.querySelector('.profile-title')?.textContent ?? null,
        href: row.querySelector('a.profile-title')?.getAttribute('href') ?? null,
      })),
    );
    await Dataset.pushData(data);

    // The "Next" link is only rendered when another page of results exists.
    const nextPageLink = await page.$(`a[title="Next Therapists in ${ZIP_CODE}"]`);
    const nextPageHref = (await nextPageLink?.getAttribute('href')) ?? null;

    if (nextPageHref === null) {
      log.info(`${request.url} is the last page!`);
      return;
    }

    // The href may be relative; resolve it against the URL that was actually loaded.
    const nextPageUrl = new URL(nextPageHref, request.loadedUrl ?? request.url).href;
    await enqueueLinks({ urls: [nextPageUrl] });
  },

  /** Called once a request has exhausted all of its retries. */
  failedRequestHandler({ request, log }) {
    log.error(`Request ${request.url} failed too many times.`);
  },
});

const initialUrl = `https://www.example.com/us/abcd/${ZIP_CODE}?page=${START_PAGE}`;

await crawler.addRequests([initialUrl]);
await crawler.run();

console.log('Crawler finished.');