Node.js Puppeteer web scraping code examples
Example 1: Puppeteer example for Node.js
require('dotenv/config');
// The trailing semicolon matters: without it the parenthesised async function
// below is parsed as a call on the value returned by require('puppeteer').
const puppeteer = require('puppeteer');

/**
 * Opens medium.com in a visible browser window, navigates to the search
 * page, searches for "react" and writes a JPEG screenshot of the result
 * list to `screnshoot.jpg`.
 *
 * Reads the optional USER_AGENT environment variable (loaded by
 * dotenv/config). Any failure is logged to the console; the browser is
 * closed whether the run succeeds or fails.
 */
(async () => {
  // Pause after scrolling so content triggered by the scroll can start loading.
  const SCROLL_SETTLE_MS = 1000;

  // Declared outside `try` so the `finally` clause can reach it.
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: false,
      // NOTE(review): '--no-sandbox' turns off Chromium's sandbox; confirm it
      // is really required in the environment this runs in.
      args: [
        '--allow-external-pages',
        '--allow-third-party-modules',
        '--data-reduction-proxy-http-proxies',
        '--no-sandbox',
      ],
    });
    // NOTE(review): newer Puppeteer releases expose this as
    // browser.createBrowserContext() — confirm against the installed version.
    const context = await browser.createIncognitoBrowserContext();
    const page = await context.newPage();

    // The user agent must be set before navigating, otherwise the first
    // request still goes out with the default one. Skipped when unset.
    if (process.env.USER_AGENT) {
      await page.setUserAgent(process.env.USER_AGENT);
    }
    await page.goto('https://medium.com', { waitUntil: 'networkidle2' });

    await page.evaluate(() => window.scrollBy(0, 1000));
    await new Promise((resolve) => setTimeout(resolve, SCROLL_SETTLE_MS));

    // Register the navigation wait BEFORE clicking: if the click is awaited
    // first, the navigation may finish before the wait is installed and the
    // wait then times out.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle2' }),
      page.click('.qw a'),
    ]);
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle2' }),
      page.click('a[aria-label="Search"]'),
    ]);

    // Make sure the search box exists before interacting with it.
    await page.waitForSelector('.js-searchInput', { timeout: 1000 });
    await page.focus('.js-searchInput');
    await page.type('.js-searchInput', 'react', { delay: 100 });
    await page.keyboard.press('Enter');

    await page.waitForSelector('.js-postListHandle', { timeout: 1000 });
    await page.screenshot({ path: 'screnshoot.jpg', quality: 80 });
  } catch (err) {
    console.log(`'Puppeteer Error Detencted -> ${err}'`);
  } finally {
    // Always release the browser process, even when a step above threw.
    if (browser) {
      await browser.close();
    }
  }
})();
Example 2: Web scraping with Node.js and Puppeteer
const puppeteer = require('puppeteer');
// The trailing semicolon matters: without it the parenthesised async function
// below is parsed as a call on the value returned by require('cheerio').
const cheerio = require('cheerio');

/**
 * Loads an IMDb title page in headless Chromium, parses the rendered body
 * HTML with cheerio and prints the text of the <h1> inside the
 * `div[class="title_wrapper"]` element.
 *
 * Failures are logged to stderr; the browser is closed whether the run
 * succeeds or fails.
 */
(async () => {
  // Declared outside `try` so the `finally` clause can reach it.
  let browser;
  try {
    browser = await puppeteer.launch();
    const page = await browser.newPage();
    await page.goto('https://www.imdb.com/title/tt7126948/?ref_=nv_sr_srsg_0', {
      waitUntil: 'networkidle0',
    });

    // Grab the DOM after client-side rendering, then query it offline.
    const content = await page.evaluate(() => document.body.innerHTML);
    const $ = cheerio.load(content);
    const data = $('div[class="title_wrapper"]').find('h1').text();
    console.log(data);
  } catch (err) {
    console.error(`Scraping failed -> ${err}`);
  } finally {
    // Always release the browser process, even when a step above threw.
    if (browser) {
      await browser.close();
    }
  }
})();