README

Page Scraper

Web page scraper with a jQuery-like syntax for Node. Powered by got and cheerio.

Installation

$ npm install page-scraper

Usage

const scrape = require('page-scraper');

(async () => {
  const $ = await scrape('https://example.com');

  // Extract the page with jQuery like syntax.
  console.log({
    title: $('title').text(),
    heading: $('h1').text(),
    paragraphs: $('p').map((index, el) => $(el).text()).get(),
    link: $('p > a').attr('href')
  });
})();

Check the cheerio documentation for a complete guide on how to scrape the page using jQuery like syntax.

Recipes

Handling Error

const scrape = require('page-scraper');

(async () => {
  try {
    const $ = await scrape('https://httpbin.org/status/400');
  } catch(error) {
    // The error message.
    console.error(error.message);

    if (error.hasOwnProperty('response')) {
      // The HTTP status code.
      console.error(error.response.statusCode);
    }

    if (error.hasOwnProperty('

)) {
      // The HTML document.
      console.error(error.$.html());
    }
  }
})();

Note that if the page is not an HTML document, it will throw an error too.

const scrape = require('./src');

(async () => {
  try {
    const $ = await scrape('https://httpbin.org/json');
  } catch(error) {
    console.error(error.message);

    if (error.hasOwnProperty('response')) {
      // The response body.
      console.error(error.response.body);
    }
  }
})();

Scraping Multiple Pages

const scrape = require('./src');

(async () => {
  const $ = await Promise.all([
    scrape('https://example.com'),
    scrape('https://httpbin.org/html')
  ]);

  console.log({
    heading_1: $[0]('h1').text(),
    heading_2: $[1]('h1').text()
  });
})();

page-scraper

Usage no npm install needed!