dagmar

deadly simple crawling/scraping package for Node.

Usage (no npm install needed!)

<script type="module">
  import dagmar from 'https://cdn.skypack.dev/dagmar';
</script>

README

Build Status Dependency Status devDependency Status

Dagmar

Dagmar is a deadly simple crawling/scraping package for Node.

It features:

  • A clean, simple API
  • Optional server-side DOM and automatic jQuery insertion via Cheerio
  • Node 0.10+ support

How to install

$ npm install dagmar

Crash course


// Minimal crawl: queue a handful of pages and print each response body.
// NOTE(review): assumes `Crawler` is the constructor exposed by the dagmar
// package and is already in scope — confirm how it should be loaded.
var crawler = new Crawler();

// Handler receiving (error, response, body) — presumably once per queued URL.
crawler.forEach(function(error, response, body) {
  if (error) {
    console.log(error);
  } else if (response.statusCode !== 200) {
    // `error` is falsy on this path, so logging it would print nothing
    // useful; report the unexpected status code instead.
    console.log('Unexpected status code: ' + response.statusCode);
  } else {
    console.log(body);
  }
});

// Completion callback registered via `end`.
crawler.end(function() {
  console.log('Done.');
});

crawler.queue("http://www.google.com");
crawler.queue("http://www.yahoo.com");
crawler.queue("http://www.apple.com");
crawler.queue("http://www.twitter.com");
crawler.queue("http://www.facebook.com");

crawler.start();

Using Cheerio


// Crawl one page and query its markup with Cheerio.
// NOTE(review): assumes `Crawler` (dagmar) and `cheerio` are already in
// scope — confirm how they should be loaded.
var crawler = new Crawler();

// Handler receiving (error, response, body) — presumably once per queued URL.
crawler.forEach(function(error, response, body) {
  var $, list;
  if (error) {
    console.log(error);
  } else if (response.statusCode !== 200) {
    // `error` is falsy on this path; report the status code instead.
    console.log('Unexpected status code: ' + response.statusCode);
  } else {
    $ = cheerio.load(body);
    // Query the document that was just loaded. Passing a literal HTML string
    // as the second (context) argument would search that string instead of
    // the fetched page.
    list = $('ul');
    console.log(list);
  }
});

// Completion callback registered via `end`.
crawler.end(function() {
  console.log('Done.');
});

crawler.queue("http://www.fruits.org");

crawler.start();

Full crawler: retrieving each link's href and adding it to the queue

// Recursive crawl: every link found on a fetched page is queued in turn.
// NOTE(review): assumes `Crawler` (dagmar) and `cheerio` are already in
// scope — confirm how they should be loaded.
var crawler = new Crawler();

// Handler receiving (error, response, body) — presumably once per queued URL.
crawler.forEach(function(error, response, body) {
  var $;
  if (error) {
    console.log(error);
  } else if (response.statusCode !== 200) {
    // `error` is falsy on this path; report the status code instead.
    console.log('Unexpected status code: ' + response.statusCode);
  } else {
    $ = cheerio.load(body);
    // NOTE(review): hrefs are queued as-is; relative links and already
    // visited URLs are not handled here — confirm whether Crawler resolves
    // or de-duplicates them.
    return $('a').each(function(index, a) {
      var url = $(a).attr('href');
      // Anchors without an href yield undefined; do not queue those.
      if (url) {
        crawler.queue(url);
      }
    });
  }
});

// Completion callback registered via `end`.
crawler.end(function() {
  console.log('Done.');
});

crawler.queue("http://www.fruits.org");

crawler.start();