@deeptakirandas/pdf-parse

Pure JavaScript cross-platform module to extract text from PDFs.

Usage — no npm install needed!

<script type="module">
  import deeptakirandasPdfParse from 'https://cdn.skypack.dev/@deeptakirandas/pdf-parse';
</script>

README

@deeptakirandas/pdf-parse

JavaScript cross-platform module to extract text from PDFs.

Installation

npm i @deeptakirandas/pdf-parse

Basic Usage - Local Files

const fs = require('fs');
const pdf = require('@deeptakirandas/pdf-parse');

let dataBuffer = fs.readFileSync('path to PDF file...');

pdf(dataBuffer).then(function(data) {

    // number of pages
    console.log(data.numpages);
    // number of rendered pages
    console.log(data.numrender);
    // PDF info
    console.log(data.info);
    // PDF metadata
    console.log(data.metadata); 
    // PDF.js version
    // check https://mozilla.github.io/pdf.js/getting_started/
    console.log(data.version);
    // PDF text
    console.log(data.text); 
        
});

Basic Usage - HTTP

You can use crawler-request, which uses pdf-parse.

Exception Handling

const fs = require('fs');
const pdf = require('@deeptakirandas/pdf-parse');

let dataBuffer = fs.readFileSync('path to PDF file...');

pdf(dataBuffer).then(function(data) {
    // use data
})
.catch(function(error){
    // handle exceptions
})

License

MIT licensed, and all its dependencies are MIT licensed.