word-overlap

Finds the words that overlap between two sentences or phrases, to determine whether they refer to the same context

Usage (no npm install needed!)

<script type="module">
  import wordOverlap from 'https://cdn.skypack.dev/word-overlap';
</script>

README

word-overlap

NPM version Build Status Coverage Status Code Climate Dependency Status

Find the words that overlap between 2 phrases or sentences

Useful for checking whether 2 titles / sentences / phrases refer to the same context, e.g. 2 event names.

Install

  1. with npm

    npm install word-overlap
    
  2. with browserify
    1. in file main.js

      // in main.js
      var overlap = require('word-overlap');
      
      var sentence1 = 'The Hitchhikings Meetup in Betelgeuse by Ford Prefect';
      var sentence2 = 'The hitchhikings meetups by the hitchhikers';
      
      var reply = overlap(sentence1, sentence2, {
        ignoreCase: true,
        minWordLength: 2,
        ignoreCommonWords: true
      });
      
      console.log(reply);
      
    2. in file index.html

      <script src="build.js"></script>
      
    3. make the file build.js

      browserify main.js -o build.js --exclude WNdb --exclude lapack
      

Usage

var overlap = require('word-overlap');

var sentence1 = 'The Hitchhikings Meetup in Betelgeuse by Ford Prefect';
var sentence2 = 'The hitchhikings meetups by the hitchhikers';

simple case

overlap(sentence1, sentence2);
// [ 'The', 'by' ]

option: ignore case

overlap(sentence1, sentence2, {
   ignoreCase: true
});
// [ 'the', 'hitchhikings', 'by' ]

option: min word length

overlap(sentence1, sentence2, {
  ignoreCase: true,
  minWordLength: 2
});
// [ 'the', 'hitchhikings', 'by' ]

option: ignore default common words

Common words by default include: a, an, the, this, that, there, it, in, on, for, not, your, you, at, to, is, us, out, by, I

overlap(sentence1, sentence2, {
  ignoreCase: true,
  minWordLength: 2,
  ignoreCommonWords: true
});
// [ 'hitchhikings' ]

option: ignore number

Ignore numbers such as: 5e3, 0xff, -1.1, 0, 1, 1.1, 10, 10.10, 100, '-1.1', etc.

sentence1 = 'Welcome to 2015';
sentence2 = '2015 Meetup for the year';
console.log(overlap(sentence1, sentence2, {
  ignoreNumber: true
}));
// [ ]

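option: add your own common words to ignore

The following examples use the Hitchhikings sentences defined at the top of the Usage section, not the 2015 sentences from the previous example.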

overlap(sentence1, sentence2, {
  ignoreCase: true,
  minWordLength: 2,
  ignoreCommonWords: true,
  common: [ 'hitchhikings' ]
});
// [ ]

option: depluralize words

overlap(sentence1, sentence2, {
  ignoreCase: true,
  minWordLength: 2,
  ignoreCommonWords: true,
  depluralize: true
});
// [ 'hitchhiking', 'meetup' ]

option: depluralize words with plurals to ignore

overlap(sentence1, sentence2, {
  ignoreCase: true,
  minWordLength: 2,
  ignoreCommonWords: true,
  depluralize: true,
  ignorePlurals: [ 'hitchhikings' ]
});
// [ 'hitchhikings', 'meetup' ]

option: stemming

var sentence1 = 'A programming course in SmallTalk';
var sentence2 = 'Have you programmed in SmallTalk?';

overlap(sentence1, sentence2, {
  stemming: true,
  ignoreCommonWords: true
});
// [ 'program', 'smalltalk' ]

Try out the examples in file example.js with the command node example.js

Contribute

Please see CONTRIBUTING.md for details.

Versioning

This repository follows the Semantic Versioning guidelines:

  1. For patches, run grunt bump
  2. For minor release, run grunt bump:minor
  3. For major release, run grunt bump:major

License

(C) Sayanee Basu 2014, released under an MIT license