lexer4js

Simple JavaScript lexer

Usage (no npm install needed!)

<script type="module">
  import lexer4js from 'https://cdn.skypack.dev/lexer4js';
</script>

README

Lexer4JS

npm npm version David npm bundle size (minified + gzip) code style: prettier Snyk Vulnerabilities for GitHub Repo GitHub top language GitHub code size in bytes GitHub last commit TLOC Node.js CI CodeQL

🚀 Using lexer

Pass the source code to the `tokenize(source)` method like this:

import { Lexer } from "lexer4js";

const source = fs.readFileSync("source.txt", "utf8");

const lexer = new Lexer();
const tokens = lexer.tokenize(source);

You will get a list of tokens. Calling the `.toString()` method on each `Token` produces output like this:

tokens.forEach((token) => console.log(token.toString()));
CLASS 'class' [L1:0]
IDENTIFIER 'Foo' [L1:6]
OPENING_CURLY_BRACE '{' [L1:10]
PRIVATE 'private' [L2:4]
DOUBLE 'double' [L2:12]
IDENTIFIER 'big' [L2:19]
ASSIGNMENT '=' [L2:23]
SCIENTIFIC_LITERAL '3.2e+23' [L2:25]
SEMICOLON ';' [L2:32]
PRIVATE 'private' [L3:4]
DOUBLE 'double' [L3:12]
IDENTIFIER 'small' [L3:19]
ASSIGNMENT '=' [L3:25]
SUBTRACTION '-' [L3:27]
SCIENTIFIC_LITERAL '4.70e-9' [L3:28]
SEMICOLON ';' [L3:35]
PRIVATE 'private' [L5:4]
STRING 'string' [L5:12]
IDENTIFIER 'message' [L5:19]
ASSIGNMENT '=' [L5:27]
STRING_LITERAL '"FooBarBaz"' [L5:29]
SEMICOLON ';' [L5:40]
PRIVATE 'private' [L6:4]
CHAR 'char' [L6:12]
IDENTIFIER 'newline' [L6:17]
ASSIGNMENT '=' [L6:25]
CHAR_LITERAL ''\n'' [L6:27]
SEMICOLON ';' [L6:31]
PRIVATE 'private' [L8:4]
INT 'int' [L8:12]
IDENTIFIER 'hex' [L8:16]
ASSIGNMENT '=' [L8:20]
HEX_LITERAL '0x0A0B0C' [L8:22]
SEMICOLON ';' [L8:30]
PRIVATE 'private' [L9:4]
INT 'int' [L9:12]
IDENTIFIER 'octal' [L9:16]
ASSIGNMENT '=' [L9:22]
OCTAL_LITERAL '0737' [L9:24]
SEMICOLON ';' [L9:28]
PRIVATE 'private' [L10:4]
INT 'int' [L10:12]
IDENTIFIER 'binary' [L10:16]
ASSIGNMENT '=' [L10:23]
BINARY_LITERAL '0b01001001110' [L10:25]
SEMICOLON ';' [L10:38]
PRIVATE 'private' [L12:4]
BOOLEAN 'boolean' [L12:12]
IDENTIFIER 'flag' [L12:20]
ASSIGNMENT '=' [L12:25]
FALSE 'false' [L12:27]
SEMICOLON ';' [L12:32]
PRIVATE 'private' [L14:4]
STRING 'string' [L14:12]
IDENTIFIER 'multiline' [L14:19]
ASSIGNMENT '=' [L14:29]
STRING_LITERAL '""' [L14:31]
STRING_LITERAL '"
        Hello, World!
        Who I am?
    "' [L14:33]
STRING_LITERAL '""' [L17:5]
SEMICOLON ';' [L17:7]
PUBLIC 'public' [L23:4]
VOID 'void' [L23:11]
IDENTIFIER 'main' [L23:16]
OPENING_BRACE '(' [L23:20]
STRING 'string' [L23:21]
OPENING_SQUARE_BRACE '[' [L23:27]
CLOSING_SQUARE_BRACE ']' [L23:28]
IDENTIFIER 'args' [L23:30]
CLOSING_BRACE ')' [L23:34]
OPENING_CURLY_BRACE '{' [L23:36]
INT 'int' [L24:8]
IDENTIFIER 'size' [L24:12]
ASSIGNMENT '=' [L24:17]
INT_LITERAL '3' [L24:19]
SEMICOLON ';' [L24:20]
INT 'int' [L25:8]
OPENING_SQUARE_BRACE '[' [L25:12]
IDENTIFIER 'size' [L25:13]
CLOSING_SQUARE_BRACE ']' [L25:17]
IDENTIFIER 'array' [L25:19]
ASSIGNMENT '=' [L25:25]
OPENING_CURLY_BRACE '{' [L25:27]
INT_LITERAL '1' [L25:29]
COMMA ',' [L25:30]
INT_LITERAL '2' [L25:32]
COMMA ',' [L25:33]
INT_LITERAL '3' [L25:35]
CLOSING_CURLY_BRACE '}' [L25:37]
SEMICOLON ';' [L25:38]
INT 'int' [L26:8]
IDENTIFIER 'index' [L26:12]
ASSIGNMENT '=' [L26:18]
INT_LITERAL '0' [L26:20]
SEMICOLON ';' [L26:21]
FLOAT 'float' [L27:8]
IDENTIFIER 'e' [L27:14]
ASSIGNMENT '=' [L27:16]
FLOAT_LITERAL '2.73' [L27:18]
SEMICOLON ';' [L27:22]
WHILE 'while' [L28:8]
OPENING_BRACE '(' [L28:14]
IDENTIFIER 'index' [L28:15]
NOT_EQUALS '!=' [L28:21]
INT_LITERAL '0' [L28:24]
CLOSING_BRACE ')' [L28:25]
OPENING_CURLY_BRACE '{' [L28:27]
IDENTIFIER 'index' [L29:12]
ASSIGNMENT '=' [L29:18]
IDENTIFIER 'index' [L29:20]
SUBTRACTION '-' [L29:26]
INT_LITERAL '1' [L29:28]
SEMICOLON ';' [L29:29]
IDENTIFIER 'var' [L30:12]
IDENTIFIER 'coefficient' [L30:16]
ASSIGNMENT '=' [L30:28]
IDENTIFIER 'big' [L30:30]
MULTIPLICATION '*' [L30:34]
IDENTIFIER 'small' [L30:36]
DIVISION '/' [L30:42]
IDENTIFIER 'hex' [L30:44]
SEMICOLON ';' [L30:47]
IDENTIFIER 'println' [L31:12]
OPENING_BRACE '(' [L31:19]
IDENTIFIER 'message' [L31:20]
COMMA ',' [L31:27]
IDENTIFIER 'array' [L31:29]
OPENING_SQUARE_BRACE '[' [L31:34]
IDENTIFIER 'index' [L31:35]
CLOSING_SQUARE_BRACE ']' [L31:40]
MULTIPLICATION '*' [L31:42]
IDENTIFIER 'coefficient' [L31:44]
COMMA ',' [L31:55]
IDENTIFIER 'newline' [L31:57]
CLOSING_BRACE ')' [L31:64]
SEMICOLON ';' [L31:65]
CLOSING_CURLY_BRACE '}' [L32:8]
FOR 'for' [L33:8]
OPENING_BRACE '(' [L33:12]
IDENTIFIER 'var' [L33:13]
IDENTIFIER 'num' [L33:17]
COLON ':' [L33:21]
IDENTIFIER 'array' [L33:23]
CLOSING_BRACE ')' [L33:28]
OPENING_CURLY_BRACE '{' [L33:30]
IDENTIFIER 'var' [L34:12]
IDENTIFIER 'coefficient' [L34:16]
ASSIGNMENT '=' [L34:28]
IDENTIFIER 'big' [L34:30]
MULTIPLICATION '*' [L34:34]
IDENTIFIER 'small' [L34:36]
DIVISION '/' [L34:42]
IDENTIFIER 'hex' [L34:44]
SEMICOLON ';' [L34:47]
IDENTIFIER 'println' [L35:12]
OPENING_BRACE '(' [L35:19]
IDENTIFIER 'message' [L35:20]
COMMA ',' [L35:27]
IDENTIFIER 'num' [L35:29]
MULTIPLICATION '*' [L35:33]
IDENTIFIER 'coefficient' [L35:35]
COMMA ',' [L35:46]
IDENTIFIER 'newline' [L35:48]
CLOSING_BRACE ')' [L35:55]
SEMICOLON ';' [L35:56]
CLOSING_CURLY_BRACE '}' [L36:8]
IDENTIFIER 'var' [L37:8]
IDENTIFIER 'secret' [L37:12]
ASSIGNMENT '=' [L37:19]
IDENTIFIER 'hex' [L37:21]
XOR '^' [L37:25]
IDENTIFIER 'octal' [L37:27]
XOR '^' [L37:33]
IDENTIFIER 'binary' [L37:35]
SEMICOLON ';' [L37:41]
IF 'if' [L38:8]
OPENING_BRACE '(' [L38:11]
IDENTIFIER 'flag' [L38:12]
AND '&&' [L38:17]
IDENTIFIER 'size' [L38:20]
OR '||' [L38:25]
IDENTIFIER 'secret' [L38:28]
CLOSING_BRACE ')' [L38:34]
OPENING_CURLY_BRACE '{' [L38:36]
IDENTIFIER 'println' [L39:10]
OPENING_BRACE '(' [L39:17]
IDENTIFIER 'secret' [L39:18]
XOR '^' [L39:25]
IDENTIFIER 'flag' [L39:27]
CLOSING_BRACE ')' [L39:31]
SEMICOLON ';' [L39:32]
CLOSING_CURLY_BRACE '}' [L40:8]
IDENTIFIER 'println' [L41:8]
OPENING_BRACE '(' [L41:15]
IDENTIFIER 'secret' [L41:16]
CLOSING_BRACE ')' [L41:22]
SEMICOLON ';' [L41:23]
CLOSING_CURLY_BRACE '}' [L42:4]
CLOSING_CURLY_BRACE '}' [L43:0]