Node Stemmer is a Node.js interface to the stemming algorithms from the Snowball project, largely inspired by Richard Boulton's PyStemmer.
It uses ffi-rs and expects to find the file libstemmer.so (a version of Libstemmer compiled as a shared library) in a directory listed in LD_LIBRARY_PATH.
To set up this kind of environment, you can take a look at the docker-node-libstemmer Dockerfile, or you can use the corresponding Docker image: amaccis/node-libstemmer.
You can install Node Stemmer using npm.
npm i node-stemmer
node-stemmer | libstemmer |
---|---|
3.0.2 | 3.0.0, 3.0.1 |
1.0.0 | 2.0.0, 2.1.0, 2.2.0 |
import { Stemmer, CharacterEncoding } from 'node-stemmer';
const algorithms = Stemmer.algorithms();
console.log(algorithms);
/*
[
'arabic', 'armenian', 'basque',
'catalan', 'danish', 'dutch',
'dutch_porter', 'english', 'esperanto',
'estonian', 'finnish', 'french',
'german', 'greek', 'hindi',
'hungarian', 'indonesian', 'irish',
'italian', 'lithuanian', 'nepali',
'norwegian', 'porter', 'portuguese',
'romanian', 'russian', 'serbian',
'spanish', 'swedish', 'tamil',
'turkish', 'yiddish'
]
*/
const algorithm = 'english';
const word = Buffer.from('cycling');
const stemmer = new Stemmer(algorithm); // default character encoding is UTF-8
const stem = stemmer.stemWord(word);
console.log(stem);
/*
cycl
*/
const algorithm = 'basque';
const word = Buffer.from('aberatsenetakoa');
const stemmer = new Stemmer(algorithm, CharacterEncoding.ISO_8859_1);
const stem = stemmer.stemWord(word);
console.log(stem);
/*
aberatse
*/
All files are MIT © Andrea Maccis.