description-improv / src /lib /wordProcessing.ts
Felix Zieger
updates
1a45d5d
raw
history blame contribute delete
623 Bytes
import nlp from 'compromise';
/**
 * Normalizes a word to a lowercase form containing only the letters `a`-`z`.
 *
 * For English input the word is first run through compromise's
 * `nouns().toSingular()`; if that produces no text, the original word is kept.
 * For every language the result then has its diacritics removed, is
 * lowercased, has `ß`/`æ`/`œ` spelled out as `ss`/`ae`/`oe`, and finally has
 * every remaining character outside `a`-`z` removed.
 *
 * @param word - The raw word to normalize.
 * @param language - Language code of `word`. Only `'en'` enables the
 *   compromise singularization step. Defaults to `'en'`.
 * @returns The normalized word; may be empty when `word` contains no letters
 *   that map into `a`-`z`.
 */
export const normalizeWord = (word: string, language: string = 'en'): string => {
  let processedWord = word;

  // Only apply compromise for English
  if (language === 'en') {
    // Fall back to the raw word when compromise doesn't produce output.
    processedWord = nlp(word).nouns().toSingular().out('text') || word;
  }

  // Apply standard normalization for all languages
  return (
    processedWord
      // Decompose accented letters so the accents become separate combining marks...
      .normalize('NFD')
      // ...then drop those combining marks (U+0300-U+036F).
      .replace(/[\u0300-\u036f]/g, '')
      .toLowerCase()
      // These letters have no canonical decomposition, so NFD leaves them intact
      // and the final filter would delete them outright ("straße" -> "strae").
      // Spell them out instead: ß -> ss, æ -> ae, œ -> oe.
      .replace(/\u00df/g, 'ss')
      .replace(/\u00e6/g, 'ae')
      .replace(/\u0153/g, 'oe')
      // Keep only a-z; this also removes all whitespace, so no trim is needed.
      .replace(/[^a-z]/g, '')
  );
};