File size: 789 Bytes
1a45d5d 65676ec 1a45d5d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import nlp from 'compromise';
/**
 * Reduces a word to a bare lowercase ASCII key containing only `a`–`z`.
 *
 * Steps, in order:
 * 1. For English (`language === 'en'`) the word is passed through compromise
 *    (`nouns().toSingular()`); if that yields no text the input is kept.
 * 2. The text is lowercased and composed (NFC) so that German umlauts and
 *    `ß` can be transliterated to their two-letter spellings
 *    (`ü → ue`, `ä → ae`, `ö → oe`, `ß → ss`).
 * 3. Remaining diacritics are removed by decomposing (NFD) and dropping
 *    combining marks.
 * 4. Every character outside `a`–`z` is removed.
 *
 * The transliteration in step 2 is applied for every language, not only
 * German.
 *
 * @param word - The raw word to normalize.
 * @param language - Language code; only `'en'` triggers singularization.
 * @returns The normalized key, possibly an empty string.
 */
export const normalizeWord = (word: string, language: string = 'en'): string => {
  let processedWord = word;

  // Only apply compromise for English
  if (language === 'en') {
    const singular: string = nlp(word).nouns().toSingular().out('text');
    // Handle cases where compromise doesn't produce output: keep the input.
    if (singular) {
      processedWord = singular;
    }
  }

  // Apply standard normalization for all languages
  return (
    processedWord
      .toLowerCase()
      // Compose first so that a decomposed "u + U+0308" also matches below.
      .normalize('NFC')
      // German umlauts and sharp s -> alternative spellings. This must run
      // BEFORE diacritics are stripped, otherwise "ü" has already collapsed
      // to "u" and these replacements can never match.
      .replace(/\u00fc/g, 'ue') // ü
      .replace(/\u00e4/g, 'ae') // ä
      .replace(/\u00f6/g, 'oe') // ö
      .replace(/\u00df/g, 'ss') // ß
      // Strip every remaining diacritic (é -> e, ñ -> n, ...).
      .normalize('NFD')
      .replace(/[\u0300-\u036f]/g, '')
      // Drop anything that is not a plain lowercase letter (this also removes
      // all whitespace, so no trim is needed afterwards).
      .replace(/[^a-z]/g, '')
  );
};
|