File size: 789 Bytes
1a45d5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65676ec
 
 
 
 
 
 
1a45d5d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import nlp from 'compromise';

/**
 * Reduces a word to a lowercase key containing only the letters `a`–`z`.
 *
 * Steps, in order:
 *  1. For English only, the word is passed through compromise to singularize
 *     nouns; if that produces no output the original word is kept.
 *  2. The text is lowercased and German umlauts / sharp s are expanded to
 *     their conventional ASCII spellings (ü → ue, ä → ae, ö → oe, ß → ss).
 *  3. Remaining diacritics are stripped (é → e, ñ → n, …).
 *  4. Every character outside `a`–`z` (digits, spaces, punctuation) is removed.
 *
 * @param word - The raw word to normalize.
 * @param language - Language code of the word; only `'en'` triggers
 *   singularization. Defaults to `'en'`.
 * @returns The normalized key; may be an empty string when nothing in the
 *   input maps to `a`–`z`.
 */
export const normalizeWord = (word: string, language: string = 'en'): string => {
  let processedWord = word;

  // Only apply compromise for English
  if (language === 'en') {
    const singular = nlp(word).nouns().toSingular().out('text');

    // Handle cases where compromise doesn't produce output
    if (singular) {
      processedWord = singular;
    }
  }

  // Apply standard normalization for all languages
  return processedWord
    // Compose first so a decomposed umlaut (u + U+0308) is seen as one
    // character, and lowercase so Ü/Ä/Ö/ẞ are covered by the rules below.
    .normalize('NFC')
    .toLowerCase()

    // Handle German umlauts and their alternative spellings. This MUST run
    // before diacritics are stripped: once NFD + mark removal has turned
    // "ü" into "u", the "ue" spelling can no longer be produced.
    // NOTE(review): applied for every language, as in the original chain —
    // confirm this is wanted for e.g. Spanish or Turkish "ü".
    .replace(/\u00fc/g, 'ue') // ü
    .replace(/\u00e4/g, 'ae') // ä
    .replace(/\u00f6/g, 'oe') // ö
    .replace(/\u00df/g, 'ss') // ß

    // Decompose the remaining accented letters and drop the combining marks.
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')

    // Keep letters only; this also removes all whitespace, so no trim needed.
    .replace(/[^a-z]/g, '');
};