rockerritesh committed on
Commit
9419731
·
verified ·
1 Parent(s): 3640923

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -9
app.py CHANGED
@@ -6,15 +6,22 @@ import re
6
  import string
7
  import nltk
8
 
9
- # Download NLTK resources
10
- nltk.download('words')
 
 
 
 
 
 
 
 
 
11
 
12
- # English words from NLTK corpus
13
- english_words = set(nltk.corpus.words.words())
14
 
15
  # Define Devanagari digits and patterns for matching
16
  DEVANAGARI_DIGITS = {'०', '१', '२', '३', '४', '५', '६', '७', '८', '९', '१०'}
17
- DEVANAGARI_PATTERN = re.compile(r'^[०-९]+(?:[.,/][०-९]+)*$') # Match Devanagari digits
18
  NUMERIC_PATTERN = re.compile(r'^\d+(?:[.,/]\d+)*$') # Match numeric patterns
19
 
20
  # Unicode conversion mappings
@@ -81,10 +88,10 @@ def convert(preeti):
81
 
82
  return converted
83
 
84
- def is_english_word(word):
85
- """Check if a word is English."""
86
- word = word.lower().strip(string.punctuation)
87
- return word in english_words
88
 
89
  def is_valid_numeric(word):
90
  """Check if the word is a valid numeric string."""
 
6
  import string
7
  import nltk
8
 
9
+ # # Download NLTK resources
10
+ # nltk.download('words')
11
+
12
+ # # English words from NLTK corpus
13
+ # english_words = set(nltk.corpus.words.words())
14
+
15
+ with open("index.dic") as f:
16
+ hunspell_words = {line.split("/")[0].strip() for line in f if not line.startswith("#")}
17
+
18
+ def is_english_word(word):
19
+ return word.lower() in hunspell_words
20
 
 
 
21
 
22
  # Define Devanagari digits and patterns for matching
23
  DEVANAGARI_DIGITS = {'०', '१', '२', '३', '४', '५', '६', '७', '८', '९', '१०'}
24
+ DEVANAGARI_PATTERN = re.compile(r'^[०-९]+(?:[.,/-][०-९]+)*$') # Match Devanagari digits
25
  NUMERIC_PATTERN = re.compile(r'^\d+(?:[.,/]\d+)*$') # Match numeric patterns
26
 
27
  # Unicode conversion mappings
 
88
 
89
  return converted
90
 
91
+ # def is_english_word(word):
92
+ # """Check if a word is English."""
93
+ # word = word.lower().strip(string.punctuation)
94
+ # return word in english_words
95
 
96
  def is_valid_numeric(word):
97
  """Check if the word is a valid numeric string."""