Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,15 +6,22 @@ import re
|
|
6 |
import string
|
7 |
import nltk
|
8 |
|
9 |
-
# Download NLTK resources
|
10 |
-
nltk.download('words')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
# English words from NLTK corpus
|
13 |
-
english_words = set(nltk.corpus.words.words())
|
14 |
|
15 |
# Define Devanagari digits and patterns for matching
|
16 |
DEVANAGARI_DIGITS = {'०', '१', '२', '३', '४', '५', '६', '७', '८', '९', '१०'}
|
17 |
-
DEVANAGARI_PATTERN = re.compile(r'^[०-९]+(?:[
|
18 |
NUMERIC_PATTERN = re.compile(r'^\d+(?:[.,/]\d+)*$') # Match numeric patterns
|
19 |
|
20 |
# Unicode conversion mappings
|
@@ -81,10 +88,10 @@ def convert(preeti):
|
|
81 |
|
82 |
return converted
|
83 |
|
84 |
-
def is_english_word(word):
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
|
89 |
def is_valid_numeric(word):
|
90 |
"""Check if the word is a valid numeric string."""
|
|
|
6 |
import string
|
7 |
import nltk
|
8 |
|
9 |
+
# # Download NLTK resources
|
10 |
+
# nltk.download('words')
|
11 |
+
|
12 |
+
# # English words from NLTK corpus
|
13 |
+
# english_words = set(nltk.corpus.words.words())
|
14 |
+
|
15 |
+
with open("index.dic") as f:
|
16 |
+
hunspell_words = {line.split("/")[0].strip() for line in f if not line.startswith("#")}
|
17 |
+
|
18 |
+
def is_english_word(word):
|
19 |
+
return word.lower() in hunspell_words
|
20 |
|
|
|
|
|
21 |
|
22 |
# Define Devanagari digits and patterns for matching
|
23 |
DEVANAGARI_DIGITS = {'०', '१', '२', '३', '४', '५', '६', '७', '८', '९', '१०'}
|
24 |
+
DEVANAGARI_PATTERN = re.compile(r'^[०-९]+(?:[.,/-][०-९]+)*$') # Match Devanagari digits
|
25 |
NUMERIC_PATTERN = re.compile(r'^\d+(?:[.,/]\d+)*$') # Match numeric patterns
|
26 |
|
27 |
# Unicode conversion mappings
|
|
|
88 |
|
89 |
return converted
|
90 |
|
91 |
+
# def is_english_word(word):
|
92 |
+
# """Check if a word is English."""
|
93 |
+
# word = word.lower().strip(string.punctuation)
|
94 |
+
# return word in english_words
|
95 |
|
96 |
def is_valid_numeric(word):
|
97 |
"""Check if the word is a valid numeric string."""
|