Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
|
|
2 |
import PyPDF2
|
3 |
import io
|
4 |
import os
|
|
|
5 |
|
6 |
# Updated Unicode mappings
|
7 |
unicodeatoz = ["ब", "द", "अ", "म", "भ", "ा", "न", "ज", "ष्", "व", "प", "ि", "फ", "ल", "य", "उ", "त्र", "च", "क", "त", "ग", "ख", "ध", "ह", "थ", "श"]
|
@@ -46,34 +47,25 @@ symbolsDict = {
|
|
46 |
"÷": "/"
|
47 |
}
|
48 |
|
|
|
49 |
def normalizePreeti(preetitxt):
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
preetitxt =
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
index += 2
|
63 |
-
continue
|
64 |
-
if preetitxt[index + 1] == '{' and character != 'f':
|
65 |
-
normalized += '{' + character
|
66 |
-
index += 1
|
67 |
-
continue
|
68 |
-
except IndexError:
|
69 |
-
pass
|
70 |
-
if character == 'l':
|
71 |
-
previoussymbol = 'l'
|
72 |
-
continue
|
73 |
-
else:
|
74 |
-
normalized += character + previoussymbol
|
75 |
-
previoussymbol = ''
|
76 |
-
return normalized
|
77 |
|
78 |
def convert(preeti):
|
79 |
converted = ''
|
|
|
2 |
import PyPDF2
|
3 |
import io
|
4 |
import os
|
5 |
+
import re
|
6 |
|
7 |
# Updated Unicode mappings
|
8 |
# Ordered table of 26 Devanagari strings (some entries are a consonant plus
# virama, or a conjunct such as "त्र").
# NOTE(review): the name and the entry count suggest that position i holds the
# replacement for the i-th lowercase ASCII letter (a..z) of Preeti-encoded
# text -- confirm against the lookup code in convert(), which is not visible here.
unicodeatoz = ["ब", "द", "अ", "म", "भ", "ा", "न", "ज", "ष्", "व", "प", "ि", "फ", "ल", "य", "उ", "त्र", "च", "क", "त", "ग", "ख", "ध", "ह", "थ", "श"]
|
|
|
47 |
"÷": "/"
|
48 |
}
|
49 |
|
50 |
+
# Add normalization to handle conjunct consonants
|
51 |
_VIRAMA = "\u094d"  # DEVANAGARI SIGN VIRAMA (halant)

# Literal substitutions applied one after another, in exactly this order.
# The order is significant: "f]" is consumed before "cf"/"0f"/"If"/"if" get a
# chance to match, so e.g. "cf]" becomes "c" + "ो" rather than "आ" + "]".
# NOTE(review): "qm" and "if" both map to "ष" -- confirm "qm" is intended.
_PREETI_SEQUENCES = (
    ("qm", "ष"),
    ("f]", "ो"),
    ("km", "फ"),
    ("0f", "ण"),
    ("If", "क्ष"),
    ("if", "ष"),
    ("cf", "आ"),
    ("ग्घ", "घ"),
)

# An ASCII letter immediately followed by "{".
_HALF_LETTER = re.compile(r"([a-zA-Z])\{")


def normalizePreeti(preetitxt):
    """Pre-process Preeti-encoded text before per-character conversion.

    Two passes are made over the text:

    1. Every sequence listed in ``_PREETI_SEQUENCES`` is replaced by its
       Devanagari counterpart, in table order (plain substring replacement).
    2. Each ASCII letter directly followed by ``{`` is rewritten as that
       letter followed by a virama; the ``{`` is dropped.  A ``{`` that is
       not preceded by an ASCII letter is left untouched.

    Characters not covered by either pass are returned unchanged, so the
    result may still contain ASCII letters.
    NOTE(review): presumably the caller maps those remaining characters
    afterwards -- confirm against convert().
    NOTE(review): unlike the implementation this replaced, no characters are
    reordered here -- confirm that dropping the reordering is intended.

    Args:
        preetitxt: The text to normalise (a ``str``).

    Returns:
        The normalised text as a new ``str``.
    """
    for sequence, replacement in _PREETI_SEQUENCES:
        # These are fixed strings, so str.replace is used instead of a regex;
        # this also avoids relying on an unescaped "]" being read literally.
        preetitxt = preetitxt.replace(sequence, replacement)

    return _HALF_LETTER.sub(r"\1" + _VIRAMA, preetitxt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
def convert(preeti):
|
71 |
converted = ''
|