rockerritesh committed on
Commit
46bafe8
·
verified ·
1 Parent(s): 5f4cce3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -26
app.py CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
2
  import PyPDF2
3
  import io
4
  import os
 
5
 
6
  # Updated Unicode mappings
7
  unicodeatoz = ["ब", "द", "अ", "म", "भ", "ा", "न", "ज", "ष्", "व", "प", "ि", "फ", "ल", "य", "उ", "त्र", "च", "क", "त", "ग", "ख", "ध", "ह", "थ", "श"]
@@ -46,34 +47,25 @@ symbolsDict = {
46
  "÷": "/"
47
  }
48
 
 
49
  def normalizePreeti(preetitxt):
50
- normalized = ''
51
- previoussymbol = ''
52
- # Additional normalization for complex combinations
53
- preetitxt = preetitxt.replace('qm', 's|').replace('f]', 'ो').replace('km', 'फ').replace('0f', 'ण').replace('If', 'क्ष').replace('if', 'ष').replace('cf', 'आ')
 
 
 
 
 
54
 
55
- index = -1
56
- while index + 1 < len(preetitxt):
57
- index += 1
58
- character = preetitxt[index]
59
- try:
60
- if preetitxt[index + 2] == '{' and (preetitxt[index + 1] == 'f' or preetitxt[index + 1] == 'ो'):
61
- normalized += '{' + character + preetitxt[index + 1]
62
- index += 2
63
- continue
64
- if preetitxt[index + 1] == '{' and character != 'f':
65
- normalized += '{' + character
66
- index += 1
67
- continue
68
- except IndexError:
69
- pass
70
- if character == 'l':
71
- previoussymbol = 'l'
72
- continue
73
- else:
74
- normalized += character + previoussymbol
75
- previoussymbol = ''
76
- return normalized
77
 
78
  def convert(preeti):
79
  converted = ''
 
2
  import PyPDF2
3
  import io
4
  import os
5
+ import re
6
 
7
  # Updated Unicode mappings
8
  unicodeatoz = ["ब", "द", "अ", "म", "भ", "ा", "न", "ज", "ष्", "व", "प", "ि", "फ", "ल", "य", "उ", "त्र", "च", "क", "त", "ग", "ख", "ध", "ह", "थ", "श"]
 
47
  "÷": "/"
48
  }
49
 
50
+ # Add normalization to handle conjunct consonants
51
  def normalizePreeti(preetitxt):
52
+ # Replace complex combinations for conjunct consonants first
53
+ preetitxt = re.sub(r'qm', 'ष', preetitxt)
54
+ preetitxt = re.sub(r'f]', 'ो', preetitxt)
55
+ preetitxt = re.sub(r'km', 'फ', preetitxt)
56
+ preetitxt = re.sub(r'0f', 'ण', preetitxt)
57
+ preetitxt = re.sub(r'If', 'क्ष', preetitxt)
58
+ preetitxt = re.sub(r'if', 'ष', preetitxt)
59
+ preetitxt = re.sub(r'cf', 'आ', preetitxt)
60
+ preetitxt = re.sub(r'ग्घ', 'घ', preetitxt) # Handle conjunct consonants
61
 
62
+ # Additional normalization logic for complex combinations
63
+ preetitxt = re.sub(r'([a-zA-Z])\{', r'\1्', preetitxt) # Handle half letters like क्, न्, म्, etc.
64
+
65
+ # Special handling for common Nepali conjuncts
66
+ preetitxt = preetitxt.replace('ज्ञ', 'ज्ञ') # Handle frequently used conjuncts
67
+
68
+ return preetitxt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  def convert(preeti):
71
  converted = ''