Spaces:

cdactvm
/

Tamil_ASR_Demo

Sleeping

App Files Files Community

cdactvm committed on Jan 1

Commit

8811fa1

verified ·

1 Parent(s): 6f9ee50

Update convert2list.py

Browse files

Files changed (1) hide show

convert2list.py +36 -55

convert2list.py CHANGED Viewed

@@ -1,55 +1,36 @@
-#!/usr/bin/env python
-# coding: utf-8
-# In[2]:
-# import nbimporter
-import nbimporter
-from Text2List import text_to_list
-def convert_to_list(text, text_list):
-    matched_words = []
-    unmatched_text = ''  # To accumulate unmatched characters
-    # Sort text_list by length in descending order to prioritize longest matches first
-    text_list_sorted = sorted(text_list, key=len, reverse=True)
-    while text:
-        matched = False
-        for word in text_list_sorted:
-            if text.startswith(word):
-                # Add any accumulated unmatched text before appending the matched word
-                if unmatched_text:
-                    matched_words.append(unmatched_text)
-                    unmatched_text = ''  # Reset unmatched text accumulator
-                matched_words.append(word)
-                text = text[len(word):]  # Remove the matched part from text
-                matched = True
-                break
-        if not matched:
-            # Accumulate unmatched characters
-            unmatched_text += text[0]
-            text = text[1:]
-    # If there's any remaining unmatched text, add it to the result
-    if unmatched_text:
-        matched_words.append(unmatched_text)
-    # Join matched words and unmatched text with a space
-    result = ' '.join(matched_words)
-    return result
-# text = "जीरोएकदोतीनचारपांचछहसातआठनौदसजीरोएकदोतीनचारपांच"
-# if __name__=="__main__":
-#     converted=convert_to_list(text, text_to_list())
-#     print(converted)
-# In[ ]:

+# import nbimporter
+import nbimporter
+from Text2List import text_to_list
def convert_to_list(text, text_list):
    """Split *text* into known words and return them joined by single spaces.

    The input is scanned left to right. At each position the longest entry
    of *text_list* that the remaining text starts with is taken as one
    piece. Characters that do not begin any known word are accumulated and
    emitted together as a single piece just before the next matched word
    (or at the end of the input).

    Despite the name, the return value is a string, not a list.

    Args:
        text: The string to segment. Any falsy value yields ``''``.
        text_list: Iterable of candidate words. Empty strings are ignored.

    Returns:
        The pieces (matched words and unmatched runs) joined with ``' '``.
    """
    if not text:
        return ''

    # Longest candidates first so the greedy scan prefers the longest match.
    # Empty strings are dropped: '' is a prefix of every string, so it would
    # "match" without consuming any input and the scan would never advance.
    candidates = sorted((word for word in text_list if word),
                        key=len, reverse=True)

    pieces = []
    pending = []  # characters seen since the last match that fit no word
    pos = 0
    end = len(text)

    while pos < end:
        for word in candidates:
            # Compare in place at the cursor instead of re-slicing the
            # remaining text on every step.
            if text.startswith(word, pos):
                if pending:
                    # Flush the unmatched run before the matched word.
                    pieces.append(''.join(pending))
                    pending = []
                pieces.append(word)
                pos += len(word)
                break
        else:
            # No candidate starts here: keep the character and move on.
            pending.append(text[pos])
            pos += 1

    # Trailing unmatched characters form the final piece.
    if pending:
        pieces.append(''.join(pending))

    return ' '.join(pieces)