Spaces:

not-lain
/

utils

Running

App Files Files Community

not-lain committed on Nov 21, 2024

Commit

b1e96d2

1 Parent(s): d2ae61e

update json handling

Browse files

Files changed (1) hide show

app.py +34 -2

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ from docx import Document
 import subprocess
 import os
 from typing import Optional, List
 def extract_text_from_pptx(file_path):
@@ -106,6 +108,36 @@ def extract_text_from_doc_or_docx(file):
         return "Unsupported file type. Please upload a .doc or .docx file."
 def sanitize_list_of_lists(text: str) -> Optional[List[List]]:
     left = text.find("[")
     right = text.rfind("]")
@@ -119,13 +151,13 @@ def sanitize_list_of_lists(text: str) -> Optional[List[List]]:
                 # parse list of lists
                 for front, back in list_of_lists:
                     out.append({"frontText": front, "backText": back})
-                return out
             # errors
             except Exception as e:
                 print(e)
                 # return anything that was already parsed
                 if out != []:
-                    return out
                 # original schema is not respected
                 else:
                     return None

 import subprocess
 import os
 from typing import Optional, List
+import string
+import random
 def extract_text_from_pptx(file_path):
         return "Unsupported file type. Please upload a .doc or .docx file."
def generate_random_string(length=23):
    """Return a random string made of ASCII letters and digits.

    Args:
        length: Number of characters to generate (defaults to 23).

    Returns:
        A string of ``length`` characters, each drawn independently with
        ``random.choice`` from ``string.ascii_letters + string.digits``.
    """
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return "".join(picked)
def _rich_text_div(inner_html: str) -> str:
    """Wrap ``inner_html`` in a rich-text container div with a fresh random id."""
    element_id = generate_random_string()
    return (
        # The id attribute must be closed before the style attribute starts.
        f'<div id="element-richtextarea-{element_id}" '
        'style="position:absolute;left:100px;top:50px;width:800px;height:300px;'
        'text-align:center;display:flex;align-items:center;font-size:40px;">'
        f"{inner_html}</div>"
    )


def handle_json_output(json_list: list):
    """Add the ``termType`` and HTML fields to every card in ``json_list``.

    Each element must be a dict providing the ``"frontText"`` and
    ``"backText"`` keys. The dicts are updated in place:

    * every element except the last gets ``termType = "basic"`` plus a
      ``frontHTML`` and a ``backHTML`` rendering of its two texts;
    * the last element gets ``termType = "cloze"``; its ``frontHTML`` holds
      the front text followed by the back text wrapped in a ``<span>``, and
      its ``backText`` / ``backHTML`` are set to empty strings.

    Args:
        json_list: List of card dicts to enrich.

    Returns:
        The same ``json_list`` object, after the in-place updates. An empty
        list is returned unchanged.

    Raises:
        KeyError: If an element lacks ``"frontText"`` or ``"backText"``.
    """
    last_index = len(json_list) - 1
    for index, element in enumerate(json_list):
        # NOTE(review): the texts are interpolated into the markup without
        # HTML escaping -- confirm whether the consumer expects raw HTML.
        front = element["frontText"]
        back = element["backText"]
        if index != last_index:
            # Every item but the last is a plain front/back card.
            element["termType"] = "basic"
            element["frontHTML"] = _rich_text_div(f"<p>{front}</p>")
            element["backHTML"] = _rich_text_div(f"<p>{back}</p>")
        else:
            # The last item embeds its back text inside the front markup.
            element["termType"] = "cloze"
            element["frontHTML"] = _rich_text_div(
                f"<p>{front}</p> "
                f'<p><span class="closure" data-index="1" data-hint="">{back}</span></p>'
            )
            element["backText"] = ""
            element["backHTML"] = ""
    return json_list
 def sanitize_list_of_lists(text: str) -> Optional[List[List]]:
     left = text.find("[")
     right = text.rfind("]")
                 # parse list of lists
                 for front, back in list_of_lists:
                     out.append({"frontText": front, "backText": back})
+                return handle_json_output(out)
             # errors
             except Exception as e:
                 print(e)
                 # return anything that was already parsed
                 if out != []:
+                    return handle_json_output(out)
                 # original schema is not respected
                 else:
                     return None