not-lain committed on
Commit
b1e96d2
·
1 Parent(s): d2ae61e

update json handling

Browse files
Files changed (1) hide show
  1. app.py +34 -2
app.py CHANGED
@@ -6,6 +6,8 @@ from docx import Document
6
  import subprocess
7
  import os
8
  from typing import Optional, List
 
 
9
 
10
 
11
  def extract_text_from_pptx(file_path):
@@ -106,6 +108,36 @@ def extract_text_from_doc_or_docx(file):
106
  return "Unsupported file type. Please upload a .doc or .docx file."
107
 
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  def sanitize_list_of_lists(text: str) -> Optional[List[List]]:
110
  left = text.find("[")
111
  right = text.rfind("]")
@@ -119,13 +151,13 @@ def sanitize_list_of_lists(text: str) -> Optional[List[List]]:
119
  # parse list of lists
120
  for front, back in list_of_lists:
121
  out.append({"frontText": front, "backText": back})
122
- return out
123
  # errors
124
  except Exception as e:
125
  print(e)
126
  # return anything that was already parsed
127
  if out != []:
128
- return out
129
  # original schema is not respected
130
  else:
131
  return None
 
6
  import subprocess
7
  import os
8
  from typing import Optional, List
9
+ import string
10
+ import random
11
 
12
 
13
  def extract_text_from_pptx(file_path):
 
108
  return "Unsupported file type. Please upload a .doc or .docx file."
109
 
110
 
111
+ # function that generates a random string
112
def generate_random_string(length=23):
    """Return a random string of ASCII letters and digits.

    Args:
        length: number of characters to generate (defaults to 23).

    Returns:
        A string of ``length`` characters, each picked independently with
        ``random.choice`` from ``string.ascii_letters + string.digits``.
    """
    alphabet = string.ascii_letters + string.digits
    picked = []
    # One random.choice call per output character.
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return "".join(picked)
116
+
117
+ # function that adds the necessary json fields
118
def handle_json_output(json_list: list) -> list:
    """Add the rendering fields to every flashcard dict in ``json_list``.

    Each element is expected to be a dict holding ``"frontText"`` and
    ``"backText"``. The dicts are modified in place:

    * every element except the last gets ``termType = "basic"`` and a
      ``frontHTML`` / ``backHTML`` pair wrapping its two texts;
    * the last element gets ``termType = "cloze"``, a ``frontHTML`` that holds
      the front text followed by the back text inside a
      ``<span class="closure">``, and empty ``backText`` / ``backHTML``.

    Args:
        json_list: list of flashcard dicts; may be empty.

    Returns:
        The same list object, with the extra fields set on its elements.

    Raises:
        KeyError: if an element lacks ``"frontText"`` or ``"backText"``.
    """
    box_style = (
        "position:absolute;left:100px;top:50px;width:800px;height:300px;"
        "text-align:center;display:flex;align-items:center;font-size:40px;"
    )

    def open_div() -> str:
        # Every wrapper div gets its own random id suffix. The id attribute
        # has to be closed before ``style`` starts, otherwise the style text
        # becomes part of the id value and is never applied.
        return (
            f'<div id="element-richtextarea-{generate_random_string()}" '
            f'style="{box_style}">'
        )

    last_index = len(json_list) - 1
    for index, element in enumerate(json_list):
        front_text = element["frontText"]
        back_text = element["backText"]
        if index != last_index:
            # Every item but the last is a plain front/back card.
            element["termType"] = "basic"
            element["frontHTML"] = f"{open_div()}<p>{front_text}</p></div>"
            # Same "backHTML" key as the cloze branch, so all cards share
            # one schema.
            element["backHTML"] = f"{open_div()}<p>{back_text}</p></div>"
        else:
            # The last item is a cloze card: its answer is embedded in the
            # front markup and the back side is left empty.
            element["termType"] = "cloze"
            element["frontHTML"] = (
                f"{open_div()}<p>{front_text}</p> "
                '<p><span class="closure" data-index="1" data-hint="">'
                f"{back_text}</span></p></div>"
            )
            element["backText"] = ""
            element["backHTML"] = ""
    return json_list
139
+
140
+
141
  def sanitize_list_of_lists(text: str) -> Optional[List[List]]:
142
  left = text.find("[")
143
  right = text.rfind("]")
 
151
  # parse list of lists
152
  for front, back in list_of_lists:
153
  out.append({"frontText": front, "backText": back})
154
+ return handle_json_output(out)
155
  # errors
156
  except Exception as e:
157
  print(e)
158
  # return anything that was already parsed
159
  if out != []:
160
+ return handle_json_output(out)
161
  # original schema is not respected
162
  else:
163
  return None