Spaces:

mohamedrady
/

clockwork-temptation

Runtime error

App Files Community

mohamedrady committed on Jul 18, 2024

Commit

c28bde1

verified ·

1 Parent(s): a531aee

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -21

app.py CHANGED Viewed

@@ -52,12 +52,12 @@ def count_tokens(text, tokenizer):
 # دالة لاستخراج النص من ملفات PDF
 def extract_pdf_text(file_path):
     with open(file_path, "rb") as pdf_file:
         pdf_reader = PyPDF2.PdfReader(pdf_file)
-        text = ""
         for page_num in range(len(pdf_reader.pages)):
             page = pdf_reader.pages[page_num]
-            text += page.extract_text()
     return text
 # دالة لاستخراج النص من ملفات DOCX
@@ -81,15 +81,15 @@ def read_text_file(file_path):
 # دالة لاستخراج المشاهد من النص
 def extract_scenes(text):
-    scenes = re.split(r'داخلي|خارجي', text)
     scenes = [scene.strip() for scene in scenes if scene.strip()]
     return scenes
 # دالة لاستخراج تفاصيل المشهد (المكان والوقت)
 def extract_scene_details(scene):
     details = {}
-    location_match = re.search(r'(داخلي|خارجي)', scene)
-    time_match = re.search(r'(ليلاً|نهاراً|شروق|غروب)', scene)
     if location_match:
         details['location'] = location_match.group()
@@ -176,29 +176,30 @@ def analyze_files(input_files, output_directory, tokenizer, max_length):
             results.append(result)
             # حفظ النتائج
-            with open(os.path.join(output_directory, f"{os.path.basename(file_path)}_sentences.txt"), "a", encoding="utf-8") as file:
-                file.write("\n".join(sentences))
-            with open(os.path.join(output_directory, f"{os.path.basename(file_path)}_quotes.txt"), "a", encoding="utf-8") as file:
-                file.write("\n".join(quotes))
-            with open(os.path.join(output_directory, f"{os.path.basename(file_path)}_token_count.txt"), "a", encoding="utf-8") as file:
-                file.write(str(token_count))
-            with open(os.path.join(output_directory, f"{os.path.basename(file_path)}_scenes.txt"), "a", encoding="utf-8") as file:
-                file.write("\n".join(scenes))
-            with open(os.path.join(output_directory, f"{os.path.basename(file_path)}_scene_details.txt"), "a", encoding="utf-8") as file:
-                file.write(str(scene_details))
-            with open(os.path.join(output_directory, f"{os.path.basename(file_path)}_ages.txt"), "a", encoding="utf-8") as file:
-                file.write(str(ages))
-            with open(os.path.join(output_directory, f"{os.path.basename(file_path)}_character_descriptions.txt"), "a", encoding="utf-8") as file:
-                file.write(str(character_descriptions))
-            with open(os.path.join(output_directory, f"{os.path.basename(file_path)}_dialogues.txt"), "a", encoding="utf-8") as file:
-                file.write(str(dialogues))
     return results

 # دالة لاستخراج النص من ملفات PDF
 def extract_pdf_text(file_path):
+    text = ""
     with open(file_path, "rb") as pdf_file:
         pdf_reader = PyPDF2.PdfReader(pdf_file)
         for page_num in range(len(pdf_reader.pages)):
             page = pdf_reader.pages[page_num]
+            text += page.extract_text() or ""
     return text
 # دالة لاستخراج النص من ملفات DOCX
 # دالة لاستخراج المشاهد من النص
 def extract_scenes(text):
+    scenes = re.split(r'داخلي|خارجي|... داخلي ...|... خارجي ...', text)
     scenes = [scene.strip() for scene in scenes if scene.strip()]
     return scenes
 # دالة لاستخراج تفاصيل المشهد (المكان والوقت)
 def extract_scene_details(scene):
     details = {}
+    location_match = re.search(r'(داخلي|خارجي|... داخلي ...|... خارجي ...)', scene)
+    time_match = re.search(r'(ليلاً|نهاراً|شروق|غروب|... ليل ...|... نهار ...)', scene)
     if location_match:
         details['location'] = location_match.group()
             results.append(result)
             # حفظ النتائج
+            base_filename = os.path.basename(file_path)
+            with open(os.path.join(output_directory, f"{base_filename}_sentences.txt"), "a", encoding="utf-8") as file:
+                file.write("\n".join(sentences) + "\n")
+            with open(os.path.join(output_directory, f"{base_filename}_quotes.txt"), "a", encoding="utf-8") as file:
+                file.write("\n".join(quotes) + "\n")
+            with open(os.path.join(output_directory, f"{base_filename}_token_count.txt"), "a", encoding="utf-8") as file:
+                file.write(str(token_count) + "\n")
+            with open(os.path.join(output_directory, f"{base_filename}_scenes.txt"), "a", encoding="utf-8") as file:
+                file.write("\n".join(scenes) + "\n")
+            with open(os.path.join(output_directory, f"{base_filename}_scene_details.txt"), "a", encoding="utf-8") as file:
+                file.write(str(scene_details) + "\n")
+            with open(os.path.join(output_directory, f"{base_filename}_ages.txt"), "a", encoding="utf-8") as file:
+                file.write(str(ages) + "\n")
+            with open(os.path.join(output_directory, f"{base_filename}_character_descriptions.txt"), "a", encoding="utf-8") as file:
+                file.write(str(character_descriptions) + "\n")
+            with open(os.path.join(output_directory, f"{base_filename}_dialogues.txt"), "a", encoding="utf-8") as file:
+                file.write(str(dialogues) + "\n")
     return results