Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -52,12 +52,12 @@ def count_tokens(text, tokenizer):
|
|
52 |
|
53 |
# دالة لاستخراج النص من ملفات PDF
|
54 |
def extract_pdf_text(file_path):
    """Return the text of every page of the PDF at *file_path*, concatenated.

    The file is opened in binary mode and handed to ``PyPDF2.PdfReader``;
    each page's ``extract_text()`` result is appended in page order.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        A single string with the text of all pages; ``""`` when the reader
        exposes no pages.
    """
    with open(file_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` keeps the join safe if a page yields no text (a falsy
        # result such as None would otherwise break string concatenation).
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
|
62 |
|
63 |
# دالة لاستخراج النص من ملفات DOCX
|
@@ -81,15 +81,15 @@ def read_text_file(file_path):
|
|
81 |
|
82 |
# دالة لاستخراج المشاهد من النص
|
83 |
def extract_scenes(text):
|
84 |
-
scenes = re.split(r'
|
85 |
scenes = [scene.strip() for scene in scenes if scene.strip()]
|
86 |
return scenes
|
87 |
|
88 |
# دالة لاستخراج تفاصيل المشهد (المكان والوقت)
|
89 |
def extract_scene_details(scene):
|
90 |
details = {}
|
91 |
-
location_match = re.search(r'(
|
92 |
-
time_match = re.search(r'(
|
93 |
|
94 |
if location_match:
|
95 |
details['location'] = location_match.group()
|
@@ -176,29 +176,30 @@ def analyze_files(input_files, output_directory, tokenizer, max_length):
|
|
176 |
results.append(result)
|
177 |
|
178 |
# حفظ النتائج
|
179 |
-
|
180 |
-
|
|
|
181 |
|
182 |
-
with open(os.path.join(output_directory, f"{
|
183 |
-
file.write("\n".join(quotes))
|
184 |
|
185 |
-
with open(os.path.join(output_directory, f"{
|
186 |
-
file.write(str(token_count))
|
187 |
|
188 |
-
with open(os.path.join(output_directory, f"{
|
189 |
-
file.write("\n".join(scenes))
|
190 |
|
191 |
-
with open(os.path.join(output_directory, f"{
|
192 |
-
file.write(str(scene_details))
|
193 |
|
194 |
-
with open(os.path.join(output_directory, f"{
|
195 |
-
file.write(str(ages))
|
196 |
|
197 |
-
with open(os.path.join(output_directory, f"{
|
198 |
-
file.write(str(character_descriptions))
|
199 |
|
200 |
-
with open(os.path.join(output_directory, f"{
|
201 |
-
file.write(str(dialogues))
|
202 |
|
203 |
return results
|
204 |
|
|
|
52 |
|
53 |
# دالة لاستخراج النص من ملفات PDF
|
54 |
def extract_pdf_text(file_path):
    """Return the text of every page of the PDF at *file_path*, concatenated.

    The file is opened in binary mode and handed to ``PyPDF2.PdfReader``;
    each page's ``extract_text()`` result is appended in page order.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        A single string with the text of all pages; ``""`` when the reader
        exposes no pages.
    """
    with open(file_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Iterate the pages directly instead of indexing by position, and
        # join once instead of growing the string with `+=` per page.
        # `or ""` substitutes an empty string when a page yields no text.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
|
62 |
|
63 |
# دالة لاستخراج النص من ملفات DOCX
|
|
|
81 |
|
82 |
# دالة لاستخراج المشاهد من النص
|
83 |
def extract_scenes(text):
    """Split *text* into scene chunks at the interior/exterior markers.

    The text is cut wherever the word "داخلي" (interior) or "خارجي"
    (exterior) occurs. The marker itself is not kept in the output, each
    chunk is stripped of surrounding whitespace, and chunks that are empty
    after stripping are dropped.

    Args:
        text: The full script text.

    Returns:
        A list of non-empty, stripped strings in their original order.
    """
    # Only the two keywords act as delimiters. The former alternatives
    # "... داخلي ..." / "... خارجي ..." were placeholder ellipses, but in a
    # regex every "." matches any character, so they swallowed three
    # characters plus a space on each side of the keyword.
    pieces = re.split(r'داخلي|خارجي', text)
    stripped = (piece.strip() for piece in pieces)
    return [piece for piece in stripped if piece]
|
87 |
|
88 |
# دالة لاستخراج تفاصيل المشهد (المكان والوقت)
|
89 |
def extract_scene_details(scene):
|
90 |
details = {}
|
91 |
+
location_match = re.search(r'(داخلي|خارجي|... داخلي ...|... خارجي ...)', scene)
|
92 |
+
time_match = re.search(r'(ليلاً|نهاراً|شروق|غروب|... ليل ...|... نهار ...)', scene)
|
93 |
|
94 |
if location_match:
|
95 |
details['location'] = location_match.group()
|
|
|
176 |
results.append(result)
|
177 |
|
178 |
# حفظ النتائج
|
179 |
+
base_filename = os.path.basename(file_path)
|
180 |
+
with open(os.path.join(output_directory, f"{base_filename}_sentences.txt"), "a", encoding="utf-8") as file:
|
181 |
+
file.write("\n".join(sentences) + "\n")
|
182 |
|
183 |
+
with open(os.path.join(output_directory, f"{base_filename}_quotes.txt"), "a", encoding="utf-8") as file:
|
184 |
+
file.write("\n".join(quotes) + "\n")
|
185 |
|
186 |
+
with open(os.path.join(output_directory, f"{base_filename}_token_count.txt"), "a", encoding="utf-8") as file:
|
187 |
+
file.write(str(token_count) + "\n")
|
188 |
|
189 |
+
with open(os.path.join(output_directory, f"{base_filename}_scenes.txt"), "a", encoding="utf-8") as file:
|
190 |
+
file.write("\n".join(scenes) + "\n")
|
191 |
|
192 |
+
with open(os.path.join(output_directory, f"{base_filename}_scene_details.txt"), "a", encoding="utf-8") as file:
|
193 |
+
file.write(str(scene_details) + "\n")
|
194 |
|
195 |
+
with open(os.path.join(output_directory, f"{base_filename}_ages.txt"), "a", encoding="utf-8") as file:
|
196 |
+
file.write(str(ages) + "\n")
|
197 |
|
198 |
+
with open(os.path.join(output_directory, f"{base_filename}_character_descriptions.txt"), "a", encoding="utf-8") as file:
|
199 |
+
file.write(str(character_descriptions) + "\n")
|
200 |
|
201 |
+
with open(os.path.join(output_directory, f"{base_filename}_dialogues.txt"), "a", encoding="utf-8") as file:
|
202 |
+
file.write(str(dialogues) + "\n")
|
203 |
|
204 |
return results
|
205 |
|