mohamedrady committed
Commit 0f146ca · verified · 1 parent: 7b7b995

Update app.py

Files changed (1): app.py (+24 -24)
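In the previous revision, several Python keywords had been machine-translated into Arabic — إذا for `if`, أو for `or`, لكل for `for`, مع for `with`, كما for `as` — leaving the file syntactically invalid. This commit restores the English keywords and swaps the deprecated `gr.outputs.JSON()` output component for `gr.JSON()`.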
app.py CHANGED

@@ -17,7 +17,7 @@ openai.api_key = "sk-proj-62TDbO5KABSdkZaFPPD4T3BlbkFJkhqOYpHhL6OucTzNdWSU"
 nltk.download('punkt')
 
 # Check whether a GPU is available and use it
-device = 0 إذا torch.cuda.is_available() else -1
+device = 0 if torch.cuda.is_available() else -1
 
 # Load the language-analysis models
 analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)
@@ -42,7 +42,7 @@ def camel_ner_analysis(text):
     entities = ner.predict(tokens)
     entity_dict = {"PERSON": [], "LOC": [], "ORG": [], "DATE": []}
     for token, tag in zip(tokens, entities):
-        إذا tag in entity_dict:
+        if tag in entity_dict:
             entity_dict[tag].append((token, tag))
     return entity_dict
@@ -61,7 +61,7 @@ def nltk_extract_quotes(text):
     quotes = []
     sentences = nltk.tokenize.sent_tokenize(text, language='arabic')
     for sentence in sentences:
-        إذا '"' in sentence أو '«' in sentence أو '»' in sentence:
+        if '"' in sentence or '«' in sentence or '»' in sentence:
             quotes.append(sentence)
     return quotes
@@ -72,10 +72,10 @@ def count_tokens(text):
 
 # Function to extract the text from PDF files
 def extract_pdf_text(file_path):
-    مع open(file_path, "rb") كما pdf_file:
+    with open(file_path, "rb") as pdf_file:
         pdf_reader = PyPDF2.PdfReader(pdf_file)
         text = ""
-        لكل page_num in range(len(pdf_reader.pages)):
+        for page_num in range(len(pdf_reader.pages)):
             page = pdf_reader.pages[page_num]
             text += page.extract_text()
     return text
@@ -83,7 +83,7 @@ def extract_pdf_text(file_path):
 # Function to extract the scenes from the text
 def extract_scenes(text):
     scenes = re.split(r'داخلي|خارجي', text)
-    scenes = [scene.strip() for scene in scenes إذا scene.strip()]
+    scenes = [scene.strip() for scene in scenes if scene.strip()]
     return scenes
 
 # Function to extract scene details (location and time)
@@ -92,9 +92,9 @@ def extract_scene_details(scene):
     location_match = re.search(r'(داخلي|خارجي)', scene)
     time_match = re.search(r'(ليلاً|نهاراً|شروق|غروب)', scene)
 
-    إذا location_match:
+    if location_match:
         details['location'] = location_match.group()
-    إذا time_match:
+    if time_match:
         details['time'] = time_match.group()
 
     return details
@@ -125,11 +125,11 @@ def analyze_and_complete(file_paths):
     results = []
     output_directory = os.getenv("SPACE_DIR", "/app/output")
 
-    لكل file_path in file_paths:
-        إذا file_path.endswith(".pdf"):
+    for file_path in file_paths:
+        if file_path.endswith(".pdf"):
             text = extract_pdf_text(file_path)
         else:
-            مع open(file_path, "r", encoding="utf-8") كما file:
+            with open(file_path, "r", encoding="utf-8") as file:
                 text = file.read()
 
         filename_prefix = os.path.splitext(os.path.basename(file_path))[0]
@@ -145,40 +145,40 @@ def analyze_and_complete(file_paths):
         character_frequency = extract_character_frequency(camel_entities)
         dialogues = extract_dialogues(text)
 
-        scene_details = [extract_scene_details(scene) لكل scene in scenes]
+        scene_details = [extract_scene_details(scene) for scene in scenes]
 
         # Save the results to files
-        مع open(os.path.join(output_directory, f"{filename_prefix}_entities.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_entities.txt"), "w", encoding="utf-8") as file:
             file.write(str(camel_entities))
 
-        مع open(os.path.join(output_directory, f"{filename_prefix}_sentiments.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_sentiments.txt"), "w", encoding="utf-8") as file:
             file.write(str(sentiments))
 
-        مع open(os.path.join(output_directory, f"{filename_prefix}_sentences.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_sentences.txt"), "w", encoding="utf-8") as file:
             file.write("\n".join(sentences))
 
-        مع open(os.path.join(output_directory, f"{filename_prefix}_quotes.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_quotes.txt"), "w", encoding="utf-8") as file:
             file.write("\n".join(quotes))
 
-        مع open(os.path.join(output_directory, f"{filename_prefix}_token_count.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_token_count.txt"), "w", encoding="utf-8") as file:
             file.write(str(token_count))
 
-        مع open(os.path.join(output_directory, f"{filename_prefix}_scenes.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_scenes.txt"), "w", encoding="utf-8") as file:
             file.write("\n".join(scenes))
 
-        مع open(os.path.join(output_directory, f"{filename_prefix}_scene_details.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_scene_details.txt"), "w", encoding="utf-8") as file:
             file.write(str(scene_details))
 
-        مع open(os.path.join(output_directory, f"{filename_prefix}_ages.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_ages.txt"), "w", encoding="utf-8") as file:
             file.write(str(ages))
 
-        مع open(os.path.join(output_directory, f"{filename_prefix}_character_descriptions.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_character_descriptions.txt"), "w", encoding="utf-8") as file:
             file.write(str(character_descriptions))
 
-        مع open(os.path.join(output_directory, f"{filename_prefix}_character_frequency.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_character_frequency.txt"), "w", encoding="utf-8") as file:
             file.write(str(character_frequency))
 
-        مع open(os.path.join(output_directory, f"{filename_prefix}_dialogues.txt"), "w", encoding="utf-8") كما file:
+        with open(os.path.join(output_directory, f"{filename_prefix}_dialogues.txt"), "w", encoding="utf-8") as file:
             file.write(str(dialogues))
 
         results.append((str(camel_entities), str(sentiments), "\n".join(sentences), "\n".join(quotes), str(token_count), "\n".join(scenes), str(scene_details), str(ages), str(character_descriptions), str(character_frequency), str(dialogues)))
@@ -189,7 +189,7 @@ def analyze_and_complete(file_paths):
 interface = gr.Interface(
     fn=analyze_and_complete,
     inputs=gr.File(file_count="multiple", type="filepath"),
-    outputs=gr.outputs.JSON(),
+    outputs=gr.JSON(),
     title="Movie Script Analyzer and Completer",
     description="Upload text, PDF, or DOCX files to analyze and complete the movie script."
 )
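For context on the final hunk: the `gr.outputs` namespace was deprecated in Gradio 3.x and removed in 4.x, where component classes such as `gr.JSON` are passed directly as outputs. Below is a minimal, self-contained sketch of the same interface wiring, assuming Gradio 4.x; the trivial `analyze_and_complete` body is a hypothetical stand-in for the app's real analysis function.

import gradio as gr

def analyze_and_complete(file_paths):
    # Hypothetical stand-in: the real app runs NER, sentiment, scene,
    # and dialogue analysis over each file; here we only echo the paths.
    return {"received_files": list(file_paths or [])}

interface = gr.Interface(
    fn=analyze_and_complete,
    inputs=gr.File(file_count="multiple", type="filepath"),
    outputs=gr.JSON(),  # current component class; replaces gr.outputs.JSON()
    title="Movie Script Analyzer and Completer",
    description="Upload text, PDF, or DOCX files to analyze and complete the movie script.",
)

if __name__ == "__main__":
    interface.launch()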