Spaces:

mohamedrady
/

clockwork-temptation

Runtime error

App Files Files Community

mohamedrady committed on Jul 16, 2024

Commit

4ec5ed1

verified ·

1 Parent(s): e1de8ca

Create app.py

Browse files

Files changed (1) hide show

app.py +103 -0

app.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import os
+import spacy
+import nltk
+import torch
+from transformers import pipeline
+import PyPDF2
+import gradio as gr
+# Initialize required tools
+nlp = spacy.load("en_core_web_sm")
+nltk.download('punkt')
+# Check if GPU is available and use it
+device = 0 if torch.cuda.is_available() else -1
+analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)
+# Define functions for text analysis
+def spacy_ner_analysis(text):
+    doc = nlp(text)
+    entities = [(ent.text, ent.label_) for ent in doc.ents]
+    return entities
+def nltk_extract_sentences(text):
+    sentences = nltk.tokenize.sent_tokenize(text)
+    return sentences
+def nltk_extract_quotes(text):
+    quotes = []
+    sentences = nltk.tokenize.sent_tokenize(text)
+    for sentence in sentences:
+        if '"' in sentence:
+            quotes.append(sentence)
+    return quotes
+def count_tokens(text):
+    tokens = nltk.tokenize.word_tokenize(text)
+    return len(tokens)
+def extract_pdf_text(file_path):
+    with open(file_path, "rb") as pdf_file:
+        pdf_reader = PyPDF2.PdfReader(pdf_file)
+        text = ""
+        for page_num in range(len(pdf_reader.pages)):
+            page = pdf_reader.pages[page_num]
+            text += page.extract_text()
+    return text
+def analyze_text(text):
+    try:
+        result = analyzer(text)
+        return result
+    except Exception as e:
+        print(f"Error analyzing text: {str(e)}")
+        return ""
+def process_text(text, output_directory, filename_prefix):
+    spacy_entities = spacy_ner_analysis(text)
+    sentences = nltk_extract_sentences(text)
+    quotes = nltk_extract_quotes(text)
+    token_count = count_tokens(text)
+    # Save results to files
+    with open(os.path.join(output_directory, f"{filename_prefix}_spacy_entities.txt"), "w", encoding="utf-8") as file:
+        file.write(str(spacy_entities))
+    with open(os.path.join(output_directory, f"{filename_prefix}_sentences.txt"), "w", encoding="utf-8") as file:
+        file.write("\n".join(sentences))
+    with open(os.path.join(output_directory, f"{filename_prefix}_quotes.txt"), "w", encoding="utf-8") as file:
+        file.write("\n".join(quotes))
+    with open(os.path.join(output_directory, f"{filename_prefix}_token_count.txt"), "w", encoding="utf-8") as file:
+        file.write(str(token_count))
+def analyze_and_complete(file_path):
+    if file_path.endswith(".pdf"):
+        text = extract_pdf_text(file_path)
+    else:
+        with open(file_path, "r", encoding="utf-8") as file:
+            text = file.read()
+    output_directory = "/Users/Home/Library/Mobile Documents/com~apple~CloudDocs/osa/سيناريوهات/ليالي ألف ليلة"
+    filename_prefix = os.path.splitext(os.path.basename(file_path))[0]
+    process_text(text, output_directory, filename_prefix)
+    spacy_entities = spacy_ner_analysis(text)
+    sentences = nltk_extract_sentences(text)
+    quotes = nltk_extract_quotes(text)
+    token_count = count_tokens(text)
+    return str(spacy_entities), "\n".join(sentences), "\n".join(quotes), str(token_count)
+# Define the Gradio interface
+interface = gr.Interface(
+    fn=analyze_and_complete,
+    inputs=gr.File(file_count="single", type="filepath"),
+    outputs=["text", "text", "text", "text"],
+    title="Movie Script Analyzer and Completer",
+    description="Upload a text, PDF, or DOCX file to analyze and complete the movie script."
+)
+if __name__ == "__main__":
+    interface.launch()