mohamedrady committed
Commit 4ec5ed1 · verified
1 Parent(s): e1de8ca

Create app.py

Files changed (1)
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
+ import os
+ import spacy
+ import nltk
+ import torch
+ from transformers import pipeline
+ import PyPDF2
+ import gradio as gr
+
+ # Initialize required tools
+ nlp = spacy.load("en_core_web_sm")
+ nltk.download('punkt')  # tokenizer models used by sent_tokenize/word_tokenize below
+
+ # Check if a GPU is available and use it (device=0 -> first GPU, -1 -> CPU)
+ device = 0 if torch.cuda.is_available() else -1
+ analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)
+
+ # Define functions for text analysis
+ def spacy_ner_analysis(text):
+     doc = nlp(text)
+     entities = [(ent.text, ent.label_) for ent in doc.ents]
+     return entities
+
+ def nltk_extract_sentences(text):
+     sentences = nltk.tokenize.sent_tokenize(text)
+     return sentences
+
+ def nltk_extract_quotes(text):
+     # Keeps any sentence containing a straight double quote; curly quotes are not matched
+     quotes = []
+     sentences = nltk.tokenize.sent_tokenize(text)
+     for sentence in sentences:
+         if '"' in sentence:
+             quotes.append(sentence)
+     return quotes
+
+ def count_tokens(text):
+     tokens = nltk.tokenize.word_tokenize(text)
+     return len(tokens)
+
+ def extract_pdf_text(file_path):
+     with open(file_path, "rb") as pdf_file:
+         pdf_reader = PyPDF2.PdfReader(pdf_file)
+         text = ""
+         for page in pdf_reader.pages:
+             # Guard against pages with no extractable text
+             text += page.extract_text() or ""
+     return text
+
+ def analyze_text(text):
+     # Sentiment analysis via the transformers pipeline; not yet called by the
+     # Gradio flow, and long scripts may exceed the model's 512-token limit
+     try:
+         result = analyzer(text)
+         return result
+     except Exception as e:
+         print(f"Error analyzing text: {str(e)}")
+         return ""
+
+ def process_text(text, output_directory, filename_prefix):
+     spacy_entities = spacy_ner_analysis(text)
+     sentences = nltk_extract_sentences(text)
+     quotes = nltk_extract_quotes(text)
+     token_count = count_tokens(text)
+
+     # Save results to files
+     with open(os.path.join(output_directory, f"{filename_prefix}_spacy_entities.txt"), "w", encoding="utf-8") as file:
+         file.write(str(spacy_entities))
+
+     with open(os.path.join(output_directory, f"{filename_prefix}_sentences.txt"), "w", encoding="utf-8") as file:
+         file.write("\n".join(sentences))
+
+     with open(os.path.join(output_directory, f"{filename_prefix}_quotes.txt"), "w", encoding="utf-8") as file:
+         file.write("\n".join(quotes))
+
+     with open(os.path.join(output_directory, f"{filename_prefix}_token_count.txt"), "w", encoding="utf-8") as file:
+         file.write(str(token_count))
+
+ def analyze_and_complete(file_path):
+     if file_path.endswith(".pdf"):
+         text = extract_pdf_text(file_path)
+     else:
+         with open(file_path, "r", encoding="utf-8") as file:
+             text = file.read()
+
+     # Hard-coded, machine-specific output path (roughly: "scenarios/One Thousand and One Nights")
+     output_directory = "/Users/Home/Library/Mobile Documents/com~apple~CloudDocs/osa/سيناريوهات/ليالي ألف ليلة"
+     filename_prefix = os.path.splitext(os.path.basename(file_path))[0]
+     os.makedirs(output_directory, exist_ok=True)  # avoid failing when the directory is missing
+     process_text(text, output_directory, filename_prefix)
+
+     # Recomputed here for the UI; process_text already derived the same values
+     spacy_entities = spacy_ner_analysis(text)
+     sentences = nltk_extract_sentences(text)
+     quotes = nltk_extract_quotes(text)
+     token_count = count_tokens(text)
+
+     return str(spacy_entities), "\n".join(sentences), "\n".join(quotes), str(token_count)
+
+ # Define the Gradio interface
+ interface = gr.Interface(
+     fn=analyze_and_complete,
+     inputs=gr.File(file_count="single", type="filepath"),
+     outputs=["text", "text", "text", "text"],
+     title="Movie Script Analyzer and Completer",
+     description="Upload a text or PDF file to analyze the movie script."  # DOCX is not handled by the code above
+ )
+
+ if __name__ == "__main__":
+     interface.launch()
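
To sanity-check the commit locally without launching the UI, something like the sketch below could call the same entry point the interface wraps (a minimal sketch, not part of the diff; `sample.txt` is a hypothetical stand-in input file, and the hard-coded `output_directory` must be writable on the machine). Running it needs the packages the imports pull in: spacy with the en_core_web_sm model, nltk, torch, transformers, PyPDF2, and gradio.

    # Hypothetical smoke test, not part of this commit
    entities, sentences, quotes, token_count = analyze_and_complete("sample.txt")
    print("tokens:", token_count)
    print("entities:", entities)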