"""Gradio app that analyzes uploaded movie scripts.

For every uploaded text, PDF, or DOCX file the app reports the named
entities found by spaCy, the sentences and quoted sentences found by NLTK,
and the NLTK word-token count.
"""

import os
import zipfile
from xml.etree import ElementTree

import gradio as gr
import nltk
import PyPDF2
import spacy
import torch
from transformers import pipeline

SPACY_MODEL = "en_core_web_sm"

# XML namespace of the WordprocessingML elements inside a .docx archive.
_WORD_NS = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"

# Download and initialize required tools.
# Only fetch the spaCy model when it is not installed yet, so that a normal
# start-up does not hit the network on every launch.
try:
    nlp = spacy.load(SPACY_MODEL)
except OSError:
    spacy.cli.download(SPACY_MODEL)
    nlp = spacy.load(SPACY_MODEL)
nltk.download("punkt")

# Check if GPU is available and use it (-1 selects the CPU).
device = 0 if torch.cuda.is_available() else -1
analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device=device,
)


def spacy_ner_analysis(text):
    """Return the named entities of *text* as ``(entity_text, label)`` tuples."""
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]


def nltk_extract_sentences(text):
    """Return the list of sentences NLTK finds in *text*."""
    return nltk.tokenize.sent_tokenize(text)


def _quoted_sentences(sentences):
    """Return the sentences that contain a straight double quote (``"``)."""
    return [sentence for sentence in sentences if '"' in sentence]


def nltk_extract_quotes(text):
    """Return the sentences of *text* that contain a straight double quote."""
    return _quoted_sentences(nltk_extract_sentences(text))


def count_tokens(text):
    """Return the number of NLTK word tokens in *text*."""
    return len(nltk.tokenize.word_tokenize(text))


def extract_pdf_text(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*.

    Pages for which no text can be extracted contribute an empty string.
    """
    with open(file_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # ``or ""`` guards against pages whose extraction yields nothing.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def _extract_docx_text(file_path):
    """Return the paragraph text of the DOCX file at *file_path*.

    Paragraphs are joined with newlines. Only the main document part
    (``word/document.xml``) is read.
    """
    # NOTE(review): the XML comes from a user upload and is parsed with the
    # standard-library parser; confirm that is acceptable for the deployment.
    with zipfile.ZipFile(file_path) as archive:
        root = ElementTree.fromstring(archive.read("word/document.xml"))
    paragraphs = (
        "".join(run.text or "" for run in paragraph.iter(_WORD_NS + "t"))
        for paragraph in root.iter(_WORD_NS + "p")
    )
    return "\n".join(paragraphs)


def _read_text(file_path):
    """Return the text of *file_path*, choosing a reader by file extension.

    ``.pdf`` and ``.docx`` (case-insensitive) use dedicated extractors; any
    other file is read as UTF-8 text.
    """
    extension = os.path.splitext(file_path)[1].lower()
    if extension == ".pdf":
        return extract_pdf_text(file_path)
    if extension == ".docx":
        return _extract_docx_text(file_path)
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()


def analyze_and_complete(file_paths):
    """Analyze each file and return one result tuple per file.

    Args:
        file_paths: Iterable of paths to text, PDF, or DOCX files. ``None``
            is treated as an empty list.

    Returns:
        A list with one 4-tuple of strings per input file:
        ``(entities, sentences, quotes, token_count)``, where sentences and
        quotes are newline-joined.
    """
    results = []
    for file_path in file_paths or []:
        text = _read_text(file_path)
        # Tokenize into sentences once and reuse the result for the quotes.
        sentences = nltk_extract_sentences(text)
        quotes = _quoted_sentences(sentences)
        results.append(
            (
                str(spacy_ner_analysis(text)),
                "\n".join(sentences),
                "\n".join(quotes),
                str(count_tokens(text)),
            )
        )
    return results


def _analyze_for_interface(file_paths):
    """Adapt :func:`analyze_and_complete` to the four Gradio text outputs.

    The interface declares four outputs, so the callback must return exactly
    four values regardless of how many files were uploaded. Each output
    gathers the matching field of every file, prefixed by the file name.
    """
    file_paths = file_paths or []
    combined = [[], [], [], []]
    for file_path, result in zip(file_paths, analyze_and_complete(file_paths)):
        header = "=== " + os.path.basename(file_path) + " ==="
        for bucket, value in zip(combined, result):
            bucket.append(header + "\n" + value)
    return tuple("\n\n".join(bucket) for bucket in combined)


# Define the Gradio interface
interface = gr.Interface(
    fn=_analyze_for_interface,
    inputs=gr.File(file_count="multiple", type="filepath"),
    outputs=["text", "text", "text", "text"],
    title="Movie Script Analyzer and Completer",
    description="Upload text, PDF, or DOCX files to analyze and complete the movie script.",
)

if __name__ == "__main__":
    interface.launch()