Spaces:

mohamedrady
/

clockwork-temptation

Runtime error

App Files Files Community

clockwork-temptation / app.py

mohamedrady

Update app.py

6969f60 verified 12 months ago

raw

history blame

2.52 kB

	import os
	import spacy
	import nltk
	import torch
	from transformers import pipeline
	import PyPDF2
	import gradio as gr

	# Download and initialize required tools.
	# Load the spaCy English model; fetch it only when it is not installed yet
	# instead of re-downloading it on every start-up.
	try:
	    nlp = spacy.load("en_core_web_sm")
	except OSError:
	    # spacy.load raises OSError when the model package cannot be found.
	    spacy.cli.download("en_core_web_sm")
	    nlp = spacy.load("en_core_web_sm")

	# Punkt data backs nltk's sent_tokenize / word_tokenize. Recent NLTK
	# releases look it up as "punkt_tab", older ones as "punkt", so request both.
	nltk.download('punkt')
	nltk.download('punkt_tab')

	# Check if GPU is available and use it (device 0); -1 selects the CPU
	device = 0 if torch.cuda.is_available() else -1
	analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)

	def spacy_ner_analysis(text):
	    """Run the module-level spaCy pipeline on *text*.

	    Returns a list of ``(entity_text, entity_label)`` tuples, one per
	    entity found in the parsed document, in document order.
	    """
	    found = []
	    for span in nlp(text).ents:
	        found.append((span.text, span.label_))
	    return found

	def nltk_extract_sentences(text):
	    """Split *text* into sentences with NLTK's ``sent_tokenize``.

	    Returns the list of sentence strings produced by the tokenizer.
	    """
	    return nltk.tokenize.sent_tokenize(text)

	def nltk_extract_quotes(text):
	    """Return the sentences of *text* that contain a double-quote character.

	    The text is split with NLTK's ``sent_tokenize``; a sentence is kept
	    when it includes at least one straight ``"`` character.
	    """
	    return [
	        candidate
	        for candidate in nltk.tokenize.sent_tokenize(text)
	        if '"' in candidate
	    ]

	def count_tokens(text):
	    """Return how many tokens NLTK's ``word_tokenize`` produces for *text*."""
	    return len(nltk.tokenize.word_tokenize(text))

	def extract_pdf_text(file_path):
	    """Return the text of every page of the PDF at *file_path*.

	    Args:
	        file_path: Path of the PDF file to read.

	    Returns:
	        The extracted text of all pages, concatenated in page order with
	        no separator between pages (an empty string for a PDF without
	        pages).
	    """
	    with open(file_path, "rb") as pdf_file:
	        pdf_reader = PyPDF2.PdfReader(pdf_file)
	        # Iterate the pages directly and join once, rather than indexing by
	        # page number and growing a string page by page. Extraction stays
	        # inside the ``with`` block so the file is still open while reading.
	        return "".join(page.extract_text() for page in pdf_reader.pages)

	def analyze_and_complete(file_paths):
	    """Analyze the uploaded files and build four text reports.

	    Files whose name ends in ``.pdf`` (case-insensitive) are read through
	    ``extract_pdf_text``; every other file is read as UTF-8 text.
	    NOTE(review): binary formats such as DOCX are not parsed and will most
	    likely fail to decode as UTF-8.

	    Args:
	        file_paths: Iterable of file path strings, or ``None`` when nothing
	            was uploaded.

	    Returns:
	        A 4-tuple of strings ``(entities, sentences, quotes, token_counts)``,
	        one value per output of the interface. Each string holds one section
	        per input file, headed by the file's base name; all four are empty
	        strings when there are no files.

	    Raises:
	        UnicodeDecodeError: If a non-PDF file is not valid UTF-8.
	    """
	    entity_sections = []
	    sentence_sections = []
	    quote_sections = []
	    token_sections = []

	    # Treat ``None`` (no upload) the same as an empty list.
	    for file_path in file_paths or []:
	        if file_path.lower().endswith(".pdf"):
	            text = extract_pdf_text(file_path)
	        else:
	            with open(file_path, "r", encoding="utf-8") as file:
	                text = file.read()

	        # Label every section so results from several files stay distinguishable.
	        header = f"=== {os.path.basename(file_path)} ==="

	        spacy_entities = spacy_ner_analysis(text)
	        sentences = nltk_extract_sentences(text)
	        quotes = nltk_extract_quotes(text)
	        token_count = count_tokens(text)

	        entity_sections.append(f"{header}\n{spacy_entities}")
	        sentence_sections.append(header + "\n" + "\n".join(sentences))
	        quote_sections.append(header + "\n" + "\n".join(quotes))
	        token_sections.append(f"{header}\n{token_count}")

	    # Exactly four values are returned because the interface declares four
	    # text outputs; a list of per-file tuples would not match them.
	    return (
	        "\n\n".join(entity_sections),
	        "\n\n".join(sentence_sections),
	        "\n\n".join(quote_sections),
	        "\n\n".join(token_sections),
	    )

	# Build the Gradio UI: a multi-file upload that hands file paths to
	# analyze_and_complete, with four plain-text output components.
	interface = gr.Interface(
	    title="Movie Script Analyzer and Completer",
	    description="Upload text, PDF, or DOCX files to analyze and complete the movie script.",
	    fn=analyze_and_complete,
	    inputs=gr.File(type="filepath", file_count="multiple"),
	    outputs=["text"] * 4,
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()