import os
import spacy
import nltk
import torch
from transformers import pipeline
import PyPDF2
import gradio as gr
# Initialize required tools
nlp = spacy.load("en_core_web_sm")
nltk.download('punkt')
# Check if GPU is available and use it
device = 0 if torch.cuda.is_available() else -1
analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)
# Define functions for text analysis
def spacy_ner_analysis(text):
    doc = nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities
def nltk_extract_sentences(text):
    sentences = nltk.tokenize.sent_tokenize(text)
    return sentences
def nltk_extract_quotes(text):
    quotes = []
    sentences = nltk.tokenize.sent_tokenize(text)
    for sentence in sentences:
        if '"' in sentence:
            quotes.append(sentence)
    return quotes
def count_tokens(text):
    tokens = nltk.tokenize.word_tokenize(text)
    return len(tokens)
def extract_pdf_text(file_path):
    with open(file_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in pdf_reader.pages:
            # extract_text() can return None for pages without extractable text
            text += page.extract_text() or ""
    return text
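# Minimal sketch of a DOCX extractor, since the interface description below
# mentions DOCX uploads. It assumes the third-party python-docx package is
# installed (it is not imported at the top of this file) and is not called by
# analyze_and_complete as written.
def extract_docx_text(file_path):
    from docx import Document  # assumption: python-docx is available
    document = Document(file_path)
    # Join the text of every paragraph in the document
    return "\n".join(paragraph.text for paragraph in document.paragraphs)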
def analyze_text(text):
    try:
        # truncation=True keeps the input within the model's 512-token limit
        result = analyzer(text, truncation=True)
        return result
    except Exception as e:
        print(f"Error analyzing text: {str(e)}")
        return ""
def process_text(text, output_directory, filename_prefix):
    spacy_entities = spacy_ner_analysis(text)
    sentences = nltk_extract_sentences(text)
    quotes = nltk_extract_quotes(text)
    token_count = count_tokens(text)
    # Save results to files
    with open(os.path.join(output_directory, f"{filename_prefix}_spacy_entities.txt"), "w", encoding="utf-8") as file:
        file.write(str(spacy_entities))
    with open(os.path.join(output_directory, f"{filename_prefix}_sentences.txt"), "w", encoding="utf-8") as file:
        file.write("\n".join(sentences))
    with open(os.path.join(output_directory, f"{filename_prefix}_quotes.txt"), "w", encoding="utf-8") as file:
        file.write("\n".join(quotes))
    with open(os.path.join(output_directory, f"{filename_prefix}_token_count.txt"), "w", encoding="utf-8") as file:
        file.write(str(token_count))
def analyze_and_complete(file_path):
    if file_path.endswith(".pdf"):
        text = extract_pdf_text(file_path)
    else:
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()
    # Hardcoded local output directory; created if it does not already exist
    output_directory = "/Users/Home/Library/Mobile Documents/com~apple~CloudDocs/osa/سيناريوهات/ليالي ألف ليلة"
    os.makedirs(output_directory, exist_ok=True)
    filename_prefix = os.path.splitext(os.path.basename(file_path))[0]
    process_text(text, output_directory, filename_prefix)
    spacy_entities = spacy_ner_analysis(text)
    sentences = nltk_extract_sentences(text)
    quotes = nltk_extract_quotes(text)
    token_count = count_tokens(text)
    return str(spacy_entities), "\n".join(sentences), "\n".join(quotes), str(token_count)
# Define the Gradio interface
interface = gr.Interface(
    fn=analyze_and_complete,
    inputs=gr.File(file_count="single", type="filepath"),
    outputs=["text", "text", "text", "text"],
    title="Movie Script Analyzer and Completer",
    description="Upload a text, PDF, or DOCX file to analyze and complete the movie script."
)
if __name__ == "__main__":
    interface.launch()