File size: 2,522 Bytes
4ec5ed1
 
 
 
 
 
 
 
c3d42ed
 
4ec5ed1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b4a700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ec5ed1
 
 
 
7b4a700
6969f60
4ec5ed1
7b4a700
4ec5ed1
 
 
c3d42ed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import spacy
import nltk
import torch
from transformers import pipeline
import PyPDF2
import gradio as gr

# Download and initialize required tools.
# Only download the spaCy model when it is actually missing, instead of
# re-downloading it unconditionally on every startup.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
nltk.download('punkt')

# Check if GPU is available and use it (device 0 = first GPU, -1 = CPU).
device = 0 if torch.cuda.is_available() else -1
# NOTE(review): `analyzer` is never used in the visible code — confirm it
# is needed before keeping the model download/load at startup.
analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)

def spacy_ner_analysis(text):
    """Run spaCy NER over *text* and return (entity_text, label) pairs."""
    parsed = nlp(text)
    found = []
    for entity in parsed.ents:
        found.append((entity.text, entity.label_))
    return found

def nltk_extract_sentences(text):
    """Split *text* into sentences using NLTK's Punkt sentence tokenizer."""
    return nltk.tokenize.sent_tokenize(text)

def nltk_extract_quotes(text):
    """Return the sentences of *text* that contain a quotation mark.

    The original implementation only matched the straight ASCII double
    quote ('"'); formatted scripts often use typographic quotes instead,
    so curly quotes and guillemets are recognized as well. Behavior is a
    strict superset of the old one: every sentence found before is still
    found.

    Parameters:
        text: the full document text.

    Returns:
        list[str]: sentences containing at least one quote character.
    """
    quote_chars = ('"', '\u201c', '\u201d', '\u00ab', '\u00bb')
    sentences = nltk.tokenize.sent_tokenize(text)
    return [s for s in sentences if any(q in s for q in quote_chars)]

def count_tokens(text):
    """Count the word tokens in *text* via NLTK's word tokenizer."""
    return len(nltk.tokenize.word_tokenize(text))

def extract_pdf_text(file_path):
    """Extract and concatenate the text of every page in a PDF.

    Parameters:
        file_path: path to a PDF file on disk.

    Returns:
        str: the concatenated text of all pages (empty string for pages
        with no extractable text).
    """
    with open(file_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # PdfReader.extract_text() may return None for pages without
        # extractable text (e.g. scanned images); substitute "" so the
        # join does not raise a TypeError.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

def analyze_and_complete(file_paths):
    """Analyze each uploaded file and collect per-file results.

    PDF files are read through ``extract_pdf_text``; everything else is
    read as UTF-8 text. For each file the function gathers named
    entities, sentences, quoted sentences, and a token count.

    Parameters:
        file_paths: iterable of filesystem paths to the uploaded files.

    Returns:
        list[tuple[str, str, str, str]]: one tuple per file of
        (entities repr, newline-joined sentences, newline-joined quotes,
        token count as a string).
    """
    # Removed dead locals from the original: a hard-coded, user-specific
    # iCloud output directory and a filename prefix, neither of which
    # was ever used.
    results = []
    for file_path in file_paths:
        if file_path.endswith(".pdf"):
            text = extract_pdf_text(file_path)
        else:
            with open(file_path, "r", encoding="utf-8") as file:
                text = file.read()

        spacy_entities = spacy_ner_analysis(text)
        sentences = nltk_extract_sentences(text)
        quotes = nltk_extract_quotes(text)
        token_count = count_tokens(text)

        results.append((str(spacy_entities), "\n".join(sentences), "\n".join(quotes), str(token_count)))
    return results

# Define the Gradio interface.
# NOTE(review): analyze_and_complete returns a list of per-file tuples,
# while four flat text outputs are declared here — confirm how multiple
# uploads should be presented.
interface = gr.Interface(
    fn=analyze_and_complete,
    inputs=gr.File(file_count="multiple", type="filepath"),
    outputs=["text", "text", "text", "text"],
    title="Movie Script Analyzer and Completer",
    # The original description advertised DOCX support, but non-PDF files
    # are simply read as UTF-8 text — the description now matches reality.
    description="Upload text or PDF files to analyze the movie script."
)

if __name__ == "__main__":
    interface.launch()