File size: 1,808 Bytes
26b5631
 
 
 
 
 
 
e18eff0
 
 
 
 
26b5631
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import spacy
import os
import shutil
import gradio as gr
from booknlp.booknlp import BookNLP
from spacy.cli import download


# Importing this module triggers the download of the BookNLP model files from my Hugging Face backup.
import download_missing_booknlp_models 


# Ensure Spacy model is downloaded
def ensure_spacy_model():
    """Make sure the ``en_core_web_sm`` spaCy model can be loaded.

    The model is loaded once as a probe; if that raises ``OSError`` the
    model is fetched with ``spacy.cli.download``. Nothing is returned.
    """
    model_name = "en_core_web_sm"
    try:
        # Probe only: the loaded pipeline object is discarded.
        spacy.load(model_name)
    except OSError:
        # Loading failed, so fetch the model package.
        download(model_name)

# Runs at import time: check for (and if needed fetch) the spaCy model.
ensure_spacy_model()

# Settings handed to BookNLP: the comma-separated pipeline stages and the
# model size to use.
model_params = dict(
    pipeline="entity,quote,supersense,event,coref",
    model="big",
)

# Single module-level BookNLP instance reused by every request.
booknlp = BookNLP("en", model_params)

# Define function to process file
def process_book(file):
    """Run BookNLP on an uploaded text file and zip the results.

    Args:
        file: The upload as handed over by Gradio. Either an object that
            exposes the file path through ``.name`` or a plain path string
            (which of the two is passed depends on the Gradio version and
            the component's ``type`` setting).

    Returns:
        Path of the zip archive containing everything BookNLP wrote for
        this book.
    """
    # Accept both a file-like object with ``.name`` and a bare path string.
    input_file = getattr(file, "name", file)
    output_dir = "output_dir/booknlp_output/"
    book_id = os.path.splitext(os.path.basename(input_file))[0]

    # Start from an empty output directory so results of an earlier run
    # never end up in this book's archive.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Run BookNLP
    booknlp.process(input_file, output_dir, book_id)

    # Write the archive next to (not inside) the directory being zipped;
    # creating it inside ``output_dir`` can make the archive include a
    # partial copy of itself.
    archive_parent = os.path.dirname(os.path.normpath(output_dir))
    archive_base = os.path.join(archive_parent, f"{book_id}_output")

    # make_archive returns the path of the file it created.
    return shutil.make_archive(archive_base, "zip", output_dir)

# Gradio Interface
def gradio_interface():
    """Build the upload/download web UI around ``process_book`` and launch it.

    Uses the ``gr.File`` component for both input and output. The legacy
    ``gr.inputs`` / ``gr.outputs`` wrappers are deprecated in Gradio 3 and
    removed in Gradio 4, and the ``file_types`` filter is a parameter of
    ``gr.File``.
    """
    # Restrict the upload picker to plain-text files.
    file_input = gr.File(file_types=[".txt"], label="Upload a .txt file")
    file_output = gr.File(label="Download the output files")

    gr.Interface(
        fn=process_book,
        inputs=file_input,
        outputs=file_output,
        title="BookNLP Processor",
        description="Upload a .txt book and process it using BookNLP. Download the processed output files."
    ).launch()

# Launch the web UI only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    gradio_interface()