drewThomasson committed on
Commit
26b5631
·
verified ·
1 Parent(s): 38e3ab9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+ import os
3
+ import shutil
4
+ import gradio as gr
5
+ from booknlp.booknlp import BookNLP
6
+ from spacy.cli import download
7
+
8
+ # Ensure Spacy model is downloaded
9
def ensure_spacy_model():
    """Make sure the small English spaCy model is installed.

    Tries to load ``en_core_web_sm``; if spaCy raises ``OSError`` (which is
    what it does when the model package cannot be found), the model is
    fetched through ``spacy.cli.download``.
    """
    model_name = "en_core_web_sm"
    try:
        # Loading is used purely as an "is it installed?" probe; the
        # returned pipeline object is discarded.
        spacy.load(model_name)
    except OSError:
        download(model_name)
16
+
17
# The spaCy model must be present before the BookNLP pipeline is built.
ensure_spacy_model()

# Build one shared BookNLP pipeline at import time so every request
# reuses it instead of constructing it again.
model_params = dict(
    pipeline="entity,quote,supersense,event,coref",
    model="big",
)
booknlp = BookNLP("en", model_params)
26
+
27
+ # Define function to process file
28
def process_book(file):
    """Run BookNLP on an uploaded text file and return a zip of the results.

    Args:
        file: The upload handed over by Gradio. Either a filesystem path
            (``str``) or an object that exposes the path as ``.name``.

    Returns:
        Relative path of the ``.zip`` archive containing every file BookNLP
        wrote for this book.
    """
    # Depending on the Gradio version the upload arrives as a plain path
    # string or as a temp-file wrapper whose ``.name`` is the path.
    input_file = getattr(file, "name", file)
    output_root = "output_dir"
    output_dir = os.path.join(output_root, "booknlp_output")
    book_id = os.path.splitext(os.path.basename(input_file))[0]

    # Start from an empty directory so files from a previous run do not
    # end up in this book's archive.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Run BookNLP; it writes its result files into output_dir.
    booknlp.process(input_file, output_dir, book_id)

    # Write the archive next to (not inside) the directory being zipped;
    # otherwise the half-written zip may be picked up as one of its own
    # members.
    archive_base = os.path.join(output_root, f"{book_id}_output")
    shutil.make_archive(archive_base, "zip", output_dir)
    return f"{archive_base}.zip"
44
+
45
+ # Gradio Interface
46
def gradio_interface():
    """Build and launch the Gradio web UI for BookNLP processing.

    The interface takes a single ``.txt`` upload, passes it to
    ``process_book`` and offers the returned archive for download.
    ``launch()`` starts the web server.
    """
    # Use the unified ``gr.File`` component for both input and output: the
    # legacy ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated, and
    # ``file_types`` is an argument of ``gr.File``.
    file_input = gr.File(file_types=[".txt"], label="Upload a .txt file")
    file_output = gr.File(label="Download the output files")

    gr.Interface(
        fn=process_book,
        inputs=file_input,
        outputs=file_output,
        title="BookNLP Processor",
        description="Upload a .txt book and process it using BookNLP. Download the processed output files.",
    ).launch()


if __name__ == "__main__":
    gradio_interface()