Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,16 +2,15 @@ import spacy
|
|
2 |
import os
|
3 |
import shutil
|
4 |
import gradio as gr
|
|
|
5 |
from booknlp.booknlp import BookNLP
|
6 |
from spacy.cli import download
|
7 |
|
8 |
# Ensure Spacy model is downloaded
|
9 |
def ensure_spacy_model():
    """Install the ``en_core_web_sm`` spaCy pipeline if it cannot be loaded.

    The pipeline is loaded once purely as an availability probe; when
    that load raises ``OSError`` the pipeline is downloaded instead.
    Returns ``None``.
    """
    required_model = "en_core_web_sm"
    try:
        # Probe: a successful load means the pipeline is already installed.
        spacy.load(required_model)
    except OSError:
        # Not installed yet, so download it.
        download(required_model)
|
16 |
|
17 |
# Initialize Spacy model
|
@@ -24,12 +23,23 @@ model_params = {
|
|
24 |
}
|
25 |
booknlp = BookNLP("en", model_params)
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# Define function to process file
|
28 |
def process_book(file):
|
29 |
input_file = file.name
|
30 |
output_dir = "output_dir/booknlp_output/"
|
31 |
book_id = os.path.splitext(os.path.basename(input_file))[0]
|
32 |
|
|
|
|
|
|
|
|
|
33 |
# Create output directory if it doesn't exist
|
34 |
if os.path.exists(output_dir):
|
35 |
shutil.rmtree(output_dir)
|
@@ -45,15 +55,26 @@ def process_book(file):
|
|
45 |
|
46 |
# Gradio Interface
|
47 |
def gradio_interface():
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
file_output = gr.File(label="Download the output files")
|
50 |
|
|
|
|
|
|
|
51 |
gr.Interface(
|
52 |
fn=process_book,
|
53 |
inputs=file_input,
|
54 |
outputs=file_output,
|
55 |
-
title="BookNLP Processor",
|
56 |
-
description=
|
57 |
).launch()
|
58 |
|
59 |
if __name__ == "__main__":
|
|
|
2 |
import os
|
3 |
import shutil
|
4 |
import gradio as gr
|
5 |
+
import subprocess
|
6 |
from booknlp.booknlp import BookNLP
|
7 |
from spacy.cli import download
|
8 |
|
9 |
# Ensure Spacy model is downloaded
|
10 |
def ensure_spacy_model():
    """Make sure the ``en_core_web_sm`` spaCy pipeline is available.

    Tries to load the pipeline and, if that raises ``OSError``, fetches
    it with ``spacy.cli.download``. The loaded pipeline object is
    discarded; the function returns ``None``.
    """
    model_name = "en_core_web_sm"
    try:
        # The load result is not kept; it only serves as an install check.
        spacy.load(model_name)
    except OSError:
        download(model_name)
|
15 |
|
16 |
# Initialize Spacy model
|
|
|
23 |
}
|
24 |
booknlp = BookNLP("en", model_params)
|
25 |
|
26 |
+
# Define function to convert ebook to txt using Calibre
|
27 |
+
def convert_to_txt(input_file):
    """Convert an ebook to plain text with Calibre's ``ebook-convert`` tool.

    The text file is written next to ``input_file``, with the extension
    replaced by ``.txt``. If that file already exists it is reused and no
    conversion is run.

    Args:
        input_file: Path of the ebook to convert.

    Returns:
        Path of the resulting ``.txt`` file.

    Raises:
        FileNotFoundError: If ``ebook-convert`` is not found on ``PATH``.
        subprocess.CalledProcessError: If the converter exits with a
            non-zero status.
    """
    output_txt = os.path.splitext(input_file)[0] + ".txt"
    if os.path.exists(output_txt):
        return output_txt

    # Fail early with an actionable message instead of the bare errno that
    # subprocess reports when the executable is missing.
    converter = shutil.which("ebook-convert")
    if converter is None:
        raise FileNotFoundError(
            "Calibre's 'ebook-convert' executable was not found on PATH; "
            "install Calibre to convert non-.txt ebooks."
        )

    try:
        subprocess.run([converter, input_file, output_txt], check=True)
    except subprocess.CalledProcessError:
        # A failed run may leave a partial file behind; remove it so the
        # existence check above never reuses a broken conversion.
        try:
            os.remove(output_txt)
        except OSError:
            pass  # Nothing was written, or it cannot be removed: best effort.
        raise
    return output_txt
|
32 |
+
|
33 |
# Define function to process file
|
34 |
def process_book(file):
|
35 |
input_file = file.name
|
36 |
output_dir = "output_dir/booknlp_output/"
|
37 |
book_id = os.path.splitext(os.path.basename(input_file))[0]
|
38 |
|
39 |
+
# Check if the file is already a .txt file
|
40 |
+
if not input_file.endswith(".txt"):
|
41 |
+
input_file = convert_to_txt(input_file)
|
42 |
+
|
43 |
# Create output directory if it doesn't exist
|
44 |
if os.path.exists(output_dir):
|
45 |
shutil.rmtree(output_dir)
|
|
|
55 |
|
56 |
# Gradio Interface
|
57 |
def gradio_interface():
    """Build and launch the Gradio UI for the BookNLP processor.

    The interface takes one uploaded file, restricted to the ebook
    extensions listed below, passes it to ``process_book`` and offers the
    result as a downloadable file. The call to ``launch()`` starts the app.
    """
    # Extensions accepted by the upload widget.
    ebook_extensions = [
        '.azw', '.azw3', '.azw4', '.cbz', '.cbr', '.cb7', '.cbc', '.chm',
        '.djvu', '.docx', '.epub', '.fb2', '.fbz', '.html', '.htmlz', '.lit',
        '.lrf', '.mobi', '.odt', '.pdf', '.prc', '.pdb', '.pml', '.rb',
        '.rtf', '.snb', '.tcr', '.txt', '.txtz'
    ]

    # The description shown in the UI lists every accepted extension.
    format_list = ', '.join(ebook_extensions)
    blurb = (
        f"Upload any of the supported formats: {format_list}. "
        "If a .txt file is uploaded, it will directly be processed by BookNLP. "
        "Otherwise, it will be converted to .txt using Calibre first."
    )

    upload_widget = gr.File(
        file_types=ebook_extensions,
        label="Upload an ebook file (.azw, .epub, .pdf, .txt, etc.)",
    )
    download_widget = gr.File(label="Download the output files")

    app = gr.Interface(
        fn=process_book,
        inputs=upload_widget,
        outputs=download_widget,
        title="BookNLP Processor with Ebook Support",
        description=blurb,
    )
    app.launch()
|
79 |
|
80 |
if __name__ == "__main__":
|