Spaces:
Runtime error
Runtime error

added article="This interface is based on [BookNLP](https://github.com/booknlp/booknlp)."
4f6be96
verified
import os | |
import gradio as gr | |
import subprocess | |
# Convert an ebook to plain text with Calibre's command-line converter
def convert_to_txt(input_file):
    """Return the path of a ``.txt`` rendition of ``input_file``.

    The text file sits next to the input and shares its base name. If a
    file already exists at that path it is reused untouched; otherwise
    Calibre's ``ebook-convert`` executable is run to create it.

    Args:
        input_file: Path of the ebook to convert.

    Returns:
        The input path with its extension replaced by ``.txt``.

    Raises:
        subprocess.CalledProcessError: if ``ebook-convert`` exits non-zero.
    """
    stem, _extension = os.path.splitext(input_file)
    txt_path = stem + ".txt"

    # A previous conversion (or a .txt input) leaves nothing to do.
    if os.path.exists(txt_path):
        return txt_path

    subprocess.run(["ebook-convert", input_file, txt_path], check=True)
    return txt_path
# Run BookNLP on an uploaded book and hand back a zip of the results
def process_book(file):
    """Run the BookNLP pipeline on an uploaded file and zip its output.

    Non-``.txt`` uploads are first converted to plain text with
    ``convert_to_txt``. The BookNLP output directory is wiped and
    recreated on every call, so it only ever holds the latest run.

    Args:
        file: The upload handed over by Gradio: either an object whose
            ``name`` attribute is the path on disk, or the path string
            itself.

    Returns:
        Path of a zip archive holding everything BookNLP wrote to the
        output directory.
    """
    import shutil
    import spacy
    from booknlp.booknlp import BookNLP
    from spacy.cli import download
    # Imported for its side effect: per the original author's note, this
    # downloads the BookNLP model files from a Hugging Face backup.
    import download_missing_booknlp_models  # noqa: F401

    # Accept both a file-like upload (with .name) and a bare path string.
    input_file = getattr(file, "name", file)
    output_dir = "output_dir/booknlp_output/"
    book_id = os.path.splitext(os.path.basename(input_file))[0]

    # Ensure the spaCy model is available, downloading it on first use
    def ensure_spacy_model():
        try:
            spacy.load("en_core_web_sm")
        except OSError:
            download("en_core_web_sm")

    # Initialize the spaCy model and BookNLP
    ensure_spacy_model()
    model_params = {
        "pipeline": "entity,quote,supersense,event,coref",
        "model": "big",
    }
    booknlp = BookNLP("en", model_params)

    # Anything that is not already plain text goes through Calibre first.
    # The check is case-insensitive so "BOOK.TXT" is not re-converted.
    if not input_file.lower().endswith(".txt"):
        input_file = convert_to_txt(input_file)

    # Start from an empty output directory so stale results never leak
    # into the archive.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Run BookNLP
    booknlp.process(input_file, output_dir, book_id)

    # Write the archive NEXT TO the output directory rather than inside it:
    # an archive created inside the directory being zipped sits among the
    # files being collected and can end up containing a partial copy of
    # itself.
    archive_parent = os.path.dirname(os.path.normpath(output_dir))
    archive_base = os.path.join(archive_parent, f"{book_id}_output")
    return shutil.make_archive(archive_base, "zip", output_dir)
# Gradio Interface
def gradio_interface():
    """Assemble the upload/download UI around ``process_book`` and launch it."""
    # File extensions the upload widget accepts
    accepted_extensions = [
        '.azw', '.azw3', '.azw4', '.cbz', '.cbr', '.cb7', '.cbc', '.chm',
        '.djvu', '.docx', '.epub', '.fb2', '.fbz', '.html', '.htmlz', '.lit',
        '.lrf', '.mobi', '.odt', '.pdf', '.prc', '.pdb', '.pml', '.rb',
        '.rtf', '.snb', '.tcr', '.txt', '.txtz',
    ]

    upload_widget = gr.File(
        file_types=accepted_extensions,
        label="Upload an ebook file (.azw, .epub, .pdf, .txt, etc.)",
    )
    download_widget = gr.File(label="Download the output files")

    # Spell out every accepted extension in the page description
    formats_text = ", ".join(accepted_extensions)
    blurb = (
        f"Upload any of the supported formats: {formats_text}. "
        "If a .txt file is uploaded, it will directly be processed by BookNLP. "
        "Otherwise, it will be converted to .txt using Calibre first."
    )

    app = gr.Interface(
        fn=process_book,
        inputs=upload_widget,
        outputs=download_widget,
        title="BookNLP Processor with Ebook Support",
        description=blurb,
        article="This interface is based on [BookNLP](https://github.com/booknlp/booknlp).",
    )
    app.launch()
# Start the web UI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    gradio_interface()