# BookNLP-Demo / app.py
# drewThomasson's picture
# added article="This interface is based on [BookNLP](https://github.com/booknlp/booknlp)."
# 4f6be96 verified
import os
import gradio as gr
import subprocess
# Define function to convert ebook to txt using Calibre
def convert_to_txt(input_file):
    """Convert an ebook to plain text with Calibre's ``ebook-convert``.

    The text file is written next to the source, using the same stem and a
    ``.txt`` extension. An existing text file is reused only while it is at
    least as new as the source, so an updated ebook is not silently paired
    with the output of an earlier conversion.

    Args:
        input_file: Path of the ebook to convert.

    Returns:
        Path of the resulting ``.txt`` file.

    Raises:
        FileNotFoundError: If the ``ebook-convert`` executable is not found.
        subprocess.CalledProcessError: If ``ebook-convert`` exits non-zero.
    """
    output_txt = os.path.splitext(input_file)[0] + ".txt"

    try:
        up_to_date = (
            os.path.getmtime(output_txt) >= os.path.getmtime(input_file)
        )
    except OSError:
        # One of the two paths could not be stat'ed. Fall back to the plain
        # existence check: reuse an existing output, otherwise convert.
        up_to_date = os.path.exists(output_txt)

    if not up_to_date:
        # Argument list (no shell) so the file name is never shell-interpreted.
        subprocess.run(["ebook-convert", input_file, output_txt], check=True)
    return output_txt
# Define function to process file
def process_book(file):
    """Run BookNLP on an uploaded book and return the path of a zip of its output.

    Non-``.txt`` uploads are first converted to plain text via
    ``convert_to_txt``. The BookNLP output directory is wiped and recreated on
    every call, then archived.

    Args:
        file: The upload handed over by the UI. Either a path (``str`` or
            ``os.PathLike``) or an object exposing the on-disk path as
            ``.name``.

    Returns:
        Path of the zip archive containing the files BookNLP produced.

    Raises:
        ValueError: If no file was supplied.
    """
    # Reject an empty submission up front, before any heavy import or model
    # load, instead of failing later with an opaque AttributeError.
    if file is None:
        raise ValueError(
            "No file was uploaded; please choose an ebook or .txt file first."
        )

    # Third-party imports are kept function-local, as in the original.
    import shutil

    import spacy
    from booknlp.booknlp import BookNLP
    from spacy.cli import download

    # Imported for its side effect: per the original author's note, this
    # downloads the BookNLP model files from a Hugging Face backup.
    import download_missing_booknlp_models  # noqa: F401

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    if isinstance(file, (str, os.PathLike)):
        input_file = os.fspath(file)
    else:
        input_file = file.name

    output_dir = "output_dir/booknlp_output/"
    book_id = os.path.splitext(os.path.basename(input_file))[0]

    def ensure_spacy_model():
        """Download the small English spaCy model if it cannot be loaded."""
        try:
            spacy.load("en_core_web_sm")
        except OSError:
            download("en_core_web_sm")

    # Initialize the spaCy model and BookNLP.
    ensure_spacy_model()
    model_params = {
        "pipeline": "entity,quote,supersense,event,coref",
        "model": "big",
    }
    booknlp = BookNLP("en", model_params)

    # Only non-text uploads need the Calibre conversion; compare the
    # extension case-insensitively so ``BOOK.TXT`` is also treated as text.
    if not input_file.lower().endswith(".txt"):
        input_file = convert_to_txt(input_file)

    # Start from an empty output directory so results of a previous run do
    # not leak into this archive.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Run BookNLP.
    booknlp.process(input_file, output_dir, book_id)

    # Write the archive to the parent of the archived directory. Creating it
    # inside ``output_dir`` would let the half-written zip be picked up as one
    # of its own members.
    archive_root = os.path.dirname(os.path.normpath(output_dir))
    archive_base = os.path.join(archive_root, f"{book_id}_output")
    return shutil.make_archive(archive_base, "zip", output_dir)
# Gradio Interface
def gradio_interface():
    """Build the upload/download web UI around ``process_book`` and launch it."""
    # File extensions offered in the upload widget.
    supported_formats = [
        '.azw', '.azw3', '.azw4', '.cbz', '.cbr', '.cb7', '.cbc', '.chm',
        '.djvu', '.docx', '.epub', '.fb2', '.fbz', '.html', '.htmlz', '.lit',
        '.lrf', '.mobi', '.odt', '.pdf', '.prc', '.pdb', '.pml', '.rb',
        '.rtf', '.snb', '.tcr', '.txt', '.txtz'
    ]

    # The description shown in the UI lists every supported extension.
    formats_listing = ', '.join(supported_formats)
    description = (
        "Upload any of the supported formats: " + formats_listing + ". "
        "If a .txt file is uploaded, it will directly be processed by BookNLP. "
        "Otherwise, it will be converted to .txt using Calibre first."
    )

    upload_widget = gr.File(
        file_types=supported_formats,
        label="Upload an ebook file (.azw, .epub, .pdf, .txt, etc.)",
    )
    download_widget = gr.File(label="Download the output files")

    demo = gr.Interface(
        fn=process_book,
        inputs=upload_widget,
        outputs=download_widget,
        title="BookNLP Processor with Ebook Support",
        description=description,
        article="This interface is based on [BookNLP](https://github.com/booknlp/booknlp).",
    )
    demo.launch()
# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    gradio_interface()