import os
import subprocess

import gradio as gr


# Define function to convert ebook to txt using Calibre
def convert_to_txt(input_file):
    """Convert an ebook to plain text with Calibre's ``ebook-convert`` CLI.

    The output path is ``input_file`` with its extension replaced by
    ``.txt``. Conversion is skipped when that file already exists.

    Args:
        input_file: Path of the ebook to convert.

    Returns:
        Path of the ``.txt`` file.

    Raises:
        subprocess.CalledProcessError: If ``ebook-convert`` exits non-zero.
        FileNotFoundError: If the ``ebook-convert`` executable is not found.
    """
    output_txt = os.path.splitext(input_file)[0] + ".txt"
    if not os.path.exists(output_txt):
        # Argument list (no shell) so the path is never shell-interpreted.
        subprocess.run(["ebook-convert", input_file, output_txt], check=True)
    return output_txt


# Define function to process file
def process_book(file):
    """Run BookNLP on an uploaded book and return a zip of the results.

    Args:
        file: The upload handed over by Gradio. Either a path string or an
            object exposing the path as ``.name`` is accepted.

    Returns:
        Path of the zip archive containing the BookNLP output files.

    Raises:
        gr.Error: If no file was provided.
        subprocess.CalledProcessError: If the Calibre conversion fails.
    """
    # Heavy dependencies are imported lazily, as in the original script.
    import shutil

    import spacy
    from booknlp.booknlp import BookNLP
    from spacy.cli import download

    # This will download the booknlp files using my huggingface backup
    # (the module is not referenced again; it is imported for its effect).
    import download_missing_booknlp_models  # noqa: F401

    if file is None:
        raise gr.Error("Please upload a file to process.")

    # Depending on the Gradio version/config the handler receives a plain
    # path or a tempfile-like wrapper whose path is in ``.name``.
    if isinstance(file, (str, os.PathLike)):
        input_file = os.fspath(file)
    else:
        input_file = file.name

    output_root = "output_dir"
    output_dir = os.path.join(output_root, "booknlp_output")
    book_id = os.path.splitext(os.path.basename(input_file))[0]

    # Ensure Spacy model is downloaded
    def ensure_spacy_model():
        try:
            spacy.load("en_core_web_sm")
        except OSError:
            download("en_core_web_sm")

    # Initialize Spacy model and BookNLP
    ensure_spacy_model()
    model_params = {
        "pipeline": "entity,quote,supersense,event,coref",
        "model": "big",
    }
    booknlp = BookNLP("en", model_params)

    # Convert anything that is not already a .txt file (case-insensitive,
    # so "BOOK.TXT" is not pushed through Calibre needlessly).
    if not input_file.lower().endswith(".txt"):
        input_file = convert_to_txt(input_file)

    # Start from an empty output directory so results of a previous run
    # do not leak into this run's archive.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Run BookNLP
    booknlp.process(input_file, output_dir, book_id)

    # Zip the output folder. The archive is written next to the folder, not
    # inside it: creating it inside the directory being archived can make
    # the partially written zip part of its own contents.
    archive_base = os.path.join(output_root, f"{book_id}_output")
    zip_file = f"{archive_base}.zip"
    shutil.make_archive(archive_base, "zip", output_dir)

    return zip_file


# Gradio Interface
def gradio_interface():
    """Build the upload/download Gradio interface and launch it."""
    # Define supported file formats
    supported_formats = [
        '.azw', '.azw3', '.azw4', '.cbz', '.cbr', '.cb7', '.cbc', '.chm',
        '.djvu', '.docx', '.epub', '.fb2', '.fbz', '.html', '.htmlz',
        '.lit', '.lrf', '.mobi', '.odt', '.pdf', '.prc', '.pdb', '.pml',
        '.rb', '.rtf', '.snb', '.tcr', '.txt', '.txtz',
    ]

    file_input = gr.File(
        file_types=supported_formats,
        label="Upload an ebook file (.azw, .epub, .pdf, .txt, etc.)",
    )
    file_output = gr.File(label="Download the output files")

    # Show supported formats in the description
    description = f"Upload any of the supported formats: {', '.join(supported_formats)}. If a .txt file is uploaded, it will directly be processed by BookNLP. Otherwise, it will be converted to .txt using Calibre first."

    gr.Interface(
        fn=process_book,
        inputs=file_input,
        outputs=file_output,
        title="BookNLP Processor with Ebook Support",
        description=description,
        article="This interface is based on [BookNLP](https://github.com/booknlp/booknlp).",
    ).launch()


if __name__ == "__main__":
    gradio_interface()