File size: 2,631 Bytes
26b5631
 
 
 
00e7d0c
26b5631
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00e7d0c
 
 
 
 
 
 
26b5631
 
 
 
 
 
00e7d0c
 
 
 
26b5631
 
 
 
 
 
 
 
 
9e582dd
26b5631
9e582dd
26b5631
 
 
00e7d0c
 
 
 
 
 
 
 
 
9e582dd
26b5631
00e7d0c
 
 
26b5631
 
 
 
00e7d0c
 
26b5631
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import spacy
import os
import shutil
import gradio as gr
import subprocess
from booknlp.booknlp import BookNLP
from spacy.cli import download

# Ensure Spacy model is downloaded
def ensure_spacy_model():
    """Download the ``en_core_web_sm`` spaCy model if loading it fails.

    The model is loaded once purely as an availability probe; the loaded
    pipeline object is discarded.
    """
    model_name = "en_core_web_sm"
    try:
        spacy.load(model_name)
    except OSError:
        # An OSError from spacy.load is treated as "model not installed",
        # so fetch it through spaCy's own download command.
        download(model_name)

# Make sure the spaCy English model is available before BookNLP is built.
ensure_spacy_model()

# One BookNLP pipeline is created at import time and shared by every request.
model_params = dict(
    pipeline="entity,quote,supersense,event,coref",
    model="big",
)
booknlp = BookNLP("en", model_params)

# Define function to convert ebook to txt using Calibre
def convert_to_txt(input_file):
    """Convert an ebook to plain text with Calibre's ``ebook-convert``.

    The text file is written next to ``input_file`` with the extension
    replaced by ``.txt``. If that file already exists, it is reused and no
    conversion is run.

    Args:
        input_file: Path of the ebook to convert.

    Returns:
        Path of the ``.txt`` file.

    Raises:
        subprocess.CalledProcessError: If ``ebook-convert`` exits with a
            non-zero status.
    """
    stem, _extension = os.path.splitext(input_file)
    txt_path = stem + ".txt"

    # Reuse an earlier conversion instead of invoking Calibre again.
    if os.path.exists(txt_path):
        return txt_path

    subprocess.run(["ebook-convert", input_file, txt_path], check=True)
    return txt_path

# Define function to process file
def process_book(file):
    """Run BookNLP on an uploaded book and return a zip of its output.

    Args:
        file: The uploaded file. Either an object exposing the file path as
            ``.name`` or a plain path string.

    Returns:
        Path of the zip archive, located inside the output directory, that
        contains everything BookNLP wrote for this book.

    Raises:
        gr.Error: If no file was uploaded.
        subprocess.CalledProcessError: If converting a non-.txt upload with
            Calibre fails (raised by ``convert_to_txt``).
    """
    if file is None:
        raise gr.Error("Please upload a file to process.")

    # Accept either an object with a ``.name`` path attribute (what this
    # function originally assumed) or a plain path string.
    input_file = getattr(file, "name", file)
    output_dir = "output_dir/booknlp_output/"
    book_id = os.path.splitext(os.path.basename(input_file))[0]

    # Anything that is not already plain text goes through Calibre first.
    if not input_file.endswith(".txt"):
        input_file = convert_to_txt(input_file)

    # Start from an empty output directory so files from an earlier run
    # never end up in this book's archive.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Run BookNLP
    booknlp.process(input_file, output_dir, book_id)

    # Build the archive OUTSIDE the directory being zipped: writing it into
    # output_dir would let make_archive pick up the half-written zip as one
    # of its own members. Once complete, move it into output_dir so the
    # returned location is the same as before.
    archive_name = f"{book_id}_output"
    staging_base = os.path.join(
        os.path.dirname(os.path.normpath(output_dir)), archive_name
    )
    staged_zip = shutil.make_archive(staging_base, "zip", output_dir)
    zip_file = os.path.join(output_dir, f"{archive_name}.zip")
    shutil.move(staged_zip, zip_file)
    return zip_file

# Gradio Interface
def gradio_interface():
    """Build the Gradio upload/download UI around ``process_book`` and launch it."""
    # Extensions offered in the upload widget; the same list is echoed in
    # the description shown to the user.
    supported_formats = [
        '.azw', '.azw3', '.azw4',
        '.cbz', '.cbr', '.cb7', '.cbc',
        '.chm', '.djvu', '.docx', '.epub',
        '.fb2', '.fbz', '.html', '.htmlz',
        '.lit', '.lrf', '.mobi', '.odt',
        '.pdf', '.prc', '.pdb', '.pml',
        '.rb', '.rtf', '.snb', '.tcr',
        '.txt', '.txtz',
    ]

    upload_widget = gr.File(
        file_types=supported_formats,
        label="Upload an ebook file (.azw, .epub, .pdf, .txt, etc.)",
    )
    download_widget = gr.File(label="Download the output files")

    # Help text listing every accepted extension.
    formats_text = ', '.join(supported_formats)
    description = (
        f"Upload any of the supported formats: {formats_text}. "
        "If a .txt file is uploaded, it will directly be processed by BookNLP. "
        "Otherwise, it will be converted to .txt using Calibre first."
    )

    app = gr.Interface(
        fn=process_book,
        inputs=upload_widget,
        outputs=download_widget,
        title="BookNLP Processor with Ebook Support",
        description=description,
    )
    app.launch()

# Launch the web UI only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    gradio_interface()