drewThomasson committed on
Commit
00e7d0c
·
verified ·
1 Parent(s): 9e582dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -5
app.py CHANGED
@@ -2,16 +2,15 @@ import spacy
2
  import os
3
  import shutil
4
  import gradio as gr
 
5
  from booknlp.booknlp import BookNLP
6
  from spacy.cli import download
7
 
8
  # Ensure Spacy model is downloaded
9
  def ensure_spacy_model():
10
  try:
11
- # Try to load the model to see if it's already installed
12
  spacy.load("en_core_web_sm")
13
  except OSError:
14
- # If not installed, download the model
15
  download("en_core_web_sm")
16
 
17
  # Initialize Spacy model
@@ -24,12 +23,23 @@ model_params = {
24
  }
25
  booknlp = BookNLP("en", model_params)
26
 
 
 
 
 
 
 
 
27
  # Define function to process file
28
  def process_book(file):
29
  input_file = file.name
30
  output_dir = "output_dir/booknlp_output/"
31
  book_id = os.path.splitext(os.path.basename(input_file))[0]
32
 
 
 
 
 
33
  # Create output directory if it doesn't exist
34
  if os.path.exists(output_dir):
35
  shutil.rmtree(output_dir)
@@ -45,15 +55,26 @@ def process_book(file):
45
 
46
  # Gradio Interface
47
  def gradio_interface():
48
- file_input = gr.File(file_types=['.txt'], label="Upload a .txt file")
 
 
 
 
 
 
 
 
49
  file_output = gr.File(label="Download the output files")
50
 
 
 
 
51
  gr.Interface(
52
  fn=process_book,
53
  inputs=file_input,
54
  outputs=file_output,
55
- title="BookNLP Processor",
56
- description="Upload a .txt book and process it using BookNLP. Download the processed output files."
57
  ).launch()
58
 
59
  if __name__ == "__main__":
 
2
  import os
3
  import shutil
4
  import gradio as gr
5
+ import subprocess
6
  from booknlp.booknlp import BookNLP
7
  from spacy.cli import download
8
 
# Ensure Spacy model is downloaded
def ensure_spacy_model():
    """Make sure the ``en_core_web_sm`` spaCy model can be loaded.

    Attempts to load the model; if spaCy raises ``OSError`` the model is
    fetched with ``spacy.cli.download``.
    """
    model_name = "en_core_web_sm"
    try:
        spacy.load(model_name)
    except OSError:
        download(model_name)
15
 
16
  # Initialize Spacy model
 
23
  }
24
  booknlp = BookNLP("en", model_params)
25
 
# Define function to convert ebook to txt using Calibre
def convert_to_txt(input_file):
    """Convert an ebook to plain text with Calibre's ``ebook-convert``.

    The output path is ``input_file`` with its extension replaced by
    ``.txt``. If that file already exists it is returned as-is and no
    conversion is run.

    Args:
        input_file: Path of the ebook to convert.

    Returns:
        Path of the ``.txt`` file.

    Raises:
        subprocess.CalledProcessError: ``ebook-convert`` exited non-zero.
        OSError: ``ebook-convert`` could not be started (e.g. not installed).
    """
    output_txt = os.path.splitext(input_file)[0] + ".txt"
    if os.path.exists(output_txt):
        return output_txt

    try:
        subprocess.run(["ebook-convert", input_file, output_txt], check=True)
    except (subprocess.CalledProcessError, OSError):
        # A failed run may leave a partial file behind; because of the
        # existence shortcut above, it would otherwise be returned as a
        # finished conversion on the next call. The early return guarantees
        # the file did not exist before this attempt, so removing it here
        # can never delete the caller's input.
        if os.path.exists(output_txt):
            os.remove(output_txt)
        raise
    return output_txt
32
+
33
  # Define function to process file
34
  def process_book(file):
35
  input_file = file.name
36
  output_dir = "output_dir/booknlp_output/"
37
  book_id = os.path.splitext(os.path.basename(input_file))[0]
38
 
39
+ # Check if the file is already a .txt file
40
+ if not input_file.endswith(".txt"):
41
+ input_file = convert_to_txt(input_file)
42
+
43
  # Create output directory if it doesn't exist
44
  if os.path.exists(output_dir):
45
  shutil.rmtree(output_dir)
 
55
 
# Gradio Interface
def gradio_interface():
    """Build and launch the Gradio UI around ``process_book``.

    The upload widget is restricted to the extensions listed below, the
    output is a downloadable file, and the same extension list is shown in
    the interface description.
    """
    # Define supported file formats
    supported_formats = [
        '.azw', '.azw3', '.azw4', '.cbz', '.cbr', '.cb7', '.cbc', '.chm',
        '.djvu', '.docx', '.epub', '.fb2', '.fbz', '.html', '.htmlz', '.lit',
        '.lrf', '.mobi', '.odt', '.pdf', '.prc', '.pdb', '.pml', '.rb',
        '.rtf', '.snb', '.tcr', '.txt', '.txtz'
    ]

    upload_widget = gr.File(
        file_types=supported_formats,
        label="Upload an ebook file (.azw, .epub, .pdf, .txt, etc.)",
    )
    download_widget = gr.File(label="Download the output files")

    # Show supported formats in the description
    formats_text = ', '.join(supported_formats)
    description = f"Upload any of the supported formats: {formats_text}. If a .txt file is uploaded, it will directly be processed by BookNLP. Otherwise, it will be converted to .txt using Calibre first."

    app = gr.Interface(
        fn=process_book,
        inputs=upload_widget,
        outputs=download_widget,
        title="BookNLP Processor with Ebook Support",
        description=description,
    )
    app.launch()
79
 
80
  if __name__ == "__main__":