drewThomasson committed on
Commit
fe3d0f6
·
verified ·
1 Parent(s): c010f6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -17
app.py CHANGED
@@ -6,23 +6,6 @@ import subprocess
6
  from booknlp.booknlp import BookNLP
7
  from spacy.cli import download
8
 
9
- # Ensure Spacy model is downloaded
10
- def ensure_spacy_model():
11
- try:
12
- spacy.load("en_core_web_sm")
13
- except OSError:
14
- download("en_core_web_sm")
15
-
16
- # Initialize Spacy model
17
- ensure_spacy_model()
18
-
19
- # Initialize BookNLP model
20
- model_params = {
21
- "pipeline": "entity,quote,supersense,event,coref",
22
- "model": "big"
23
- }
24
- booknlp = BookNLP("en", model_params)
25
-
26
  # Define function to convert ebook to txt using Calibre
27
  def convert_to_txt(input_file):
28
  output_txt = os.path.splitext(input_file)[0] + ".txt"
@@ -36,6 +19,21 @@ def process_book(file):
36
  output_dir = "output_dir/booknlp_output/"
37
  book_id = os.path.splitext(os.path.basename(input_file))[0]
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # Check if the file is already a .txt file
40
  if not input_file.endswith(".txt"):
41
  input_file = convert_to_txt(input_file)
 
6
  from booknlp.booknlp import BookNLP
7
  from spacy.cli import download
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Define function to convert ebook to txt using Calibre
10
  def convert_to_txt(input_file):
11
  output_txt = os.path.splitext(input_file)[0] + ".txt"
 
19
  output_dir = "output_dir/booknlp_output/"
20
  book_id = os.path.splitext(os.path.basename(input_file))[0]
21
 
22
+ # Ensure Spacy model is downloaded
23
+ def ensure_spacy_model():
24
+ try:
25
+ spacy.load("en_core_web_sm")
26
+ except OSError:
27
+ download("en_core_web_sm")
28
+
29
+ # Initialize Spacy model and BookNLP
30
+ ensure_spacy_model()
31
+ model_params = {
32
+ "pipeline": "entity,quote,supersense,event,coref",
33
+ "model": "big"
34
+ }
35
+ booknlp = BookNLP("en", model_params)
36
+
37
  # Check if the file is already a .txt file
38
  if not input_file.endswith(".txt"):
39
  input_file = convert_to_txt(input_file)