Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,8 @@
|
|
1 |
import warnings
|
2 |
warnings.simplefilter(action='ignore', category=FutureWarning)
|
|
|
|
|
|
|
3 |
|
4 |
import fitz
|
5 |
import gradio as gr
|
@@ -20,7 +23,7 @@ llm = HuggingFaceEndpoint(
|
|
20 |
llm_engine_hf = ChatHuggingFace(llm=llm)
|
21 |
|
22 |
def read_pdf(file_path):
|
23 |
-
|
24 |
try:
|
25 |
pdf_document = fitz.open(file_path)
|
26 |
text = ""
|
@@ -28,15 +31,28 @@ def read_pdf(file_path):
|
|
28 |
page = pdf_document[page_num]
|
29 |
text += page.get_text()
|
30 |
|
|
|
|
|
|
|
|
|
|
|
31 |
return text
|
|
|
32 |
except Exception as e:
|
33 |
-
|
|
|
|
|
34 |
|
35 |
def read_txt(file_path):
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
def summarize(file, n_words):
|
42 |
global llm
|
@@ -47,11 +63,8 @@ def summarize(file, n_words):
|
|
47 |
else:
|
48 |
text = read_txt(file_path)
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
print(f"Slicing the first {CONTEXT_WINDOW} characters")
|
53 |
-
text = text[:CONTEXT_WINDOW]
|
54 |
-
|
55 |
lang = detect(text[:CONTEXT_WINDOW])
|
56 |
template_translate = '''
|
57 |
Please carefully read the following document:
|
@@ -74,6 +87,8 @@ The summary should be in {LANG} language.
|
|
74 |
formatted_prompt = prompt_summarize.format(TEXT=chunk, LANG=lang)
|
75 |
summary = llm.invoke(formatted_prompt)
|
76 |
summaries.append(summary)
|
|
|
|
|
77 |
|
78 |
final_summary = "\n\n".join(summaries)
|
79 |
return final_summary
|
|
|
1 |
import warnings
|
2 |
warnings.simplefilter(action='ignore', category=FutureWarning)
|
3 |
+
import logging
|
4 |
+
logging.basicConfig(level=logging.INFO)
|
5 |
+
logger = logging.getLogger(__name__)
|
6 |
|
7 |
import fitz
|
8 |
import gradio as gr
|
|
|
23 |
llm_engine_hf = ChatHuggingFace(llm=llm)
|
24 |
|
25 |
def read_pdf(file_path):
|
26 |
+
logger.info("Reading a PDF file")
|
27 |
try:
|
28 |
pdf_document = fitz.open(file_path)
|
29 |
text = ""
|
|
|
31 |
page = pdf_document[page_num]
|
32 |
text += page.get_text()
|
33 |
|
34 |
+
if not text.strip():
|
35 |
+
message = "PDF contains no text. It may be due to the PDF being password-protected, collapsed, or full of images."
|
36 |
+
logger.info(message)
|
37 |
+
return message
|
38 |
+
|
39 |
return text
|
40 |
+
|
41 |
except Exception as e:
|
42 |
+
error_message = f"Error reading PDF file: {e}"
|
43 |
+
logger.error(error_message)
|
44 |
+
return error_message
|
45 |
|
46 |
def read_txt(file_path):
|
47 |
+
logger.info("Reading a TXT file")
|
48 |
+
try:
|
49 |
+
with open(file_path, "r", encoding="utf-8") as f:
|
50 |
+
text = f.read()
|
51 |
+
return text
|
52 |
+
except Exception as e:
|
53 |
+
error_message = f"Error reading TXT file: {e}"
|
54 |
+
logger.error(error_message)
|
55 |
+
return error_message
|
56 |
|
57 |
def summarize(file, n_words):
|
58 |
global llm
|
|
|
63 |
else:
|
64 |
text = read_txt(file_path)
|
65 |
|
66 |
+
logger.info("Length of text is %d", len(text))
|
67 |
+
|
|
|
|
|
|
|
68 |
lang = detect(text[:CONTEXT_WINDOW])
|
69 |
template_translate = '''
|
70 |
Please carefully read the following document:
|
|
|
87 |
formatted_prompt = prompt_summarize.format(TEXT=chunk, LANG=lang)
|
88 |
summary = llm.invoke(formatted_prompt)
|
89 |
summaries.append(summary)
|
90 |
+
|
91 |
+
logger.info(f"Chunked into {len(summaries)}.")
|
92 |
|
93 |
final_summary = "\n\n".join(summaries)
|
94 |
return final_summary
|