Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -35,10 +35,7 @@ class DocumentState:
|
|
35 |
doc_state = DocumentState()
|
36 |
|
37 |
def process_pdf_file(file_path):
|
38 |
-
"""
|
39 |
-
Convert PDF to images and extract text using PyMuPDF with improved error handling
|
40 |
-
and image quality settings.
|
41 |
-
"""
|
42 |
try:
|
43 |
doc = fitz.open(file_path)
|
44 |
images = []
|
@@ -47,38 +44,24 @@ def process_pdf_file(file_path):
|
|
47 |
for page_num in range(doc.page_count):
|
48 |
try:
|
49 |
page = doc[page_num]
|
50 |
-
|
51 |
-
# Extract text with better formatting
|
52 |
page_text = page.get_text("text")
|
53 |
-
if page_text.strip():
|
54 |
text += f"Page {page_num + 1}:\n{page_text}\n\n"
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
# Resize if image is too large (keeping aspect ratio)
|
71 |
-
max_size = 1600
|
72 |
-
if max(img.size) > max_size:
|
73 |
-
ratio = max_size / max(img.size)
|
74 |
-
new_size = tuple(int(dim * ratio) for dim in img.size)
|
75 |
-
img = img.resize(new_size, Image.Resampling.LANCZOS)
|
76 |
-
|
77 |
-
images.append(img)
|
78 |
-
|
79 |
-
except Exception as e:
|
80 |
-
logger.error(f"Error processing page {page_num} image: {str(e)}")
|
81 |
-
continue
|
82 |
|
83 |
except Exception as e:
|
84 |
logger.error(f"Error processing page {page_num}: {str(e)}")
|
@@ -95,28 +78,27 @@ def process_pdf_file(file_path):
|
|
95 |
logger.error(f"Error processing PDF file: {str(e)}")
|
96 |
raise
|
97 |
|
98 |
-
def
|
99 |
-
"""Process
|
100 |
try:
|
101 |
doc_state.clear()
|
102 |
|
103 |
-
if
|
104 |
-
|
105 |
-
else:
|
106 |
-
file_path = file
|
107 |
|
108 |
-
if
|
|
|
|
|
109 |
doc_state.doc_type = 'pdf'
|
110 |
try:
|
111 |
doc_state.current_doc_images, doc_state.current_doc_text = process_pdf_file(file_path)
|
112 |
return f"PDF processed successfully. Total pages: {len(doc_state.current_doc_images)}. You can now ask questions about the content."
|
113 |
except Exception as e:
|
114 |
-
return f"Error processing PDF: {str(e)}. Please try a different PDF file
|
115 |
else:
|
116 |
doc_state.doc_type = 'image'
|
117 |
try:
|
118 |
img = Image.open(file_path).convert("RGB")
|
119 |
-
# Resize if necessary
|
120 |
max_size = 1600
|
121 |
if max(img.size) > max_size:
|
122 |
ratio = max_size / max(img.size)
|
@@ -133,28 +115,13 @@ def process_file(file):
|
|
133 |
@spaces.GPU()
|
134 |
def bot_streaming(message, history, max_new_tokens=8192):
|
135 |
try:
|
136 |
-
txt = message["text"]
|
137 |
messages = []
|
138 |
|
139 |
-
# Process
|
140 |
-
if message.get("files") and len(message["files"]) > 0:
|
141 |
-
result = process_file(message["files"][0])
|
142 |
-
if "Error" in result:
|
143 |
-
yield result
|
144 |
-
return
|
145 |
-
|
146 |
-
# Process history with better error handling
|
147 |
for i, msg in enumerate(history):
|
148 |
try:
|
149 |
-
|
150 |
-
|
151 |
-
if "files" in msg[0] and len(msg[0]["files"]) > 0:
|
152 |
-
user_content.append({"type": "image"})
|
153 |
-
messages.append({"role": "user", "content": user_content})
|
154 |
-
messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
|
155 |
-
elif isinstance(msg[0], str):
|
156 |
-
messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
|
157 |
-
messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
|
158 |
except Exception as e:
|
159 |
logger.error(f"Error processing history message {i}: {str(e)}")
|
160 |
continue
|
@@ -162,10 +129,10 @@ def bot_streaming(message, history, max_new_tokens=8192):
|
|
162 |
# Include document context
|
163 |
if doc_state.current_doc_images:
|
164 |
context = f"\nDocument context:\n{doc_state.current_doc_text}" if doc_state.current_doc_text else ""
|
165 |
-
current_msg = f"{
|
166 |
messages.append({"role": "user", "content": [{"type": "text", "text": current_msg}, {"type": "image"}]})
|
167 |
else:
|
168 |
-
messages.append({"role": "user", "content": [{"type": "text", "text":
|
169 |
|
170 |
# Process inputs
|
171 |
texts = processor.apply_chat_template(messages, add_generation_prompt=True)
|
@@ -210,17 +177,21 @@ with gr.Blocks() as demo:
|
|
210 |
gr.Markdown("# Document Analyzer with Chat Support")
|
211 |
gr.Markdown("Upload a PDF or image and chat about its contents. For PDFs, all pages will be processed for visual analysis.")
|
212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
chatbot = gr.ChatInterface(
|
214 |
fn=bot_streaming,
|
215 |
title="Document Chat",
|
216 |
-
examples=[
|
217 |
-
[{"text": "Which era does this piece belong to? Give details about the era.", "files":["./examples/rococo.jpg"]}, 200],
|
218 |
-
[{"text": "Where do the droughts happen according to this diagram?", "files":["./examples/weather_events.png"]}, 250],
|
219 |
-
[{"text": "What happens when you take out white cat from this chain?", "files":["./examples/ai2d_test.jpg"]}, 250],
|
220 |
-
[{"text": "How long does it take from invoice date to due date? Be short and concise.", "files":["./examples/invoice.png"]}, 250],
|
221 |
-
[{"text": "Where to find this monument? Can you give me other recommendations around the area?", "files":["./examples/wat_arun.jpg"]}, 250],
|
222 |
-
],
|
223 |
-
textbox=gr.MultimodalTextbox(),
|
224 |
additional_inputs=[
|
225 |
gr.Slider(
|
226 |
minimum=10,
|
@@ -230,16 +201,20 @@ with gr.Blocks() as demo:
|
|
230 |
label="Maximum number of new tokens to generate",
|
231 |
)
|
232 |
],
|
233 |
-
cache_examples=False,
|
234 |
stop_btn="Stop Generation",
|
235 |
-
fill_height=True
|
236 |
-
multimodal=True
|
237 |
)
|
238 |
|
239 |
-
|
240 |
-
|
|
|
|
|
|
|
241 |
|
242 |
-
|
|
|
|
|
|
|
243 |
|
244 |
# Launch the interface
|
245 |
demo.launch(debug=True)
|
|
|
35 |
doc_state = DocumentState()
|
36 |
|
37 |
def process_pdf_file(file_path):
|
38 |
+
"""Convert PDF to images and extract text using PyMuPDF."""
|
|
|
|
|
|
|
39 |
try:
|
40 |
doc = fitz.open(file_path)
|
41 |
images = []
|
|
|
44 |
for page_num in range(doc.page_count):
|
45 |
try:
|
46 |
page = doc[page_num]
|
|
|
|
|
47 |
page_text = page.get_text("text")
|
48 |
+
if page_text.strip():
|
49 |
text += f"Page {page_num + 1}:\n{page_text}\n\n"
|
50 |
|
51 |
+
zoom = 2
|
52 |
+
mat = fitz.Matrix(zoom, zoom)
|
53 |
+
pix = page.get_pixmap(matrix=mat, alpha=False)
|
54 |
+
img_data = pix.tobytes("png")
|
55 |
+
img = Image.open(io.BytesIO(img_data))
|
56 |
+
img = img.convert("RGB")
|
57 |
+
|
58 |
+
max_size = 1600
|
59 |
+
if max(img.size) > max_size:
|
60 |
+
ratio = max_size / max(img.size)
|
61 |
+
new_size = tuple(int(dim * ratio) for dim in img.size)
|
62 |
+
img = img.resize(new_size, Image.Resampling.LANCZOS)
|
63 |
+
|
64 |
+
images.append(img)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
except Exception as e:
|
67 |
logger.error(f"Error processing page {page_num}: {str(e)}")
|
|
|
78 |
logger.error(f"Error processing PDF file: {str(e)}")
|
79 |
raise
|
80 |
|
81 |
+
def process_uploaded_file(file):
|
82 |
+
"""Process uploaded file and update document state."""
|
83 |
try:
|
84 |
doc_state.clear()
|
85 |
|
86 |
+
if file is None:
|
87 |
+
return "No file uploaded. Please upload a file."
|
|
|
|
|
88 |
|
89 |
+
file_path = file.name if isinstance(file, FileData) else file
|
90 |
+
|
91 |
+
if file_path.lower().endswith('.pdf'):
|
92 |
doc_state.doc_type = 'pdf'
|
93 |
try:
|
94 |
doc_state.current_doc_images, doc_state.current_doc_text = process_pdf_file(file_path)
|
95 |
return f"PDF processed successfully. Total pages: {len(doc_state.current_doc_images)}. You can now ask questions about the content."
|
96 |
except Exception as e:
|
97 |
+
return f"Error processing PDF: {str(e)}. Please try a different PDF file."
|
98 |
else:
|
99 |
doc_state.doc_type = 'image'
|
100 |
try:
|
101 |
img = Image.open(file_path).convert("RGB")
|
|
|
102 |
max_size = 1600
|
103 |
if max(img.size) > max_size:
|
104 |
ratio = max_size / max(img.size)
|
|
|
115 |
@spaces.GPU()
|
116 |
def bot_streaming(message, history, max_new_tokens=8192):
|
117 |
try:
|
|
|
118 |
messages = []
|
119 |
|
120 |
+
# Process history
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
for i, msg in enumerate(history):
|
122 |
try:
|
123 |
+
messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
|
124 |
+
messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
except Exception as e:
|
126 |
logger.error(f"Error processing history message {i}: {str(e)}")
|
127 |
continue
|
|
|
129 |
# Include document context
|
130 |
if doc_state.current_doc_images:
|
131 |
context = f"\nDocument context:\n{doc_state.current_doc_text}" if doc_state.current_doc_text else ""
|
132 |
+
current_msg = f"{message}{context}"
|
133 |
messages.append({"role": "user", "content": [{"type": "text", "text": current_msg}, {"type": "image"}]})
|
134 |
else:
|
135 |
+
messages.append({"role": "user", "content": [{"type": "text", "text": message}]})
|
136 |
|
137 |
# Process inputs
|
138 |
texts = processor.apply_chat_template(messages, add_generation_prompt=True)
|
|
|
177 |
gr.Markdown("# Document Analyzer with Chat Support")
|
178 |
gr.Markdown("Upload a PDF or image and chat about its contents. For PDFs, all pages will be processed for visual analysis.")
|
179 |
|
180 |
+
with gr.Row():
|
181 |
+
file_upload = gr.File(
|
182 |
+
label="Upload Document (PDF or Image)",
|
183 |
+
file_types=["pdf", "image"]
|
184 |
+
)
|
185 |
+
upload_status = gr.Textbox(
|
186 |
+
label="Upload Status",
|
187 |
+
interactive=False
|
188 |
+
)
|
189 |
+
|
190 |
+
clear_btn = gr.Button("Clear Document Context")
|
191 |
+
|
192 |
chatbot = gr.ChatInterface(
|
193 |
fn=bot_streaming,
|
194 |
title="Document Chat",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
additional_inputs=[
|
196 |
gr.Slider(
|
197 |
minimum=10,
|
|
|
201 |
label="Maximum number of new tokens to generate",
|
202 |
)
|
203 |
],
|
|
|
204 |
stop_btn="Stop Generation",
|
205 |
+
fill_height=True
|
|
|
206 |
)
|
207 |
|
208 |
+
file_upload.change(
|
209 |
+
fn=process_uploaded_file,
|
210 |
+
inputs=[file_upload],
|
211 |
+
outputs=[upload_status]
|
212 |
+
)
|
213 |
|
214 |
+
clear_btn.click(
|
215 |
+
fn=clear_context,
|
216 |
+
outputs=[upload_status]
|
217 |
+
)
|
218 |
|
219 |
# Launch the interface
|
220 |
demo.launch(debug=True)
|