Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,9 +15,11 @@ from transformers import (
|
|
| 15 |
from transformers import Qwen2_5_VLForConditionalGeneration
|
| 16 |
|
| 17 |
# Helper Functions
|
|
|
|
| 18 |
def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
|
| 19 |
"""
|
| 20 |
Returns an HTML snippet for a thin animated progress bar with a label.
|
|
|
|
| 21 |
"""
|
| 22 |
return f'''
|
| 23 |
<div style="display: flex; align-items: center;">
|
|
@@ -34,6 +36,7 @@ def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_colo
|
|
| 34 |
</style>
|
| 35 |
'''
|
| 36 |
|
|
|
|
| 37 |
def downsample_video(video_path):
|
| 38 |
"""
|
| 39 |
Downsamples a video file by extracting 25 evenly spaced frames.
|
|
@@ -78,7 +81,7 @@ rolmocr_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
| 78 |
# Main Inference Function
|
| 79 |
@spaces.GPU
|
| 80 |
def model_inference(input_dict, history, use_rolmocr=False):
|
| 81 |
-
text = input_dict
|
| 82 |
files = input_dict.get("files", [])
|
| 83 |
|
| 84 |
if not text and not files:
|
|
@@ -133,25 +136,25 @@ def model_inference(input_dict, history, use_rolmocr=False):
|
|
| 133 |
thread.start()
|
| 134 |
|
| 135 |
buffer = ""
|
|
|
|
| 136 |
yield progress_bar_html(f"Processing with {model_name}")
|
| 137 |
|
| 138 |
-
# Stream
|
| 139 |
for new_text in streamer:
|
| 140 |
buffer += new_text
|
| 141 |
buffer = buffer.replace("<|im_end|>", "")
|
| 142 |
time.sleep(0.01)
|
| 143 |
yield buffer
|
| 144 |
|
| 145 |
-
# Ensure generation
|
| 146 |
thread.join()
|
| 147 |
|
| 148 |
-
#
|
| 149 |
try:
|
| 150 |
with open("response.txt", "w", encoding="utf-8") as f:
|
| 151 |
-
f.write(buffer
|
| 152 |
except Exception as e:
|
| 153 |
-
|
| 154 |
-
yield f"Warning: could not write response to file: {e}"
|
| 155 |
|
| 156 |
# Gradio Interface
|
| 157 |
examples = [
|
|
@@ -160,9 +163,10 @@ examples = [
|
|
| 160 |
[{"text": "Extract as JSON table from the table", "files": ["examples/4.jpg"]}],
|
| 161 |
]
|
| 162 |
|
|
|
|
| 163 |
demo = gr.ChatInterface(
|
| 164 |
fn=model_inference,
|
| 165 |
-
description="# **Multimodal OCR
|
| 166 |
examples=examples,
|
| 167 |
textbox=gr.MultimodalTextbox(
|
| 168 |
label="Query Input",
|
|
@@ -176,5 +180,4 @@ demo = gr.ChatInterface(
|
|
| 176 |
additional_inputs=[gr.Checkbox(label="Use RolmOCR", value=False, info="Check to use RolmOCR, uncheck to use Qwen2VL OCR")],
|
| 177 |
)
|
| 178 |
|
| 179 |
-
|
| 180 |
-
demo.launch(debug=True)
|
|
|
|
| 15 |
from transformers import Qwen2_5_VLForConditionalGeneration
|
| 16 |
|
| 17 |
# Helper Functions
|
| 18 |
+
|
| 19 |
def progress_bar_html(label: str, primary_color: str = "#4B0082", secondary_color: str = "#9370DB") -> str:
|
| 20 |
"""
|
| 21 |
Returns an HTML snippet for a thin animated progress bar with a label.
|
| 22 |
+
Colors can be customized; default colors are used for Qwen2VL/Aya-Vision.
|
| 23 |
"""
|
| 24 |
return f'''
|
| 25 |
<div style="display: flex; align-items: center;">
|
|
|
|
| 36 |
</style>
|
| 37 |
'''
|
| 38 |
|
| 39 |
+
|
| 40 |
def downsample_video(video_path):
|
| 41 |
"""
|
| 42 |
Downsamples a video file by extracting 25 evenly spaced frames.
|
|
|
|
| 81 |
# Main Inference Function
|
| 82 |
@spaces.GPU
|
| 83 |
def model_inference(input_dict, history, use_rolmocr=False):
|
| 84 |
+
text = input_dict.get("text", "").strip()
|
| 85 |
files = input_dict.get("files", [])
|
| 86 |
|
| 87 |
if not text and not files:
|
|
|
|
| 136 |
thread.start()
|
| 137 |
|
| 138 |
buffer = ""
|
| 139 |
+
# Send initial progress bar
|
| 140 |
yield progress_bar_html(f"Processing with {model_name}")
|
| 141 |
|
| 142 |
+
# Stream generation
|
| 143 |
for new_text in streamer:
|
| 144 |
buffer += new_text
|
| 145 |
buffer = buffer.replace("<|im_end|>", "")
|
| 146 |
time.sleep(0.01)
|
| 147 |
yield buffer
|
| 148 |
|
| 149 |
+
# Ensure generation is complete
|
| 150 |
thread.join()
|
| 151 |
|
| 152 |
+
# Save the full response to response.txt
|
| 153 |
try:
|
| 154 |
with open("response.txt", "w", encoding="utf-8") as f:
|
| 155 |
+
f.write(buffer)
|
| 156 |
except Exception as e:
|
| 157 |
+
yield f"Error saving response: {e}"
|
|
|
|
| 158 |
|
| 159 |
# Gradio Interface
|
| 160 |
examples = [
|
|
|
|
| 163 |
[{"text": "Extract as JSON table from the table", "files": ["examples/4.jpg"]}],
|
| 164 |
]
|
| 165 |
|
| 166 |
+
|
| 167 |
demo = gr.ChatInterface(
|
| 168 |
fn=model_inference,
|
| 169 |
+
description="# **Multimodal OCR `@RolmOCR and Default Qwen2VL OCR`**",
|
| 170 |
examples=examples,
|
| 171 |
textbox=gr.MultimodalTextbox(
|
| 172 |
label="Query Input",
|
|
|
|
| 180 |
additional_inputs=[gr.Checkbox(label="Use RolmOCR", value=False, info="Check to use RolmOCR, uncheck to use Qwen2VL OCR")],
|
| 181 |
)
|
| 182 |
|
| 183 |
+
demo.launch(debug=True)
|
|
|