Multimodal-OCR

Running on Zero

App Files Files Community

prithivMLmods committed on Mar 6

Commit

7342b9f

verified ·

1 Parent(s): c373e16

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -3

app.py CHANGED Viewed

@@ -14,6 +14,23 @@ from PIL import Image
 import requests
 from io import BytesIO
 QV_MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
 qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -42,7 +59,7 @@ def model_inference(input_dict, history):
         else:
             # For simplicity, use the first provided image.
             image = load_image(files[0])
-            yield " > Processing with Aya-Vision ███████▒▒▒ 69%"
             messages = [{
                 "role": "user",
                 "content": [
@@ -121,7 +138,7 @@ def model_inference(input_dict, history):
     thread.start()
     buffer = ""
-    yield " > Processing with Qwen2VL Ocr ███████▒▒▒ 69%"
     for new_text in streamer:
         buffer += new_text
         buffer = buffer.replace("<|im_end|>", "")
@@ -145,7 +162,12 @@ demo = gr.ChatInterface(
     fn=model_inference,
     description="# **Multimodal OCR `@aya-vision 'prompt..'`**",
     examples=examples,
-    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple", placeholder="By default, it runs Qwen2VL OCR, Tag @aya-vision for Aya Vision 8B"),
     stop_btn="Stop Generation",
     multimodal=True,
     cache_examples=False,

 import requests
 from io import BytesIO
+# Helper: returns an HTML snippet showing `label` next to a small bar animated by the CSS "loading" keyframes defined in the same snippet.
+def progress_bar_html(label: str) -> str:  # label is interpolated as-is (no HTML escaping is applied here)
+    return f'''
+<div style="display: flex; align-items: center;">
+    <span style="margin-right: 10px; font-size: 14px;">{label}</span>
+    <div style="width: 110px; height: 5px; background-color: #f0f0f0; border-radius: 2px; overflow: hidden;">
+        <div style="width: 100%; height: 100%; background-color: #00ff3a; animation: loading 1.5s linear infinite;"></div>
+    </div>
+</div>
+<style>
+@keyframes loading {{
+    0% {{ transform: translateX(-100%); }}
+    100% {{ transform: translateX(100%); }}
+}}
+</style>
+    '''  # doubled braces ({{ }}) above are f-string escapes that emit literal CSS braces
 QV_MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
 qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
         else:
             # For simplicity, use the first provided image.
             image = load_image(files[0])
+            yield progress_bar_html("Processing with Aya-Vision")
             messages = [{
                 "role": "user",
                 "content": [
     thread.start()
     buffer = ""
+    yield progress_bar_html("Processing with Qwen2VL OCR")
     for new_text in streamer:
         buffer += new_text
         buffer = buffer.replace("<|im_end|>", "")
     fn=model_inference,
     description="# **Multimodal OCR `@aya-vision 'prompt..'`**",
     examples=examples,
+    textbox=gr.MultimodalTextbox(
+        label="Query Input",
+        file_types=["image"],
+        file_count="multiple",
+        placeholder="By default, it runs Qwen2VL OCR, Tag @aya-vision for Aya Vision 8B"
+    ),
     stop_btn="Stop Generation",
     multimodal=True,
     cache_examples=False,