Spaces:

prithivMLmods
/

Multimodal-VLM-v1.0

Running on Zero

prithivMLmods committed 18 days ago

Commit

59ca834

verified ·

1 Parent(s): 2757a17

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -286,9 +286,9 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
                     markdown_output = gr.Markdown(label="(Result.md)")
             model_choice = gr.Radio(
-                choices=["GLM-4.1V-9B-Thinking", "Camel-Doc-OCR-062825", "Megalodon-OCR-Sync-0713", "DeepEyes-7B-Thinking", "Qwen2.5-VL-3B-Instruct-abliterated"],
                 label="Select Model",
-                value="GLM-4.1V-9B-Thinking"
             )
             gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Comparator/discussions)")
             gr.Markdown("> Camel-Doc-OCR-062825 and Megalodon-OCR-Sync-0713 are both fine-tuned versions of the Qwen2.5-VL series focused on document retrieval, content extraction, analysis recognition, and excelling in OCR and visual document analysis tasks for structured and unstructured content—Camel-Doc-OCR-062825 leveraging the Qwen2.5-VL-7B-Instruct as its base, while Megalodon-OCR-Sync-0713 uses Qwen2.5-VL-3B-Instruct and is especially trained on diverse captioning datasets.")

                     markdown_output = gr.Markdown(label="(Result.md)")
             model_choice = gr.Radio(
+                choices=["Camel-Doc-OCR-062825", "GLM-4.1V-9B-Thinking", "Megalodon-OCR-Sync-0713", "DeepEyes-7B-Thinking", "Qwen2.5-VL-3B-Instruct-abliterated"],
                 label="Select Model",
+                value="Camel-Doc-OCR-062825"
             )
             gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Comparator/discussions)")
             gr.Markdown("> Camel-Doc-OCR-062825 and Megalodon-OCR-Sync-0713 are both fine-tuned versions of the Qwen2.5-VL series focused on document retrieval, content extraction, analysis recognition, and excelling in OCR and visual document analysis tasks for structured and unstructured content—Camel-Doc-OCR-062825 leveraging the Qwen2.5-VL-7B-Instruct as its base, while Megalodon-OCR-Sync-0713 uses Qwen2.5-VL-3B-Instruct and is especially trained on diverse captioning datasets.")