prithivMLmods committed on
Commit
27577e4
·
verified ·
1 Parent(s): 8e118cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -282,7 +282,6 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
282
  with gr.Column(elem_classes="canvas-output"):
283
  gr.Markdown("## Output")
284
  output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=2)
285
-
286
  with gr.Accordion("(Result.md)", open=False):
287
  markdown_output = gr.Markdown(label="(Result.md)")
288
 
@@ -291,9 +290,11 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
291
  label="Select Model",
292
  value="GLM-4.1V-9B-Thinking"
293
  )
294
-
295
  gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Comparator/discussions)")
296
-
 
 
 
297
  # Define the submit button actions
298
  image_submit.click(fn=generate_image,
299
  inputs=[
 
282
  with gr.Column(elem_classes="canvas-output"):
283
  gr.Markdown("## Output")
284
  output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=2)
 
285
  with gr.Accordion("(Result.md)", open=False):
286
  markdown_output = gr.Markdown(label="(Result.md)")
287
 
 
290
  label="Select Model",
291
  value="GLM-4.1V-9B-Thinking"
292
  )
 
293
  gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Comparator/discussions)")
294
+ gr.Markdown("> Camel-Doc-OCR-062825 and Megalodon-OCR-Sync-0713 are both fine-tuned versions of the Qwen2.5-VL series focused on document retrieval, content extraction, analysis recognition, and excelling in OCR and visual document analysis tasks for structured and unstructured content—Camel-Doc-OCR-062825 leveraging the Qwen2.5-VL-7B-Instruct as its base, while Megalodon-OCR-Sync-0713 uses Qwen2.5-VL-3B-Instruct and is especially trained on diverse captioning datasets. ")
295
+ gr.Markdown("GLM-4.1V-9B-Thinking is a vision-language model (VLM) based on the GLM-4-9B-0414 foundation, with a strong emphasis on advanced reasoning capabilities, chain-of-thought inference, and robust bilingual (Chinese/English) performance on complex multimodal benchmarks.")
296
+ gr.Markdown("DeepEyes-7B stands out for its agentic reinforcement learning approach, focusing on “thinking with images” for better visual reasoning, math problem-solving, and mitigating hallucination using Qwen2.5-VL-7B-Instruct as its foundation. Finally, Qwen2.5-VL-3B-Instruct-abliterated is part of the Qwen2.5-VL family, known for its versatile vision-language understanding and generation, serving as the foundational architecture for several of these fine-tuned vision-language and OCR models.")
297
+
298
  # Define the submit button actions
299
  image_submit.click(fn=generate_image,
300
  inputs=[