prithivMLmods committed on
Commit
1d3bbac
·
verified ·
1 Parent(s): d48f554

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -31
app.py CHANGED
@@ -231,35 +231,6 @@ css = """
231
  border-radius: 10px;
232
  padding: 20px;
233
  }
234
- .model-choice-reasoning {
235
- background-color: #2ecc71 !important; /* Green for reasoning models */
236
- color: white !important;
237
- padding: 5px 10px;
238
- border-radius: 5px;
239
- }
240
- .model-choice-ocr {
241
- background-color: #3498db !important; /* Blue for OCR models */
242
- color: white !important;
243
- padding: 5px 10px;
244
- border-radius: 5px;
245
- }
246
- """
247
-
248
- # JavaScript to apply classes to radio button labels
249
- js_script = """
250
- <script>
251
- document.addEventListener('DOMContentLoaded', function() {
252
- const labels = document.querySelectorAll('.gr-radio label');
253
- labels.forEach(label => {
254
- const text = label.textContent.trim();
255
- if (text === 'GLM-4.1V-9B-Thinking' || text === 'ViLaSR-7B') {
256
- label.classList.add('model-choice-reasoning');
257
- } else if (text === 'Camel-Doc-OCR-062825' || text === 'Megalodon-OCR-Sync-0713') {
258
- label.classList.add('model-choice-ocr');
259
- }
260
- });
261
- });
262
- </script>
263
  """
264
 
265
  # Create the Gradio Interface
@@ -302,8 +273,9 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
302
  value="Camel-Doc-OCR-062825"
303
  )
304
  gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-v1.0/discussions)")
305
- gr.HTML(js_script) # Inject JavaScript to apply classes
306
-
 
307
  # Define the submit button actions
308
  image_submit.click(fn=generate_image,
309
  inputs=[
 
231
  border-radius: 10px;
232
  padding: 20px;
233
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  """
235
 
236
  # Create the Gradio Interface
 
273
  value="Camel-Doc-OCR-062825"
274
  )
275
  gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-v1.0/discussions)")
276
+ gr.Markdown("> [Camel-Doc-OCR-062825](https://huggingface.co/prithivMLmods/Camel-Doc-OCR-062825), [GLM-4.1V-9B-Thinking](https://huggingface.co/zai-org/GLM-4.1V-9B-Thinking), [Megalodon-OCR-Sync-0713](https://huggingface.co/prithivMLmods/Megalodon-OCR-Sync-0713), and [ViLaSR-7B](https://huggingface.co/inclusionAI/ViLaSR) are recent vision-language models excelling in document intelligence and multimodal understanding. Camel-Doc-OCR-062825 is a Qwen2.5-VL-7B-Instruct finetune, highly optimized for document retrieval, structured extraction, analysis, and direct Markdown generation from images and PDFs. GLM-4.1V-9B-Thinking offers next-level multimodal reasoning, bringing visual and textual comprehension together for advanced question answering.")
277
+ gr.Markdown("> Megalodon-OCR-Sync-0713, finetuned from Qwen2.5-VL-3B-Instruct, specializes in context-aware multimodal document extraction and analysis, excelling at retrieval, layout parsing, math, and chart/table recognition, with robust video and long-form comprehension capabilities. ViLaSR-7B focuses on reinforcing spatial reasoning in visual-language tasks by combining interwoven thinking with visual drawing, making it especially suited for spatial reasoning and complex tip-based queries.")
278
+
279
  # Define the submit button actions
280
  image_submit.click(fn=generate_image,
281
  inputs=[