prithivMLmods committed · Commit 62e717d · verified · Parent(s): 736d689

Update app.py

Files changed (1): app.py (+12 −3)

app.py
@@ -301,12 +301,21 @@ demo = gr.ChatInterface(
     gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
     ],
     examples=[
-    [{"text": "Create a short story based on the images.", "files": ["examples/1111.jpg", "examples/2222.jpg", "examples/3333.jpg"]}],
+    [
+        {
+            "text": "Create a short story based on the images.",
+            "files": [
+                "examples/1111.jpg",
+                "examples/2222.jpg",
+                "examples/3333.jpg",
+            ],
+        }
+    ],
     [{"text": "Explain the Image", "files": ["examples/3.jpg"]}],
     [{"text": "Explain the content of the Advertisement", "files": ["examples/videoplayback.mp4"]}],
     [{"text": "Which movie character is this?", "files": ["examples/9999.jpg"]}],
     ["Explain Critical Temperature of Substance"],
-    [{"text": "Transcription of the letter", "files": ["examples/222.png"]}],
+    [{"text": "@qwen2-vl Transcription of the letter", "files": ["examples/222.png"]}],
     [{"text": "Explain the content of the video in detail", "files": ["examples/breakfast.mp4"]}],
     [{"text": "Describe the video", "files": ["examples/Missing.mp4"]}],
     [{"text": "Explain what is happening in this video ?", "files": ["examples/oreo.mp4"]}],
@@ -317,7 +326,7 @@ demo = gr.ChatInterface(
     ],
     cache_examples=False,
     type="messages",
-    description="# **Gemma 3 Multimodal**",
+    description="# **Gemma 3 Multimodal \n`Use @qwen2-vl to switch to Qwen2-VL OCR for image inference and `@video-infer` for video input.`**",
     fill_height=True,
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple", placeholder="Tag with @qwen2-vl for Qwen2-VL inference if needed."),
     stop_btn="Stop Generation",
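
The @qwen2-vl tag added to the example and the description above is a routing hint that the Space handles elsewhere in app.py (outside this hunk). A minimal, self-contained sketch of how such tag-based routing could be parsed; the function route_query and the route names below are illustrative assumptions, not the Space's actual code:

# Minimal sketch (assumption): strip an optional model tag such as "@qwen2-vl"
# or "@video-infer" from the start of a query so the chat handler can decide
# which inference path to run. Not taken from app.py.
def route_query(text: str) -> tuple[str, str]:
    """Return (route, cleaned_prompt) for a raw query string."""
    lowered = text.lstrip().lower()
    for tag, route in (("@qwen2-vl", "qwen2-vl"), ("@video-infer", "video-infer")):
        if lowered.startswith(tag):
            # Remove the tag and surrounding whitespace from the prompt.
            return route, text.lstrip()[len(tag):].strip()
    return "gemma-3", text.strip()  # default path: Gemma 3 multimodal

if __name__ == "__main__":
    print(route_query("@qwen2-vl Transcription of the letter"))
    # -> ('qwen2-vl', 'Transcription of the letter')
    print(route_query("Explain the Image"))
    # -> ('gemma-3', 'Explain the Image')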