Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -301,12 +301,21 @@ demo = gr.ChatInterface(
|
|
301 |
gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
|
302 |
],
|
303 |
examples=[
|
304 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
[{"text": "Explain the Image", "files": ["examples/3.jpg"]}],
|
306 |
[{"text": "Explain the content of the Advertisement", "files": ["examples/videoplayback.mp4"]}],
|
307 |
[{"text": "Which movie character is this?", "files": ["examples/9999.jpg"]}],
|
308 |
["Explain Critical Temperature of Substance"],
|
309 |
-
[{"text": "Transcription of the letter", "files": ["examples/222.png"]}],
|
310 |
[{"text": "Explain the content of the video in detail", "files": ["examples/breakfast.mp4"]}],
|
311 |
[{"text": "Describe the video", "files": ["examples/Missing.mp4"]}],
|
312 |
[{"text": "Explain what is happening in this video ?", "files": ["examples/oreo.mp4"]}],
|
@@ -317,7 +326,7 @@ demo = gr.ChatInterface(
|
|
317 |
],
|
318 |
cache_examples=False,
|
319 |
type="messages",
|
320 |
-
description="# **Gemma 3 Multimodal
|
321 |
fill_height=True,
|
322 |
textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple", placeholder="Tag with @qwen2-vl for Qwen2-VL inference if needed."),
|
323 |
stop_btn="Stop Generation",
|
|
|
301 |
gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
|
302 |
],
|
303 |
examples=[
|
304 |
+
[
|
305 |
+
{
|
306 |
+
"text": "Create a short story based on the images.",
|
307 |
+
"files": [
|
308 |
+
"examples/1111.jpg",
|
309 |
+
"examples/2222.jpg",
|
310 |
+
"examples/3333.jpg",
|
311 |
+
],
|
312 |
+
}
|
313 |
+
],
|
314 |
[{"text": "Explain the Image", "files": ["examples/3.jpg"]}],
|
315 |
[{"text": "Explain the content of the Advertisement", "files": ["examples/videoplayback.mp4"]}],
|
316 |
[{"text": "Which movie character is this?", "files": ["examples/9999.jpg"]}],
|
317 |
["Explain Critical Temperature of Substance"],
|
318 |
+
[{"text": "@qwen2-vl Transcription of the letter", "files": ["examples/222.png"]}],
|
319 |
[{"text": "Explain the content of the video in detail", "files": ["examples/breakfast.mp4"]}],
|
320 |
[{"text": "Describe the video", "files": ["examples/Missing.mp4"]}],
|
321 |
[{"text": "Explain what is happening in this video ?", "files": ["examples/oreo.mp4"]}],
|
|
|
326 |
],
|
327 |
cache_examples=False,
|
328 |
type="messages",
|
329 |
+
description="# **Gemma 3 Multimodal \n`Use @qwen2-vl to switch to Qwen2-VL OCR for image inference and `@video-infer` for video input.`**",
|
330 |
fill_height=True,
|
331 |
textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple", placeholder="Tag with @qwen2-vl for Qwen2-VL inference if needed."),
|
332 |
stop_btn="Stop Generation",
|