Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -153,7 +153,7 @@ def save_image(img: Image.Image) -> str:
|
|
153 |
|
154 |
# GEMMA3-4B MULTIMODAL MODEL
|
155 |
|
156 |
-
gemma3_model_id = "google/gemma-3-4b-it"
|
157 |
gemma3_model = Gemma3ForConditionalGeneration.from_pretrained(
|
158 |
gemma3_model_id, device_map="auto"
|
159 |
).eval()
|
@@ -233,13 +233,13 @@ def generate(
|
|
233 |
return
|
234 |
|
235 |
# GEMMA3-4B TEXT & MULTIMODAL (image) Branch
|
236 |
-
if lower_text.startswith("@gemma3-4b"):
|
237 |
# If it is video, let the dedicated branch handle it.
|
238 |
if lower_text.startswith("@video-infer"):
|
239 |
pass # video branch is handled below.
|
240 |
else:
|
241 |
# Remove the gemma3 flag from the prompt.
|
242 |
-
prompt_clean = re.sub(r"@gemma3-4b", "", text, flags=re.IGNORECASE).strip().strip('"')
|
243 |
if files:
|
244 |
# If image files are provided, load them.
|
245 |
images = [load_image(f) for f in files]
|
@@ -275,7 +275,7 @@ def generate(
|
|
275 |
thread = Thread(target=gemma3_model.generate, kwargs=generation_kwargs)
|
276 |
thread.start()
|
277 |
buffer = ""
|
278 |
-
yield progress_bar_html("Processing with Gemma3-4b")
|
279 |
for new_text in streamer:
|
280 |
buffer += new_text
|
281 |
time.sleep(0.01)
|
@@ -326,7 +326,7 @@ def generate(
|
|
326 |
thread = Thread(target=gemma3_model.generate, kwargs=generation_kwargs)
|
327 |
thread.start()
|
328 |
buffer = ""
|
329 |
-
yield progress_bar_html("Processing video with Gemma3-4b")
|
330 |
for new_text in streamer:
|
331 |
buffer += new_text
|
332 |
time.sleep(0.01)
|
@@ -414,14 +414,14 @@ demo = gr.ChatInterface(
|
|
414 |
gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
|
415 |
],
|
416 |
examples=[
|
417 |
-
[{"text": "@gemma3-4b Explain the Image", "files": ["examples/3.jpg"]}],
|
418 |
[{"text": "@video-infer Explain the content of the Advertisement", "files": ["examples/videoplayback.mp4"]}],
|
419 |
[{"text": "@video-infer Explain the content of the video in detail", "files": ["examples/breakfast.mp4"]}],
|
420 |
[{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}],
|
421 |
[{"text": "@video-infer Explain what is happening in this video ?", "files": ["examples/oreo.mp4"]}],
|
422 |
[{"text": "@video-infer Summarize the events in this video", "files": ["examples/sky.mp4"]}],
|
423 |
[{"text": "@video-infer What is in the video ?", "files": ["examples/redlight.mp4"]}],
|
424 |
-
[{"text": "@gemma3-4b Transcription of the letter", "files": ["examples/222.png"]}],
|
425 |
['@lightningv5 Chocolate dripping from a donut'],
|
426 |
["Python Program for Array Rotation"],
|
427 |
["@tts1 Who is Nikola Tesla, and why did he die?"],
|
@@ -429,9 +429,9 @@ demo = gr.ChatInterface(
|
|
429 |
],
|
430 |
cache_examples=False,
|
431 |
type="messages",
|
432 |
-
description="# **Gemma 3 `@gemma3-4b, @video-infer for video understanding`**",
|
433 |
fill_height=True,
|
434 |
-
textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple", placeholder="@gemma3-4b for multimodal, @video-infer for video, @lightningv5 for image gen !"),
|
435 |
stop_btn="Stop Generation",
|
436 |
multimodal=True,
|
437 |
)
|
|
|
153 |
|
154 |
# GEMMA3-4B MULTIMODAL MODEL
|
155 |
|
156 |
+
gemma3_model_id = "google/gemma-3-12b-it" #alter google/gemma-3-4b-it
|
157 |
gemma3_model = Gemma3ForConditionalGeneration.from_pretrained(
|
158 |
gemma3_model_id, device_map="auto"
|
159 |
).eval()
|
|
|
233 |
return
|
234 |
|
235 |
# GEMMA3-4B TEXT & MULTIMODAL (image) Branch
|
236 |
+
if lower_text.startswith("@gemma3-12b"):
|
237 |
# If it is video, let the dedicated branch handle it.
|
238 |
if lower_text.startswith("@video-infer"):
|
239 |
pass # video branch is handled below.
|
240 |
else:
|
241 |
# Remove the gemma3 flag from the prompt.
|
242 |
+
prompt_clean = re.sub(r"@gemma3-12b", "", text, flags=re.IGNORECASE).strip().strip('"')
|
243 |
if files:
|
244 |
# If image files are provided, load them.
|
245 |
images = [load_image(f) for f in files]
|
|
|
275 |
thread = Thread(target=gemma3_model.generate, kwargs=generation_kwargs)
|
276 |
thread.start()
|
277 |
buffer = ""
|
278 |
+
yield progress_bar_html("Processing with Gemma3-12b")
|
279 |
for new_text in streamer:
|
280 |
buffer += new_text
|
281 |
time.sleep(0.01)
|
|
|
326 |
thread = Thread(target=gemma3_model.generate, kwargs=generation_kwargs)
|
327 |
thread.start()
|
328 |
buffer = ""
|
329 |
+
yield progress_bar_html("Processing video with Gemma3-12b")
|
330 |
for new_text in streamer:
|
331 |
buffer += new_text
|
332 |
time.sleep(0.01)
|
|
|
414 |
gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
|
415 |
],
|
416 |
examples=[
|
417 |
+
[{"text": "@gemma3-12b Explain the Image", "files": ["examples/3.jpg"]}],
|
418 |
[{"text": "@video-infer Explain the content of the Advertisement", "files": ["examples/videoplayback.mp4"]}],
|
419 |
[{"text": "@video-infer Explain the content of the video in detail", "files": ["examples/breakfast.mp4"]}],
|
420 |
[{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}],
|
421 |
[{"text": "@video-infer Explain what is happening in this video ?", "files": ["examples/oreo.mp4"]}],
|
422 |
[{"text": "@video-infer Summarize the events in this video", "files": ["examples/sky.mp4"]}],
|
423 |
[{"text": "@video-infer What is in the video ?", "files": ["examples/redlight.mp4"]}],
|
424 |
+
[{"text": "@gemma3-12b Transcription of the letter", "files": ["examples/222.png"]}],
|
425 |
['@lightningv5 Chocolate dripping from a donut'],
|
426 |
["Python Program for Array Rotation"],
|
427 |
["@tts1 Who is Nikola Tesla, and why did he die?"],
|
|
|
429 |
],
|
430 |
cache_examples=False,
|
431 |
type="messages",
|
432 |
+
description="# **Gemma 3 `@gemma3-12b, @video-infer for video understanding`**",
|
433 |
fill_height=True,
|
434 |
+
textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple", placeholder="@gemma3-12b for multimodal, @video-infer for video, @lightningv5 for image gen !"),
|
435 |
stop_btn="Stop Generation",
|
436 |
multimodal=True,
|
437 |
)
|