prithivMLmods committed
Commit a90631f · verified · 1 Parent(s): ba4faf6

Update app.py

Files changed (1)
  1. app.py +9 -9
app.py CHANGED
@@ -53,7 +53,7 @@ def progress_bar_html(label: str) -> str:
 
 # TEXT & TTS MODELS
 
-model_id = "prithivMLmods/FastThink-0.5B-Tiny"
+model_id = "google/gemma-3-1b-it"  # alter: prithivMLmods/FastThink-0.5B-Tiny
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
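The hunk cuts off inside the `from_pretrained` call, so for reference here is a minimal, self-contained sketch of loading the new default text model; `device_map` and `torch_dtype` are illustrative assumptions, not taken from this commit.

```python
# Minimal sketch: load the new default text model (settings below are assumptions).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "google/gemma-3-1b-it"  # swapped in by this commit
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",           # assumption: let accelerate place the weights
    torch_dtype=torch.bfloat16,  # assumption: half precision to fit small GPUs
)
```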
@@ -153,7 +153,7 @@ def save_image(img: Image.Image) -> str:
 
 # GEMMA3-4B MULTIMODAL MODEL
 
-gemma3_model_id = "google/gemma-3-12b-it"  # alter: google/gemma-3-4b-it
+gemma3_model_id = "google/gemma-3-4b-it"  # alter: google/gemma-3-12b-it
 gemma3_model = Gemma3ForConditionalGeneration.from_pretrained(
     gemma3_model_id, device_map="auto"
 ).eval()
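The diff shows the multimodal model load but not the matching processor, which the image branch needs to build model inputs. A hedged sketch of the usual pairing (the `processor` variable is an assumption about the surrounding app code; `AutoProcessor.from_pretrained` is the standard transformers call for Gemma 3 checkpoints):

```python
# Sketch: pair the Gemma 3 multimodal model with its processor (not shown in this diff).
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

gemma3_model_id = "google/gemma-3-4b-it"
gemma3_model = Gemma3ForConditionalGeneration.from_pretrained(
    gemma3_model_id, device_map="auto"
).eval()
processor = AutoProcessor.from_pretrained(gemma3_model_id)  # tokenizes text, preprocesses images
```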
@@ -233,13 +233,13 @@ def generate(
         return
 
     # GEMMA3-4B TEXT & MULTIMODAL (image) Branch
-    if lower_text.startswith("@gemma3-12b"):
+    if lower_text.startswith("@gemma3"):
         # If it is video, let the dedicated branch handle it.
         if lower_text.startswith("@video-infer"):
             pass  # video branch is handled below.
         else:
             # Remove the gemma3 flag from the prompt.
-            prompt_clean = re.sub(r"@gemma3-12b", "", text, flags=re.IGNORECASE).strip().strip('"')
+            prompt_clean = re.sub(r"@gemma3", "", text, flags=re.IGNORECASE).strip().strip('"')
             if files:
                 # If image files are provided, load them.
                 images = [load_image(f) for f in files]
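The renamed trigger has one subtlety: `startswith("@gemma3")` now matches any `@gemma3…` prefix, while `re.sub` strips only the literal `@gemma3` token, so a leftover suffix such as `-12b` would leak into the prompt. A standalone toy sketch of the routing (the `route` helper is hypothetical, and the checks are reordered so the video case is explicit rather than "handled below"):

```python
# Toy sketch of the trigger routing after this commit (not the app's full generate()).
import re

def route(text: str) -> tuple[str, str]:
    """Return (branch, cleaned_prompt) for a chat message (hypothetical helper)."""
    lower_text = text.lower()
    if lower_text.startswith("@video-infer"):
        return "video", re.sub(r"@video-infer", "", text, flags=re.IGNORECASE).strip()
    if lower_text.startswith("@gemma3"):  # shorter prefix also matches old "@gemma3-12b" inputs
        return "gemma3", re.sub(r"@gemma3", "", text, flags=re.IGNORECASE).strip().strip('"')
    return "default", text

print(route("@gemma3 Explain the Image"))     # ('gemma3', 'Explain the Image')
print(route("@gemma3-12b Explain the Image")) # ('gemma3', '-12b Explain the Image')
```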
@@ -275,7 +275,7 @@ def generate(
             thread = Thread(target=gemma3_model.generate, kwargs=generation_kwargs)
             thread.start()
             buffer = ""
-            yield progress_bar_html("Processing with Gemma3-12b")
+            yield progress_bar_html("Processing with Gemma3")
             for new_text in streamer:
                 buffer += new_text
                 time.sleep(0.01)
@@ -326,7 +326,7 @@ def generate(
         thread = Thread(target=gemma3_model.generate, kwargs=generation_kwargs)
         thread.start()
         buffer = ""
-        yield progress_bar_html("Processing video with Gemma3-12b")
+        yield progress_bar_html("Processing video with Gemma3")
         for new_text in streamer:
             buffer += new_text
             time.sleep(0.01)
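Both relabeled yields sit inside the same streaming pattern: `generate()` runs on a worker thread while the UI loop drains a `TextIteratorStreamer`. A minimal sketch of that pattern with a small placeholder model (the model choice and `max_new_tokens` are assumptions):

```python
# Sketch: stream tokens from generate() running on a worker thread.
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "google/gemma-3-1b-it"  # placeholder; any causal LM works
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

inputs = tokenizer("Explain streaming generation.", return_tensors="pt").to(model.device)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=64)

thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

buffer = ""
for new_text in streamer:  # blocks until the worker produces the next chunk
    buffer += new_text
print(buffer)
```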
@@ -414,7 +414,7 @@ demo = gr.ChatInterface(
         gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
     ],
     examples=[
-        [{"text": "@gemma3-12b Explain the Image", "files": ["examples/3.jpg"]}],
+        [{"text": "@gemma3 Explain the Image", "files": ["examples/3.jpg"]}],
         [{"text": "@video-infer Explain the content of the Advertisement", "files": ["examples/videoplayback.mp4"]}],
         [{"text": "@video-infer Explain the content of the video in detail", "files": ["examples/breakfast.mp4"]}],
         [{"text": "@video-infer Describe the video", "files": ["examples/Missing.mp4"]}],
@@ -429,9 +429,9 @@ demo = gr.ChatInterface(
     ],
     cache_examples=False,
     type="messages",
-    description="# **Gemma 3 `@gemma3-12b, @video-infer for video understanding`**",
+    description="# **Gemma 3 `@gemma3, @video-infer for video understanding`**",
     fill_height=True,
-    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple", placeholder="@gemma3-12b for multimodal, @video-infer for video, @lightningv5 for image gen !"),
+    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image", "video"], file_count="multiple", placeholder="@gemma3 for multimodal, @video-infer for video, @lightningv5 for image gen !"),
     stop_btn="Stop Generation",
     multimodal=True,
 )
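For context, the changed description and textbox land in a `gr.ChatInterface` roughly like the skeleton below; the echo function is a stand-in for the app's real streaming `generate()`, and only the options visible in this diff are reproduced.

```python
# Skeleton: the ChatInterface these options configure (echo fn is a stand-in).
import gradio as gr

def generate(message, history):
    # With multimodal=True, message is a dict with "text" and "files" keys.
    yield f"echo: {message['text']}"

demo = gr.ChatInterface(
    fn=generate,
    type="messages",
    multimodal=True,
    textbox=gr.MultimodalTextbox(
        label="Query Input",
        file_types=["image", "video"],
        file_count="multiple",
        placeholder="@gemma3 for multimodal, @video-infer for video, @lightningv5 for image gen !",
    ),
    description="# **Gemma 3 `@gemma3, @video-infer for video understanding`**",
    stop_btn="Stop Generation",
    fill_height=True,
)

if __name__ == "__main__":
    demo.launch()
```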
 