prithivMLmods committed
Commit 4bfb84e · verified · 1 Parent(s): 46ce972

Update app.py

Files changed (1): app.py (+59 -2)
app.py CHANGED
@@ -20,6 +20,7 @@ from transformers import (
     TextIteratorStreamer,
     Qwen2VLForConditionalGeneration,
     AutoProcessor,
+    Gemma3ForConditionalGeneration,  # New import for Gemma3-4B
 )
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
@@ -208,6 +209,15 @@ def save_image(img: Image.Image) -> str:
     img.save(unique_name)
     return unique_name
 
+# -----------------------
+# GEMMA3-4B MULTIMODAL MODEL
+# -----------------------
+gemma3_model_id = "google/gemma-3-4b-it"
+gemma3_model = Gemma3ForConditionalGeneration.from_pretrained(
+    gemma3_model_id, device_map="auto"
+).eval()
+gemma3_processor = AutoProcessor.from_pretrained(gemma3_model_id)
+
 # -----------------------
 # MAIN GENERATION FUNCTION
 # -----------------------
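Reviewer note: for reference, a minimal standalone sketch of the load-and-generate pattern this new block relies on. It assumes a transformers release with Gemma 3 support (4.50+) and access to the gated google/gemma-3-4b-it checkpoint; the prompt is illustrative, not from the app.

# Minimal sketch: load Gemma 3 and run one non-streaming generation.
import torch
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

model_id = "google/gemma-3-4b-it"
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, device_map="auto", torch_dtype=torch.bfloat16
).eval()
processor = AutoProcessor.from_pretrained(model_id)

messages = [{"role": "user", "content": [{"type": "text", "text": "Explain bfloat16 in one sentence."}]}]
inputs = processor.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True,
    return_dict=True, return_tensors="pt"
).to(model.device)

with torch.inference_mode():
    out = model.generate(**inputs, max_new_tokens=64)
# Decode only the newly generated tokens, not the echoed prompt.
print(processor.decode(out[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))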
@@ -225,7 +235,8 @@ def generate(
     files = input_dict.get("files", [])
 
     lower_text = text.lower().strip()
-    # If the prompt is an image generation command (using model flags)
+
+    # Image Generation Branch (Stable Diffusion models)
     if (lower_text.startswith("@lightningv5") or
         lower_text.startswith("@lightningv4") or
         lower_text.startswith("@turbov3")):
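Reviewer note: the branches share one flag convention, lower-case the input, test the leading @tag, then strip it case-insensitively from the raw text. A self-contained sketch of that convention (route is a hypothetical helper, not part of the app):

import re

TAGS = ("@lightningv5", "@lightningv4", "@turbov3", "@gemma3-4b")

def route(text: str):
    # Dispatch on a leading @tag and return (tag, cleaned prompt).
    lower = text.lower().strip()
    for tag in TAGS:
        if lower.startswith(tag):
            return tag, re.sub(tag, "", text, flags=re.IGNORECASE).strip().strip('"')
    return None, text.strip()

print(route('@gemma3-4b "describe the attached photo"'))
# -> ('@gemma3-4b', 'describe the attached photo')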
@@ -277,6 +288,52 @@ def generate(
         yield gr.Image(image_path)
         return
 
+    # GEMMA3-4B Branch for Multimodal/Text Generation with Streaming
+    if lower_text.startswith("@gemma3-4b"):
+        # Remove the gemma3 flag from the prompt.
+        prompt_clean = re.sub(r"@gemma3-4b", "", text, flags=re.IGNORECASE).strip().strip('"')
+        if files:
+            # If image files are provided, load them.
+            images = [load_image(f) for f in files]
+            messages = [{
+                "role": "user",
+                "content": [
+                    *[{"type": "image", "image": image} for image in images],
+                    {"type": "text", "text": prompt_clean},
+                ]
+            }]
+        else:
+            messages = [
+                {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
+                {"role": "user", "content": [{"type": "text", "text": prompt_clean}]}
+            ]
+        inputs = gemma3_processor.apply_chat_template(
+            messages, add_generation_prompt=True, tokenize=True,
+            return_dict=True, return_tensors="pt"
+        ).to(gemma3_model.device, dtype=torch.bfloat16)
+        streamer = TextIteratorStreamer(
+            gemma3_processor.tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
+        )
+        generation_kwargs = {
+            **inputs,
+            "streamer": streamer,
+            "max_new_tokens": max_new_tokens,
+            "do_sample": True,
+            "temperature": temperature,
+            "top_p": top_p,
+            "top_k": top_k,
+            "repetition_penalty": repetition_penalty,
+        }
+        thread = Thread(target=gemma3_model.generate, kwargs=generation_kwargs)
+        thread.start()
+        buffer = ""
+        yield progress_bar_html("Processing with Gemma3-4b")
+        for new_text in streamer:
+            buffer += new_text
+            time.sleep(0.01)
+            yield buffer
+        return
+
     # Otherwise, handle text/chat (and TTS) generation.
     tts_prefix = "@tts"
     is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
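Reviewer note: the streaming pattern added here, model.generate on a worker thread while the caller iterates TextIteratorStreamer as decoded chunks arrive, in isolation. A small ungated model stands in for Gemma 3 so the sketch runs anywhere:

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tok("The quick brown fox", return_tensors="pt")
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so it runs on a background thread while the
# streamer yields decoded text chunks to the consumer below.
thread = Thread(target=model.generate,
                kwargs={**inputs, "streamer": streamer, "max_new_tokens": 40})
thread.start()

buffer = ""
for new_text in streamer:
    buffer += new_text  # the app re-yields this growing buffer to the Gradio UI
print(buffer)
thread.join()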
@@ -391,7 +448,7 @@ demo = gr.ChatInterface(
     description=DESCRIPTION,
     css=css,
     fill_height=True,
-    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple", placeholder="use the tags @lightningv5 @lightningv4 @turbov3 for image gen !"),
+    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple", placeholder="use the tags @lightningv5 @lightningv4 @turbov3 or @gemma3-4b for multimodal gen !"),
     stop_btn="Stop Generation",
     multimodal=True,
 )
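Reviewer note: for context, a minimal reproduction of the interface wiring this hunk touches, assuming gradio 4.x; echo is a stand-in for the app's generate().

import gradio as gr

def echo(input_dict, history):
    # With multimodal=True the handler receives {"text": ..., "files": [...]}.
    text = input_dict.get("text", "")
    files = input_dict.get("files", [])
    return f"text={text!r}, files={len(files)}"

demo = gr.ChatInterface(
    fn=echo,
    multimodal=True,
    textbox=gr.MultimodalTextbox(
        label="Query Input", file_types=["image"], file_count="multiple",
        placeholder="use the tags @lightningv5 @lightningv4 @turbov3 or @gemma3-4b",
    ),
    stop_btn="Stop Generation",
)

if __name__ == "__main__":
    demo.launch()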
 