Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -33,9 +33,8 @@ MAX_SEED = np.iinfo(np.int32).max
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-# -----------------------
 # PROGRESS BAR HELPER
-# -----------------------
+
 def progress_bar_html(label: str) -> str:
     """
     Returns an HTML snippet for a thin progress bar with a label.
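Only the signature and docstring of the helper fall inside this hunk. For reference, a minimal sketch of the kind of label-plus-bar snippet such a helper could return; the markup and styling below are illustrative assumptions, not the Space's actual HTML:

```python
def progress_bar_html(label: str) -> str:
    # Illustrative sketch only; the real markup/CSS lives outside this hunk.
    return (
        '<div style="display:flex;align-items:center;gap:8px;">'
        f'<span style="font-size:0.9em;">{label}</span>'
        '<div style="flex:1;height:4px;background:#e0e0e0;border-radius:2px;">'
        '<div style="width:50%;height:100%;background:#29b6f6;border-radius:2px;"></div>'
        '</div>'
        '</div>'
    )
```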
@@ -56,9 +55,8 @@ def progress_bar_html(label: str) -> str:
     </style>
     '''
 
-# -----------------------
 # TEXT & TTS MODELS
-# -----------------------
+
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
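The `AutoModelForCausalLM.from_pretrained(` call is cut off at the hunk boundary. A typical completion looks like the sketch below; the `device_map` and `torch_dtype` kwargs are assumptions, not taken from app.py:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "prithivMLmods/FastThink-0.5B-Tiny"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # assumption: the real kwargs are outside this hunk
    torch_dtype=torch.float16,  # assumption
).eval()
```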
@@ -73,9 +71,8 @@ TTS_VOICES = [
     "en-US-GuyNeural", # @tts2
 ]
 
-# -----------------------
 # MULTIMODAL (OCR) MODELS
-# -----------------------
+
 MODEL_ID_VL = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor = AutoProcessor.from_pretrained(MODEL_ID_VL, trust_remote_code=True)
 model_m = Qwen2VLForConditionalGeneration.from_pretrained(
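For context, this is roughly how the OCR pair loaded above is driven for a single image. Only the model id and the loading calls come from the diff; the image path, prompt, and generation settings are illustrative assumptions:

```python
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

MODEL_ID_VL = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
processor = AutoProcessor.from_pretrained(MODEL_ID_VL, trust_remote_code=True)
model_m = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID_VL, trust_remote_code=True, torch_dtype=torch.float16
).to("cuda").eval()

image = Image.open("receipt.png")  # placeholder image path
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "Extract all readable text from this image."},
    ],
}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to("cuda")

with torch.inference_mode():
    output_ids = model_m.generate(**inputs, max_new_tokens=256)

# Decode only the newly generated tokens, not the prompt.
trimmed = output_ids[:, inputs["input_ids"].shape[-1]:]
print(processor.batch_decode(trimmed, skip_special_tokens=True)[0])
```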
@@ -84,15 +81,6 @@ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
 
-# -----------------------
-# GEMMA3-4B MODEL SETUP (NEW FEATURE)
-# -----------------------
-gemma3_model_id = "google/gemma-3-4b-it"
-gemma3_model = Gemma3ForConditionalGeneration.from_pretrained(
-    gemma3_model_id, device_map="auto"
-).eval()
-gemma3_processor = AutoProcessor.from_pretrained(gemma3_model_id)
-
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(output_file)
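The `text_to_speech` coroutine shown as context above is self-contained and can be exercised directly. A minimal sketch; the sample sentence and output filename are assumptions, and `en-US-GuyNeural` is the `@tts2` voice listed in this hunk:

```python
import asyncio
import edge_tts

async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
    # Stream synthesized speech from the Edge TTS service straight to an mp3 file.
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_file)
    return output_file  # returning the path is a convenience added for this sketch

asyncio.run(text_to_speech("Hello from the Space!", "en-US-GuyNeural", "demo.mp3"))
```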
@@ -130,9 +118,9 @@ ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
 
 dtype = torch.float16 if device.type == "cuda" else torch.float32
 
-# -----------------------
+
 # STABLE DIFFUSION IMAGE GENERATION MODELS
-# -----------------------
+
 if torch.cuda.is_available():
     # Lightning 5 model
     pipe = StableDiffusionXLPipeline.from_pretrained(
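The pipeline construction continues past the hunk boundary. As a generic reference for how an SDXL pipeline is loaded and called with diffusers; the checkpoint id, prompt, and step count below are assumptions rather than the Space's Lightning/Turbo settings:

```python
import torch
from diffusers import StableDiffusionXLPipeline

# Placeholder checkpoint for illustration; the Space wires up its own Lightning/Turbo weights.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
).to("cuda")

image = pipe("a watercolor fox in a snowy forest", num_inference_steps=30).images[0]
image.save("fox.png")
```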
@@ -218,9 +206,18 @@ def save_image(img: Image.Image) -> str:
     img.save(unique_name)
     return unique_name
 
-# -----------------------
+
+# GEMMA3-4B MULTIMODAL MODEL
+
+gemma3_model_id = "google/gemma-3-4b-it"
+gemma3_model = Gemma3ForConditionalGeneration.from_pretrained(
+    gemma3_model_id, device_map="auto"
+).eval()
+gemma3_processor = AutoProcessor.from_pretrained(gemma3_model_id)
+
+
 # MAIN GENERATION FUNCTION
-# -----------------------
+
 @spaces.GPU
 def generate(
     input_dict: dict,
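This hunk relocates the Gemma3-4B setup next to the generation code. A minimal non-streaming sketch of how that pair is used; the `apply_chat_template` call mirrors the one in the diff, while the prompt, `max_new_tokens`, and the decode step are assumptions:

```python
import torch
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

gemma3_model_id = "google/gemma-3-4b-it"
gemma3_model = Gemma3ForConditionalGeneration.from_pretrained(
    gemma3_model_id, device_map="auto"
).eval()
gemma3_processor = AutoProcessor.from_pretrained(gemma3_model_id)

messages = [
    {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
    {"role": "user", "content": [{"type": "text", "text": "Summarize what a ZeroGPU Space is in one sentence."}]},
]
inputs = gemma3_processor.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True,
    return_dict=True, return_tensors="pt"
).to(gemma3_model.device, dtype=torch.bfloat16)

with torch.inference_mode():
    output_ids = gemma3_model.generate(**inputs, max_new_tokens=128, do_sample=False)

# Strip the prompt tokens before decoding the reply.
reply = gemma3_processor.decode(
    output_ids[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
)
print(reply)
```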
@@ -235,8 +232,8 @@ def generate(
     files = input_dict.get("files", [])
 
     lower_text = text.lower().strip()
-
-    #
+
+    # Image Generation Branch (Stable Diffusion models)
     if (lower_text.startswith("@lightningv5") or
         lower_text.startswith("@lightningv4") or
         lower_text.startswith("@turbov3")):
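The branch above is the first of several `@command` prefixes that `generate()` checks with chained `startswith` calls. A hypothetical helper (not part of app.py) that captures the same routing logic:

```python
# Hypothetical helper for illustration; app.py keeps these checks inline in generate().
IMAGE_PREFIXES = ("@lightningv5", "@lightningv4", "@turbov3")

def route(text: str) -> str:
    lower_text = text.lower().strip()
    if lower_text.startswith(IMAGE_PREFIXES):   # str.startswith accepts a tuple
        return "image"
    if lower_text.startswith("@gemma3-4b"):
        return "gemma3"
    if lower_text.startswith(("@tts1", "@tts2")):
        return "tts"
    return "chat"

assert route("@LightningV5 a cozy cabin at dusk") == "image"
assert route("@gemma3-4b describe this photo") == "gemma3"
assert route("plain chat message") == "chat"
```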
@@ -288,52 +285,53 @@ def generate(
         yield gr.Image(image_path)
         return
 
-    #
+    # GEMMA3-4B Branch for Multimodal/Text Generation with Streaming
     if lower_text.startswith("@gemma3-4b"):
-        # Remove the flag from the
+        # Remove the gemma3 flag from the prompt.
         prompt_clean = re.sub(r"@gemma3-4b", "", text, flags=re.IGNORECASE).strip().strip('"')
-        # Build messages: include a system message and user message.
-        messages = []
-        messages.append({
-            "role": "system",
-            "content": [{"type": "text", "text": "You are a helpful assistant."}]
-        })
-        user_content = []
         if files:
-            # If
-            images = [load_image(
-
-
-
-
-
-
-
-
-
-
+            # If image files are provided, load them.
+            images = [load_image(f) for f in files]
+            messages = [{
+                "role": "user",
+                "content": [
+                    *[{"type": "image", "image": image} for image in images],
+                    {"type": "text", "text": prompt_clean},
+                ]
+            }]
+        else:
+            messages = [
+                {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
+                {"role": "user", "content": [{"type": "text", "text": prompt_clean}]}
+            ]
         inputs = gemma3_processor.apply_chat_template(
             messages, add_generation_prompt=True, tokenize=True,
             return_dict=True, return_tensors="pt"
         ).to(gemma3_model.device, dtype=torch.bfloat16)
-
-
-
-
-
+        streamer = TextIteratorStreamer(
+            gemma3_processor.tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
+        )
+        generation_kwargs = {
+            **inputs,
+            "streamer": streamer,
+            "max_new_tokens": max_new_tokens,
+            "do_sample": True,
+            "temperature": temperature,
+            "top_p": top_p,
+            "top_k": top_k,
+            "repetition_penalty": repetition_penalty,
+        }
         thread = Thread(target=gemma3_model.generate, kwargs=generation_kwargs)
         thread.start()
-
         buffer = ""
-        yield progress_bar_html("Processing with Gemma3-
+        yield progress_bar_html("Processing with Gemma3-4b")
         for new_text in streamer:
             buffer += new_text
+            time.sleep(0.01)
             yield buffer
-        final_response = buffer
-        yield final_response
         return
 
-    #
+    # Otherwise, handle text/chat (and TTS) generation.
     tts_prefix = "@tts"
     is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
     voice_index = next((i for i in range(1, 3) if text.strip().lower().startswith(f"{tts_prefix}{i}")), None)
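The new branch streams tokens by running `generate()` on a worker thread and draining a `TextIteratorStreamer` on the main thread. A self-contained sketch of that pattern with the plain text model loaded earlier in the file; the prompt and sampling settings are assumptions:

```python
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "prithivMLmods/FastThink-0.5B-Tiny"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("Write a haiku about GPUs.", return_tensors="pt").to(model.device)
streamer = TextIteratorStreamer(
    tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
)
generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": 64, "do_sample": True}

# generate() blocks until completion, so it runs on a worker thread while the
# main thread consumes partial text from the streamer (app.py yields it to the UI).
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

buffer = ""
for new_text in streamer:
    buffer += new_text
    print(buffer, end="\r")
thread.join()
```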