Spaces:

hanzla
/

PlaygroundGemma3

Running on Zero

App Files Community

mjavaid committed about 1 month ago

Commit

a3b555a

1 Parent(s): 199e7c3

first commit

Browse files

Files changed (1) hide show

app.py +43 -22

app.py CHANGED Viewed

@@ -6,9 +6,9 @@ import os
 hf_token = os.environ["HF_TOKEN"]
-# Load the Gemma 3 pipeline
 pipe = pipeline(
-    "image-text-to-text",
     model="google/gemma-3-4b-it",
     device="cuda",
     torch_dtype=torch.bfloat16,
@@ -16,10 +16,7 @@ pipe = pipeline(
 )
 @spaces.GPU
-def generate_response(user_text, user_image):
-    if user_image is None:
-        return "Please upload an image (required)"
     messages = [
         {
             "role": "system",
@@ -27,36 +24,60 @@ def generate_response(user_text, user_image):
         }
     ]
-    user_content = [{"type": "image", "image": user_image}]
-    if user_text:
-        user_content.append({"type": "text", "text": user_text})
     messages.append({"role": "user", "content": user_content})
-    # Call the pipeline with the provided messages
     output = pipe(text=messages, max_new_tokens=200)
     try:
         response = output[0]["generated_text"][-1]["content"]
-        return response
-    except (KeyError, IndexError, TypeError):
-        return "Error processing the response. Please try again."
 with gr.Blocks() as demo:
-    gr.Markdown("# Gemma 3 Image Analysis")
-    gr.Markdown("Upload an image and optionally add a prompt to get the model's response.")
     with gr.Row():
-        img = gr.Image(type="pil", label="Upload an image (required)")
-        txt = gr.Textbox(label="Your prompt (optional)", placeholder="Describe what you see in this image")
-    output = gr.Textbox(label="Model Response")
-    submit_btn = gr.Button("Submit")
     submit_btn.click(
-        generate_response,
-        inputs=[txt, img],
-        outputs=output
     )
 if __name__ == "__main__":

 hf_token = os.environ["HF_TOKEN"]
+# Load the Gemma 3 pipeline - use the multimodal version for all cases
 pipe = pipeline(
+    "image-text-to-text",  # This pipeline can handle both text-only and text+image
     model="google/gemma-3-4b-it",
     device="cuda",
     torch_dtype=torch.bfloat16,
 )
 @spaces.GPU
+def get_response(message, chat_history, image=None):
     messages = [
         {
             "role": "system",
         }
     ]
+    user_content = []
+    # Only add image if provided
+    if image is not None:
+        user_content.append({"type": "image", "image": image})
+    # Add the text message only when it is non-empty
+    if message:
+        user_content.append({"type": "text", "text": message})
     messages.append({"role": "user", "content": user_content})
+    # Call the pipeline
     output = pipe(text=messages, max_new_tokens=200)
     try:
         response = output[0]["generated_text"][-1]["content"]
+        chat_history.append((message, response))
+    except (KeyError, IndexError, TypeError) as e:
+        error_message = f"Error processing the response: {str(e)}"
+        chat_history.append((message, error_message))
+    return "", chat_history
 with gr.Blocks() as demo:
+    gr.Markdown("# Gemma 3 Chat Interface")
+    gr.Markdown("Chat with Gemma 3 with optional image upload capability")
+    chatbot = gr.Chatbot()
     with gr.Row():
+        msg = gr.Textbox(
+            show_label=False,
+            placeholder="Type your message here...",
+            scale=4
+        )
+        img = gr.Image(
+            type="pil",
+            label="Upload image (optional)",
+            scale=1
+        )
+    submit_btn = gr.Button("Send")
     submit_btn.click(
+        get_response,
+        inputs=[msg, chatbot, img],
+        outputs=[msg, chatbot]
+    )
+    msg.submit(
+        get_response,
+        inputs=[msg, chatbot, img],
+        outputs=[msg, chatbot]
     )
 if __name__ == "__main__":