Spaces:

prithivMLmods
/

Multimodal-VLM-v1.0

Running on Zero

App Files Community

prithivMLmods committed on Jun 29

Commit

118fb8b

verified ·

1 Parent(s): 1cce2c2

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -15

app.py CHANGED Viewed

@@ -172,6 +172,10 @@ def downsample_video(video_path):
     vidcap.release()
     return frames
 @spaces.GPU
 def generate_image(model_name: str, text: str, image: Image.Image,
                    max_new_tokens: int = 1024,
@@ -296,7 +300,7 @@ def generate_video(model_name: str, text: str, video_path: str,
         time.sleep(0.01)
         yield buffer, buffer
-# Define examples for image, video, and object detection inference
 image_examples = [
     ["convert this page to doc [text] precisely for markdown.", "images/1.png"],
     ["convert this page to doc [table] precisely for markdown.", "images/2.png"],
@@ -309,11 +313,6 @@ video_examples = [
     ["explain the video in detail.", "videos/2.mp4"]
 ]
-object_detection_examples = [
-    ["object/1.png", "detect red and yellow cars."],
-    ["object/2.png", "detect the white cat."]
-]
 # Added CSS to style the output area as a "Canvas"
 css = """
 .submit-btn {
@@ -357,21 +356,13 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
                         with gr.Column():
                             input_img = gr.Image(label="Input Image", type="pil")
                             system_prompt = gr.Textbox(label="System Prompt", value=default_system_prompt, visible=False)
-                            text_input = gr.Textbox(label="Query Input")
                             submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
                         with gr.Column():
                             model_output_text = gr.Textbox(label="Model Output Text")
                             parsed_boxes = gr.Textbox(label="Parsed Boxes")
                             annotated_image = gr.Image(label="Annotated Image")
-                    gr.Examples(
-                        examples=object_detection_examples,
-                        inputs=[input_img, text_input],
-                        outputs=[model_output_text, parsed_boxes, annotated_image],
-                        fn=run_example,
-                        cache_examples=True,
-                    )
                     submit_btn.click(
                         fn=run_example,
                         inputs=[input_img, text_input, system_prompt],

     vidcap.release()
     return frames
+@spaces.GPU'system'gr.Examples(
+        examples=image_examples,
+        inputs=[image_query, image_upload]
+    )
 @spaces.GPU
 def generate_image(model_name: str, text: str, image: Image.Image,
                    max_new_tokens: int = 1024,
         time.sleep(0.01)
         yield buffer, buffer
+# Define examples for image and video inference
 image_examples = [
     ["convert this page to doc [text] precisely for markdown.", "images/1.png"],
     ["convert this page to doc [table] precisely for markdown.", "images/2.png"],
     ["explain the video in detail.", "videos/2.mp4"]
 ]
 # Added CSS to style the output area as a "Canvas"
 css = """
 .submit-btn {
                         with gr.Column():
                             input_img = gr.Image(label="Input Image", type="pil")
                             system_prompt = gr.Textbox(label="System Prompt", value=default_system_prompt, visible=False)
+                            text_input = gr.Textbox(label="Query Input", placeholder="Detect 'humans'")
                             submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
                         with gr.Column():
                             model_output_text = gr.Textbox(label="Model Output Text")
                             parsed_boxes = gr.Textbox(label="Parsed Boxes")
                             annotated_image = gr.Image(label="Annotated Image")
                     submit_btn.click(
                         fn=run_example,
                         inputs=[input_img, text_input, system_prompt],