sflindrs committed
Commit c9d256e · verified · 1 Parent(s): 17af200

Update app.py

Files changed (1)
  1. app.py +104 -69
app.py CHANGED
@@ -1,83 +1,118 @@
 
 import gradio as gr
 from transformers import pipeline
-import os
-import spaces
-
-# Define some pre-populated vision models.
-PREDEFINED_MODELS = {
-    "ViT Base (google/vit-base-patch16-224)": "google/vit-base-patch16-224",
-    "DeiT Base (facebook/deit-base-distilled-patch16-224)": "facebook/deit-base-distilled-patch16-224",
-    "CLIP ViT Base (openai/clip-vit-base-patch32)": "openai/clip-vit-base-patch32"
-}
-
-@spaces.GPU
-def compare_vision_models(image, model1_choice, model1_custom, model2_choice, model2_custom):
-    """
-    For each model selection, use the pre-defined model identifier unless the user selects "Custom" and enters an identifier.
-    Then create an image-classification pipeline for each model and run inference on the provided image.
-    """
-    # Determine the model names to use:
-    model1_name = (
-        PREDEFINED_MODELS.get(model1_choice, model1_custom)
-        if model1_choice != "Custom" else model1_custom
-    )
-    model2_name = (
-        PREDEFINED_MODELS.get(model2_choice, model2_custom)
-        if model2_choice != "Custom" else model2_custom
-    )
-
-    # Optionally, if you deploy on a GPU-enabled space (e.g. using ZeroGPU), you can set device=0.
-    # Here, we check an environment variable "USE_GPU" (set it to "1" in your Space's settings if needed).
     device = 0 if os.environ.get("USE_GPU", "0") == "1" else -1
 
-    # Create pipelines. In this example we assume the models support image classification.
-    classifier1 = pipeline("image-classification", model=model1_name, device=device)
-    classifier2 = pipeline("image-classification", model=model2_name, device=device)
-
-    # Run inference
-    preds1 = classifier1(image)
-    preds2 = classifier2(image)
-
-    # Format the predictions as text (each line shows the predicted label and its confidence score)
-    result1 = "\n".join([f"{pred['label']}: {pred['score']:.3f}" for pred in preds1])
-    result2 = "\n".join([f"{pred['label']}: {pred['score']:.3f}" for pred in preds2])
-
-    return result1, result2
-
-# Build the Gradio interface using Blocks.
-with gr.Blocks(title="Vision Model Comparison Tool") as demo:
-    gr.Markdown("## Vision Model Comparison Tool\nSelect two Hugging Face vision models to compare their outputs side-by-side!")
     with gr.Row():
-        with gr.Column():
-            gr.Markdown("### Model 1")
-            model1_choice = gr.Dropdown(
-                choices=list(PREDEFINED_MODELS.keys()) + ["Custom"],
-                label="Select a pre-defined model or 'Custom'"
-            )
-            model1_custom = gr.Textbox(
-                label="Custom Hugging Face Model",
-                placeholder="e.g., username/model_name"
-            )
-        with gr.Column():
-            gr.Markdown("### Model 2")
-            model2_choice = gr.Dropdown(
-                choices=list(PREDEFINED_MODELS.keys()) + ["Custom"],
-                label="Select a pre-defined model or 'Custom'"
-            )
-            model2_custom = gr.Textbox(
-                label="Custom Hugging Face Model",
-                placeholder="e.g., username/model_name"
-            )
-    image_input = gr.Image(label="Input Image", type="pil")
-    compare_btn = gr.Button("Compare Models")
     with gr.Row():
-        output1 = gr.Textbox(label="Model 1 Output")
-        output2 = gr.Textbox(label="Model 2 Output")
-
-    compare_btn.click(
-        fn=compare_vision_models,
-        inputs=[image_input, model1_choice, model1_custom, model2_choice, model2_custom],
-        outputs=[output1, output2]
     )
 
-demo.launch()
 
+import os
 import gradio as gr
 from transformers import pipeline
+import spaces  # This module is available when deploying on HF Spaces with ZeroGPU
+
+# --- Trending models for image text-to-text tasks ---
+TRENDING_MODELS = [
+    "Salesforce/blip2-opt-2.7b",
+    "Salesforce/blip2-flan-t5-xl",
+    "Salesforce/blip-image-captioning-base",
+    "Salesforce/blip-image-captioning-large",
+    "nlpconnect/vit-gpt2-image-captioning",
+    "OFA-Sys/OFA-base",
+    "OFA-Sys/OFA-large",
+    "dandelin/vilt-b32-finetuned-vqa",
+    "dandelin/vilt-b32-mlm",
+    "uclanlp/visualbert-vqa-coco-pre"
+]
+
+# --- Helper: if the user selects "Custom", then they can enter any model identifier ---
+def resolve_model(chosen, custom):
+    if chosen == "Custom":
+        return custom.strip()
+    else:
+        return chosen
+
+# --- Main inference function ---
+# If you are using ZeroGPU on Hugging Face Spaces, make sure to set the environment variable USE_GPU=1.
+# The @spaces.GPU() decorator ensures that heavy inference runs on GPU in a ZeroGPU Space.
+@spaces.GPU()
+def compare_image_to_text_models(image, prompt, model1_choice, model1_custom, model2_choice, model2_custom):
+    # Determine which model identifiers to use.
+    model1_name = resolve_model(model1_choice, model1_custom)
+    model2_name = resolve_model(model2_choice, model2_custom)
+
+    # Set device to GPU (0) if USE_GPU is enabled; otherwise use CPU (-1)
     device = 0 if os.environ.get("USE_GPU", "0") == "1" else -1
 
+    # Create pipelines for image-to-text.
+    # Note: Many instruction-following image models (e.g. BLIP2) accept a text prompt along with an image.
+    # We use the "image-to-text" task here so that the prompt is taken into account.
+    pipe1 = pipeline("image-to-text", model=model1_name, device=device)
+    pipe2 = pipeline("image-to-text", model=model2_name, device=device)
+
+    # Run inference on the image with the provided prompt.
+    # Depending on the model, the call signature may vary; here we assume a simple call with (image, prompt).
+    output1 = pipe1(image, prompt)
+    output2 = pipe2(image, prompt)
+
+    # Extract the generated text.
+    # (Many pipelines return a list of dicts with key 'generated_text'; if not, we simply convert the output to a string.)
+    def extract_text(output):
+        if isinstance(output, list) and len(output) > 0 and isinstance(output[0], dict) and "generated_text" in output[0]:
+            return output[0]["generated_text"]
+        else:
+            return str(output)
+
+    result1 = extract_text(output1)
+    result2 = extract_text(output2)
+
+    # Format results as chat conversations.
+    # Each chatbot conversation is a list of (speaker, message) tuples.
+    chat1 = [("User", prompt), ("Bot", result1)]
+    chat2 = [("User", prompt), ("Bot", result2)]
+    return chat1, chat2
+
+# --- Build the Gradio interface ---
+# Pre-populated sample prompt.
+sample_prompt = "Describe the image in explicit detail. Return a nested JSON object in response."
+
+with gr.Blocks(title="Image Text-to-Text Comparison Tool") as demo:
+    gr.Markdown(
+        """
+        # Image Text-to-Text Comparison Tool
+        Compare two trending image text-to-text (instruction-following) models side-by-side.
+        Select a model from the dropdown (or choose Custom to enter your own model identifier) and see how it describes the image.
+        """
+    )
+
     with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("## Input")
+            image_input = gr.Image(label="Upload an Image", type="pil")
+            prompt_input = gr.Textbox(label="Text Prompt", value=sample_prompt, lines=3)
+        with gr.Column(scale=1):
+            gr.Markdown("## Model Selection")
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("### Model 1")
+                    model1_choice = gr.Dropdown(
+                        choices=TRENDING_MODELS + ["Custom"],
+                        value=TRENDING_MODELS[0],
+                        label="Select Model 1"
+                    )
+                    model1_custom = gr.Textbox(label="Custom Model 1", placeholder="e.g., username/model_name")
+                with gr.Column():
+                    gr.Markdown("### Model 2")
+                    model2_choice = gr.Dropdown(
+                        choices=TRENDING_MODELS + ["Custom"],
+                        value=TRENDING_MODELS[1],
+                        label="Select Model 2"
+                    )
+                    model2_custom = gr.Textbox(label="Custom Model 2", placeholder="e.g., username/model_name")
+
+    compare_button = gr.Button("Compare Models")
+
+    gr.Markdown("## Chatbot Outputs (Side-by-Side)")
     with gr.Row():
+        chatbot1 = gr.Chatbot(label="Model 1 Chatbot")
+        chatbot2 = gr.Chatbot(label="Model 2 Chatbot")
+
+    compare_button.click(
+        fn=compare_image_to_text_models,
+        inputs=[image_input, prompt_input, model1_choice, model1_custom, model2_choice, model2_custom],
+        outputs=[chatbot1, chatbot2]
+    )
 
+demo.launch()
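
Review note on the new inference path: in most transformers versions the image-to-text pipeline ignores extra positional arguments, so `pipe1(image, prompt)` likely drops the prompt; it is normally passed as the `prompt=` keyword, and only prompt-aware model families (e.g. BLIP) honour it. Likewise, `gr.Chatbot` in its classic tuple format expects `(user_message, bot_message)` pairs, so `[("User", prompt), ("Bot", result)]` would render "User" and "Bot" as message text rather than speaker labels. The sketch below shows one way a per-model step could look under those assumptions; `run_single_model` is a hypothetical helper, not part of this commit.

# Hedged sketch (not from this commit): assumes the image-to-text pipeline accepts
# `prompt=` for prompt-aware models and that gr.Chatbot uses (user, bot) tuples.
from transformers import pipeline

def run_single_model(image, prompt, model_name, device=-1):
    pipe = pipeline("image-to-text", model=model_name, device=device)
    try:
        # Pass the prompt as a keyword argument; prompt-aware models (e.g. BLIP) use it.
        outputs = pipe(image, prompt=prompt)
    except (TypeError, ValueError):
        # Models that reject a prompt fall back to plain captioning.
        outputs = pipe(image)
    if isinstance(outputs, list) and outputs and isinstance(outputs[0], dict):
        text = outputs[0].get("generated_text", str(outputs[0]))
    else:
        text = str(outputs)
    # One (user_message, bot_message) pair per exchange, which gr.Chatbot renders as a turn.
    return [(prompt, text)]

Newer Gradio releases can also express the same history with `gr.Chatbot(type="messages")` and role/content dictionaries, but the tuple form above matches the components used in this diff.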