# vlm_comparer / app.py
import functools
import os

import gradio as gr
import spaces
from transformers import pipeline

# Pre-populated vision models. Each id below supports the plain
# "image-classification" pipeline task.
PREDEFINED_MODELS = {
    "ViT Base (google/vit-base-patch16-224)": "google/vit-base-patch16-224",
    "DeiT Base (facebook/deit-base-distilled-patch16-224)": "facebook/deit-base-distilled-patch16-224",
    "ResNet-50 (microsoft/resnet-50)": "microsoft/resnet-50",
}
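
# A small cache so repeated comparisons don't re-download and re-initialize
# weights on every button click. Assumptions: the compared models fit in
# memory side by side, and pipelines stay valid across calls; under ZeroGPU,
# where the GPU is only attached inside the @spaces.GPU call, re-creating
# the pipeline per call may be safer.
@functools.lru_cache(maxsize=4)
def get_classifier(model_name, device):
    """Create (or reuse) an image-classification pipeline for model_name."""
    return pipeline("image-classification", model=model_name, device=device)
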
@spaces.GPU
def compare_vision_models(image, model1_choice, model1_custom, model2_choice, model2_custom):
    """
    Resolve each selection to a Hugging Face model id (the pre-defined id,
    or the user-supplied id when "Custom" is selected), build an
    image-classification pipeline for each model, and run inference on the
    provided image.
    """
    # Resolve the model ids to use.
    model1_name = model1_custom if model1_choice == "Custom" else PREDEFINED_MODELS.get(model1_choice)
    model2_name = model2_custom if model2_choice == "Custom" else PREDEFINED_MODELS.get(model2_choice)
    if not model1_name or not model2_name:
        raise gr.Error("Select a pre-defined model, or enter a custom model id, for both slots.")

    # Run on GPU when the Space is GPU-enabled (set USE_GPU=1 in the Space's
    # settings); otherwise fall back to CPU.
    device = 0 if os.environ.get("USE_GPU", "0") == "1" else -1

    # Build (or reuse) a pipeline per model. This assumes both models support
    # the "image-classification" task.
    classifier1 = get_classifier(model1_name, device)
    classifier2 = get_classifier(model2_name, device)

    # Run inference.
    preds1 = classifier1(image)
    preds2 = classifier2(image)

    # Format predictions as one "label: confidence" line per prediction.
    result1 = "\n".join(f"{pred['label']}: {pred['score']:.3f}" for pred in preds1)
    result2 = "\n".join(f"{pred['label']}: {pred['score']:.3f}" for pred in preds2)
    return result1, result2

# Build the Gradio interface using Blocks.
with gr.Blocks(title="Vision Model Comparison Tool") as demo:
    gr.Markdown(
        "## Vision Model Comparison Tool\n"
        "Select two Hugging Face vision models to compare their outputs side-by-side!"
    )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Model 1")
            model1_choice = gr.Dropdown(
                choices=list(PREDEFINED_MODELS.keys()) + ["Custom"],
                label="Select a pre-defined model or 'Custom'",
            )
            model1_custom = gr.Textbox(
                label="Custom Hugging Face Model",
                placeholder="e.g., username/model_name",
            )
        with gr.Column():
            gr.Markdown("### Model 2")
            model2_choice = gr.Dropdown(
                choices=list(PREDEFINED_MODELS.keys()) + ["Custom"],
                label="Select a pre-defined model or 'Custom'",
            )
            model2_custom = gr.Textbox(
                label="Custom Hugging Face Model",
                placeholder="e.g., username/model_name",
            )
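
    # Optional UX sketch (an addition, not original behavior): show each
    # custom-model textbox only when "Custom" is selected in its dropdown.
    # gr.update(visible=...) toggles component visibility from an event handler.
    def toggle_custom(choice):
        return gr.update(visible=(choice == "Custom"))

    model1_choice.change(toggle_custom, inputs=model1_choice, outputs=model1_custom)
    model2_choice.change(toggle_custom, inputs=model2_choice, outputs=model2_custom)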

    image_input = gr.Image(label="Input Image", type="pil")
    compare_btn = gr.Button("Compare Models")
    with gr.Row():
        output1 = gr.Textbox(label="Model 1 Output")
        output2 = gr.Textbox(label="Model 2 Output")

    compare_btn.click(
        fn=compare_vision_models,
        inputs=[image_input, model1_choice, model1_custom, model2_choice, model2_custom],
        outputs=[output1, output2],
    )

demo.launch()
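
# Local usage (assumes gradio, transformers, torch, and spaces are installed):
#   python app.py
# then open the URL Gradio prints. Outside a Hugging Face Space, the
# @spaces.GPU decorator is a no-op, so the app runs on CPU unless USE_GPU=1
# is set and a CUDA device is available.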