import gradio as gr
import evaluate
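
# Load the CLIP image-text retrieval metric from the Hugging Face Hub.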
clip_metric = evaluate.load("d-matrix/clip_eval")
print("Successfully loaded CLIP evaluation metric")
AVAILABLE_MODELS = [
    "openai/clip-vit-base-patch32",
    "openai/clip-vit-large-patch14",
    "openai/clip-vit-base-patch16",
]
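
# Retrieval benchmarks supported by the metric.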
AVAILABLE_DATASETS = ["mscoco", "flickr"]

with gr.Blocks(title="CLIP Evaluation") as demo:
    gr.Markdown("# CLIP Model Evaluation")
    gr.Markdown(
        """
        This tool evaluates CLIP models on image-text retrieval tasks using standard datasets.
        """
    )
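
    # Layout: input controls on the left, results pane on the right.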
    with gr.Row():
        with gr.Column():
            model_input = gr.Dropdown(
                choices=AVAILABLE_MODELS, value=AVAILABLE_MODELS[0], label="CLIP Model"
            )

            dataset_input = gr.Dropdown(
                choices=AVAILABLE_DATASETS, value="mscoco", label="Dataset"
            )

            samples_input = gr.Slider(
                minimum=1, maximum=10, value=1, step=1, label="Number of samples"
            )

            evaluate_button = gr.Button("Evaluate Model")

        with gr.Column():
            results_output = gr.Markdown("Results will appear here")
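
    # Run the metric for the selected model/dataset and format the
    # recall scores as a markdown report.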
    def evaluate_clip(model_name, dataset, num_samples, progress=gr.Progress()):
        progress(0, desc="Evaluating CLIP model...")
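
        # The metric takes list-valued arguments, so each scalar input
        # is wrapped in a single-element list.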
        results = clip_metric.compute(
            model_name=[model_name],
            dataset_names=[dataset],
            n_examples=[int(num_samples)],
        )
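
        # Assemble a markdown report from the returned scores.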
        output = "## CLIP Evaluation Results\n\n"
        output += f"**Model:** {model_name}\n"
        output += f"**Dataset:** {dataset}\n"
        output += f"**Samples:** {num_samples}\n\n"
        output += "**Image Retrieval (Text→Image):**\n"
        for k in [1, 5, 10]:
            metric_name = f"{dataset}:image_recall@{k}"
            if metric_name in results:
                output += f"* Recall@{k}: {results[metric_name]:.4f}\n"

        output += "\n**Text Retrieval (Image→Text):**\n"
        for k in [1, 5, 10]:
            metric_name = f"{dataset}:text_recall@{k}"
            if metric_name in results:
                output += f"* Recall@{k}: {results[metric_name]:.4f}\n"

        return output
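
    # Wire the button click to the evaluation callback.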
    evaluate_button.click(
        fn=evaluate_clip,
        inputs=[model_input, dataset_input, samples_input],
        outputs=results_output,
    )

if __name__ == "__main__":
    demo.launch()