# vlm_comparer / app.py
import functools
import os

import gradio as gr
import spaces
from transformers import pipeline

# Pre-populated vision models. Each id below supports the plain
# "image-classification" pipeline task.
PREDEFINED_MODELS = {
    "ViT Base (google/vit-base-patch16-224)": "google/vit-base-patch16-224",
    "DeiT Base (facebook/deit-base-distilled-patch16-224)": "facebook/deit-base-distilled-patch16-224",
    "ResNet-50 (microsoft/resnet-50)": "microsoft/resnet-50",
}
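
# A small cache so repeated comparisons don't re-download and re-initialize
# weights on every button click. Assumptions: the compared models fit in
# memory side by side, and pipelines stay valid across calls; under ZeroGPU,
# where the GPU is only attached inside the @spaces.GPU call, re-creating
# the pipeline per call may be safer.
@functools.lru_cache(maxsize=4)
def get_classifier(model_name, device):
    """Create (or reuse) an image-classification pipeline for model_name."""
    return pipeline("image-classification", model=model_name, device=device)
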
@spaces.GPU
def compare_vision_models(image, model1_choice, model1_custom, model2_choice, model2_custom):
    """
    Resolve each selection to a Hugging Face model id (the pre-defined id,
    or the user-supplied id when "Custom" is selected), build an
    image-classification pipeline for each model, and run inference on the
    provided image.
    """
    # Resolve the model ids to use.
    model1_name = model1_custom if model1_choice == "Custom" else PREDEFINED_MODELS.get(model1_choice)
    model2_name = model2_custom if model2_choice == "Custom" else PREDEFINED_MODELS.get(model2_choice)
    if not model1_name or not model2_name:
        raise gr.Error("Select a pre-defined model, or enter a custom model id, for both slots.")

    # Run on GPU when the Space is GPU-enabled (set USE_GPU=1 in the Space's
    # settings); otherwise fall back to CPU.
    device = 0 if os.environ.get("USE_GPU", "0") == "1" else -1

    # Build (or reuse) a pipeline per model. This assumes both models support
    # the "image-classification" task.
    classifier1 = get_classifier(model1_name, device)
    classifier2 = get_classifier(model2_name, device)

    # Run inference.
    preds1 = classifier1(image)
    preds2 = classifier2(image)

    # Format predictions as one "label: confidence" line per prediction.
    result1 = "\n".join(f"{pred['label']}: {pred['score']:.3f}" for pred in preds1)
    result2 = "\n".join(f"{pred['label']}: {pred['score']:.3f}" for pred in preds2)
    return result1, result2

# Build the Gradio interface using Blocks.
with gr.Blocks(title="Vision Model Comparison Tool") as demo:
    gr.Markdown(
        "## Vision Model Comparison Tool\n"
        "Select two Hugging Face vision models to compare their outputs side-by-side!"
    )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Model 1")
            model1_choice = gr.Dropdown(
                choices=list(PREDEFINED_MODELS.keys()) + ["Custom"],
                label="Select a pre-defined model or 'Custom'",
            )
            model1_custom = gr.Textbox(
                label="Custom Hugging Face Model",
                placeholder="e.g., username/model_name",
            )
        with gr.Column():
            gr.Markdown("### Model 2")
            model2_choice = gr.Dropdown(
                choices=list(PREDEFINED_MODELS.keys()) + ["Custom"],
                label="Select a pre-defined model or 'Custom'",
            )
            model2_custom = gr.Textbox(
                label="Custom Hugging Face Model",
                placeholder="e.g., username/model_name",
            )
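
    # Optional UX sketch (an addition, not original behavior): show each
    # custom-model textbox only when "Custom" is selected in its dropdown.
    # gr.update(visible=...) toggles component visibility from an event handler.
    def toggle_custom(choice):
        return gr.update(visible=(choice == "Custom"))

    model1_choice.change(toggle_custom, inputs=model1_choice, outputs=model1_custom)
    model2_choice.change(toggle_custom, inputs=model2_choice, outputs=model2_custom)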

    image_input = gr.Image(label="Input Image", type="pil")
    compare_btn = gr.Button("Compare Models")
    with gr.Row():
        output1 = gr.Textbox(label="Model 1 Output")
        output2 = gr.Textbox(label="Model 2 Output")

    compare_btn.click(
        fn=compare_vision_models,
        inputs=[image_input, model1_choice, model1_custom, model2_choice, model2_custom],
        outputs=[output1, output2],
    )

demo.launch()
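
# Local usage (assumes gradio, transformers, torch, and spaces are installed):
#   python app.py
# then open the URL Gradio prints. Outside a Hugging Face Space, the
# @spaces.GPU decorator is a no-op, so the app runs on CPU unless USE_GPU=1
# is set and a CUDA device is available.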