Spaces:
Runtime error
Runtime error
import logging
import os

import clip
import gradio as gr
import numpy as np
import torch
from PIL import Image
# Pick the compute device first: CUDA when a GPU is visible, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# clip.load returns the ViT-B/32 model together with the image preprocessing
# callable that is applied to every image before encoding.
model, preprocess = clip.load("ViT-B/32")
model.to(device).eval()  # move to the chosen device and switch to eval mode

# Name of the business listing this app is configured for.
Business_Listing = "Air Guide"
# File extensions considered as candidate images. Some of these (.svg, .eps,
# .pdf) usually cannot be decoded by Pillow; such files are skipped at load
# time instead of aborting the whole search.
_IMAGE_EXTENSIONS = (
    '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff',
    '.webp', '.ico', '.svg', '.eps', '.pdf',
)


def _load_preprocessed_images(image_dir):
    """Load every readable image in ``image_dir`` and apply CLIP preprocessing.

    Returns:
        A ``(filenames, tensors)`` pair of equal-length lists, where
        ``tensors[i]`` is the preprocessed image loaded from ``filenames[i]``.
        Keeping both lists in lockstep guarantees that a similarity index
        always maps back to the file it was computed from.
    """
    filenames = []
    tensors = []
    # Sort so the result order is deterministic; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(image_dir)):
        # Case-insensitive match so files such as "LOGO.PNG" are not missed.
        if not filename.lower().endswith(_IMAGE_EXTENSIONS):
            continue
        path = os.path.join(image_dir, filename)
        try:
            # The context manager releases the underlying file handle.
            with Image.open(path) as image:
                tensor = preprocess(image.convert("RGB"))
        except OSError:
            # Pillow raises OSError (including UnidentifiedImageError) for
            # formats it cannot decode; skip the file rather than fail.
            logging.getLogger(__name__).warning(
                "Skipping unreadable image: %s", path
            )
            continue
        filenames.append(filename)
        tensors.append(tensor)
    return filenames, tensors


def find_similar_images(text_input: str,
                        image_dir: str = "/content/sample_data/Tourism") -> tuple:
    """Find the image file(s) in ``image_dir`` most similar to ``text_input``.

    The query is embedded with CLIP as ``"This is {text_input}"`` and compared,
    by cosine similarity, against the CLIP embedding of every readable image
    in the directory.

    Args:
        text_input: Free-text description to search for.
        image_dir: Directory containing the candidate images. Defaults to the
            location the script originally had hard-coded.

    Returns:
        A tuple ``(filenames, score)``: the list of file names that share the
        highest similarity (more than one only on an exact tie) and that
        similarity as a plain Python ``float``. If the directory contains no
        readable image, ``([], 0.0)`` is returned.

    Raises:
        OSError: If ``image_dir`` does not exist or cannot be listed.
    """
    # NOTE(review): images are re-read and re-encoded on every call; with a
    # live Gradio interface this runs per input change. Consider caching the
    # image features if the directory is static.
    filenames, tensors = _load_preprocessed_images(image_dir)
    if not filenames:
        return [], 0.0

    image_input = torch.stack(tensors).to(device)
    # truncate=True keeps over-long user input from raising inside tokenize.
    text_tokens = clip.tokenize(
        [f"This is {text_input}"], truncate=True
    ).to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        image_features = model.encode_image(image_input).float()
        text_features = model.encode_text(text_tokens).float()

    # L2-normalise so the dot product below is the cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    similarity = text_features.cpu().numpy() @ image_features.cpu().numpy().T
    scores = similarity[0, :]  # single query -> one row of per-image scores

    best_score = scores.max()
    # Exact comparison is safe here: best_score was taken from this same array.
    best_indices = np.flatnonzero(scores == best_score)
    best_filenames = [filenames[i] for i in best_indices]
    # Convert numpy.float32 to a builtin float so it serialises cleanly.
    return best_filenames, float(best_score)
# Build the Gradio UI: a single text box feeds find_similar_images, whose two
# return values are shown as a text field and a number field.
# live=True asks Gradio to re-run the function as the input changes.
iface = gr.Interface(
    find_similar_images,
    "text",
    ["text", "number"],
    title="CLIP Model Image Search",
    interpretation="default",
    live=True,
)

# Start the web app.
iface.launch()