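"""Search a local image folder with CLIP.

Given a text prompt, encode it with CLIP, compare it against the CLIP
embeddings of every image in the folder, and return the filename(s) with
the highest cosine similarity, exposed through a small Gradio interface.
"""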
import os

import clip
import gradio as gr
import numpy as np
import torch
from PIL import Image

# Pick the device first, then load the CLIP model directly onto it
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
model.eval()

# Business listing used to label the logo images
Business_Listing = "Air Guide"

def find_similar_images(text_input):
    # Directory containing the candidate images
    image_dir = "/content/sample_data/Tourism"

    # Text description of the images, built from the business listing
    description = f"{Business_Listing} Logo"

    original_images = []
    images = []
    texts = []

    # Collect image files once so indices map back to filenames later
    # (restricted to raster formats PIL can actually open)
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp', '.ico')
    image_files = sorted(
        filename for filename in os.listdir(image_dir)
        if filename.lower().endswith(image_extensions)
    )

    # Load and preprocess images
    for filename in image_files:
        # Convert to RGB, as CLIP's preprocessing expects 3-channel input
        image = Image.open(os.path.join(image_dir, filename)).convert("RGB")
        original_images.append(image)
        images.append(preprocess(image))
        texts.append(description)
    
    # Stack the preprocessed images and tokenize the query text
    image_input = torch.stack(images).to(device)
    text_tokens = clip.tokenize([f"This is {text_input}"]).to(device)
    
    # Encode text and image features
    with torch.no_grad():
        image_features = model.encode_image(image_input).float()
        text_features = model.encode_text(text_tokens).float()
    
    # Normalize features and calculate similarity
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)
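    # With unit-norm features, this dot product is cosine similarity;
    # the result has shape (1, num_images): one score per image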
    similarity = text_features.cpu().numpy() @ image_features.cpu().numpy().T
    
    # Find the maximum similarity value
    max_similarity_value = similarity[0, :].max()

    # Find every index that attains the maximum (there may be ties)
    max_similarity_indices = np.where(similarity[0, :] == max_similarity_value)

    # Map those indices back to filenames; image_files preserves the order
    # the images were loaded in, so indices line up with similarity columns
    filenames_with_highest_similarity = [image_files[i] for i in max_similarity_indices[0]]

    return filenames_with_highest_similarity, float(max_similarity_value)

# Define a Gradio interface
iface = gr.Interface(
    fn=find_similar_images,
    inputs="text",
    outputs=["text", "number"],
    live=True,
    title="CLIP Model Image Search",
)

iface.launch()
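
# Example query without the UI (assumes images exist in image_dir):
#   matches, score = find_similar_images("an air travel guide logo")
#   print(matches, score)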