wilwork committed
Commit 0e73f03 · verified · 1 Parent(s): 47bffaa

Update app.py

Files changed (1)
  1. app.py +10 -26
app.py CHANGED
@@ -1,40 +1,24 @@
 import gradio as gr
-from transformers import CLIPModel, CLIPFeatureExtractor, BertTokenizer
+from transformers import AutoModel
 from PIL import Image
 import torch
 import torch.nn.functional as F
+import requests
+from io import BytesIO

-# Load model and processors separately
-model_name = "jinaai/jina-clip-v1"
-model = CLIPModel.from_pretrained(model_name)
-feature_extractor = CLIPFeatureExtractor.from_pretrained(model_name)
-tokenizer = BertTokenizer.from_pretrained(model_name)
+# Load model with remote code support
+model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)

 def compute_similarity(image, text):
     image = Image.fromarray(image)  # Convert NumPy array to PIL Image

-    # Process image
-    image_inputs = feature_extractor(images=image, return_tensors="pt")
-
-    # Process text (Remove `token_type_ids`)
-    text_inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
-    text_inputs.pop("token_type_ids", None)
-
     with torch.no_grad():
-        # Extract embeddings
-        image_embeds = model.get_image_features(**image_inputs)
-        text_embeds = model.get_text_features(**text_inputs)
-
-        # Print to debug
-        print("Image Embedding:", image_embeds)
-        print("Text Embedding:", text_embeds)
-
-        # Normalize embeddings
-        image_embeds = F.normalize(image_embeds, p=2, dim=-1)
-        text_embeds = F.normalize(text_embeds, p=2, dim=-1)
+        # Encode text and image using JinaAI CLIP model
+        text_embeds = model.encode_text([text])  # Expecting list input
+        image_embeds = model.encode_image([image])  # Expecting list input

         # Compute cosine similarity
-        similarity_score = (image_embeds @ text_embeds.T).item()
+        similarity_score = (text_embeds @ image_embeds.T).item()

     return similarity_score

@@ -47,4 +31,4 @@ demo = gr.Interface(
     description="Upload an image and enter a text prompt to get the similarity score."
 )

-demo.launch()
+demo.launch()
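
Note on the change: the previous version explicitly L2-normalized both embeddings with F.normalize before taking the dot product, while the new code uses the outputs of encode_text / encode_image directly. Whether jina-clip-v1's encoders already return unit-length vectors is not verified here; if they do not, the score is no longer a true cosine similarity. A minimal sketch of the function with the normalization kept, reusing the names from the app above and assuming encode_* accept lists and return array-like embeddings of shape (1, dim):

def compute_similarity(image, text):
    image = Image.fromarray(image)  # Convert NumPy array to PIL Image

    with torch.no_grad():
        # Assumption: encode_* accept list inputs and return (1, dim) array-like embeddings
        text_embeds = torch.as_tensor(model.encode_text([text]))
        image_embeds = torch.as_tensor(model.encode_image([image]))

        # Explicit L2 normalization, as in the previous version of app.py
        text_embeds = F.normalize(text_embeds, p=2, dim=-1)
        image_embeds = F.normalize(image_embeds, p=2, dim=-1)

        # Dot product of unit vectors = cosine similarity
        similarity_score = (text_embeds @ image_embeds.T).item()

    return similarity_score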