wilwork committed on
Commit
5fadd6e
·
verified ·
1 Parent(s): 110181b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -1,21 +1,25 @@
1
  import gradio as gr
2
- from transformers import CLIPModel, CLIPProcessor
3
  from PIL import Image
4
  import torch
5
 
6
- # Load model and processor
7
  model_name = "jinaai/jina-clip-v1"
8
  model = CLIPModel.from_pretrained(model_name)
9
- processor = CLIPProcessor.from_pretrained(model_name)
 
10
 
11
  def compute_similarity(image, text):
12
- image = Image.fromarray(image) # Convert NumPy array to PIL image
 
 
 
13
 
14
- # Process inputs
15
- inputs = processor(text=[text], images=image, return_tensors="pt", padding=True, truncation=True)
16
 
17
  with torch.no_grad():
18
- outputs = model(**inputs)
19
  logits_per_image = outputs.logits_per_image # Image-to-text similarity
20
  similarity_score = logits_per_image.item()
21
 
@@ -26,7 +30,7 @@ demo = gr.Interface(
26
  fn=compute_similarity,
27
  inputs=[gr.Image(type="numpy"), gr.Textbox(label="Enter text")],
28
  outputs=gr.Number(label="Similarity Score"),
29
- title="CLIP Image-Text Similarity",
30
  description="Upload an image and enter a text prompt to get the similarity score."
31
  )
32
 
 
1
  import gradio as gr
2
+ from transformers import CLIPModel, CLIPFeatureExtractor, BertTokenizer
3
  from PIL import Image
4
  import torch
5
 
6
+ # Load model and appropriate processors separately
7
  model_name = "jinaai/jina-clip-v1"
8
  model = CLIPModel.from_pretrained(model_name)
9
+ feature_extractor = CLIPFeatureExtractor.from_pretrained(model_name)
10
+ tokenizer = BertTokenizer.from_pretrained(model_name)
11
 
12
  def compute_similarity(image, text):
13
+ image = Image.fromarray(image) # Convert NumPy array to PIL Image
14
+
15
+ # Process image
16
+ image_inputs = feature_extractor(images=image, return_tensors="pt")
17
 
18
+ # Process text
19
+ text_inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
20
 
21
  with torch.no_grad():
22
+ outputs = model(**image_inputs, **text_inputs)
23
  logits_per_image = outputs.logits_per_image # Image-to-text similarity
24
  similarity_score = logits_per_image.item()
25
 
 
30
  fn=compute_similarity,
31
  inputs=[gr.Image(type="numpy"), gr.Textbox(label="Enter text")],
32
  outputs=gr.Number(label="Similarity Score"),
33
+ title="JinaAI CLIP Image-Text Similarity",
34
  description="Upload an image and enter a text prompt to get the similarity score."
35
  )
36