Spaces:

wilwork
/

jina-clip-v1-test

Running

wilwork committed on Feb 27

Commit

cbcffb4

verified ·

1 Parent(s): 08f9b31

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,24 +16,32 @@ model = CLIPModel.from_pretrained(model_name, trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-def compute_similarity(input1, input2, type1, type2):
     # Process input1
     if type1 == "Image":
         image1 = Image.open(input1).convert("RGB")
         input1_tensor = processor(images=image1, return_tensors="pt")["pixel_values"]
-    elif isinstance(input1, str) and input1.strip():
-        input1_tensor = tokenizer(input1, return_tensors="pt")["input_ids"]
     else:
-        return "Error: Invalid text input for Input 1"
     # Process input2
     if type2 == "Image":
         image2 = Image.open(input2).convert("RGB")
         input2_tensor = processor(images=image2, return_tensors="pt")["pixel_values"]
-    elif isinstance(input2, str) and input2.strip():
-        input2_tensor = tokenizer(input2, return_tensors="pt")["input_ids"]
     else:
-        return "Error: Invalid text input for Input 2"
     # Compute embeddings
     with torch.no_grad():
@@ -72,6 +80,8 @@ with gr.Blocks() as demo:
         inputs=[
             input1,
             input2,
             type1,
             type2
         ],

 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+def compute_similarity(input1, input2, text1, text2, type1, type2):
     # Process input1
     if type1 == "Image":
+        if not input1:
+            return "Error: No image provided for Input 1"
         image1 = Image.open(input1).convert("RGB")
         input1_tensor = processor(images=image1, return_tensors="pt")["pixel_values"]
+    elif type1 == "Text":
+        if not text1.strip():
+            return "Error: No text provided for Input 1"
+        input1_tensor = tokenizer(text1, return_tensors="pt")["input_ids"]
     else:
+        return "Error: Invalid input type for Input 1"
     # Process input2
     if type2 == "Image":
+        if not input2:
+            return "Error: No image provided for Input 2"
         image2 = Image.open(input2).convert("RGB")
         input2_tensor = processor(images=image2, return_tensors="pt")["pixel_values"]
+    elif type2 == "Text":
+        if not text2.strip():
+            return "Error: No text provided for Input 2"
+        input2_tensor = tokenizer(text2, return_tensors="pt")["input_ids"]
     else:
+        return "Error: Invalid input type for Input 2"
     # Compute embeddings
     with torch.no_grad():
         inputs=[
             input1,
             input2,
+            text1,
+            text2,
             type1,
             type2
         ],