Tonic committed on
Commit
70c7a11
·
unverified ·
1 Parent(s): 0105b57
Files changed (1) hide show
  1. app.py +13 -21
app.py CHANGED
@@ -10,23 +10,11 @@ import torch
10
  import torch.nn.functional as F
11
  import spaces
12
  import json
13
- import torch
14
- import torch.nn as nn
15
- import torch.nn.functional as F
16
- from safetensors import safe_open
17
  import json
18
  import gradio as gr
19
  from PIL import Image
20
- import numpy as np
21
  from huggingface_hub import snapshot_download
22
- # from mistral_common.protocol.instruct.messages import UserMessage, TextChunk, ImageChunk
23
- # from mistral_common.protocol.instruct.request import ChatCompletionRequest
24
- # from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
25
  import spaces
26
- import math
27
- from typing import List, Optional, Tuple
28
- import gc
29
- # from contextlib import contextmanager
30
  import os
31
  from loadimg import load_img
32
  import traceback
@@ -53,13 +41,17 @@ with open(f'{model_path}/params.json', 'r') as f:
53
  with open(f'{model_path}/tekken.json', 'r') as f:
54
  tokenizer_config = json.load(f)
55
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- # Initialize the LLM
58
- llm = LLM(model=repo_id,
59
- tokenizer_mode="mistral",
60
- max_model_len=65536,
61
- max_num_batched_tokens=max_img_per_msg * max_tokens_per_img,
62
- limit_mm_per_prompt={"image": max_img_per_msg})
63
 
64
  def encode_image(image: Image.Image, image_format="PNG") -> str:
65
  im_file = BytesIO()
@@ -70,6 +62,7 @@ def encode_image(image: Image.Image, image_format="PNG") -> str:
70
 
71
  @spaces.GPU()
72
  def infer(image_url, prompt, progress=gr.Progress(track_tqdm=True)):
 
73
  image = Image.open(BytesIO(requests.get(image_url).content))
74
  image = image.resize((3844, 2408))
75
  new_image_url = f"data:image/png;base64,{encode_image(image, image_format='PNG')}"
@@ -87,6 +80,7 @@ def infer(image_url, prompt, progress=gr.Progress(track_tqdm=True)):
87
 
88
  @spaces.GPU()
89
  def compare_images(image1_url, image2_url, prompt, progress=gr.Progress(track_tqdm=True)):
 
90
  image1 = Image.open(BytesIO(requests.get(image1_url).content))
91
  image2 = Image.open(BytesIO(requests.get(image2_url).content))
92
  image1 = image1.resize((3844, 2408))
@@ -111,22 +105,20 @@ def compare_images(image1_url, image2_url, prompt, progress=gr.Progress(track_tq
111
 
112
  @spaces.GPU()
113
  def calculate_image_similarity(image1_url, image2_url):
 
114
  # Load and preprocess images
115
  image1 = Image.open(BytesIO(requests.get(image1_url).content)).convert('RGB')
116
  image2 = Image.open(BytesIO(requests.get(image2_url).content)).convert('RGB')
117
  image1 = image1.resize((224, 224)) # Resize to match model input size
118
  image2 = image2.resize((224, 224))
119
 
120
- # Convert images to tensors
121
  image1_tensor = torch.tensor(list(image1.getdata())).view(1, 3, 224, 224).float() / 255.0
122
  image2_tensor = torch.tensor(list(image2.getdata())).view(1, 3, 224, 224).float() / 255.0
123
 
124
- # Get image embeddings using the vision encoder
125
  with torch.no_grad():
126
  embedding1 = llm.model.vision_encoder([image1_tensor])
127
  embedding2 = llm.model.vision_encoder([image2_tensor])
128
 
129
- # Calculate cosine similarity
130
  similarity = F.cosine_similarity(embedding1.mean(dim=0), embedding2.mean(dim=0), dim=0).item()
131
 
132
  return similarity
 
10
  import torch.nn.functional as F
11
  import spaces
12
  import json
 
 
 
 
13
  import json
14
  import gradio as gr
15
  from PIL import Image
 
16
  from huggingface_hub import snapshot_download
 
 
 
17
  import spaces
 
 
 
 
18
  import os
19
  from loadimg import load_img
20
  import traceback
 
41
  with open(f'{model_path}/tekken.json', 'r') as f:
42
  tokenizer_config = json.load(f)
43
 
44
+ llm = None
45
+
46
+ def initialize_llm():
47
+ global llm
48
+ if llm is None:
49
+ llm = LLM(model=repo_id,
50
+ tokenizer_mode="mistral",
51
+ max_model_len=65536,
52
+ max_num_batched_tokens=max_img_per_msg * max_tokens_per_img,
53
+ limit_mm_per_prompt={"image": max_img_per_msg})
54
 
 
 
 
 
 
 
55
 
56
  def encode_image(image: Image.Image, image_format="PNG") -> str:
57
  im_file = BytesIO()
 
62
 
63
  @spaces.GPU()
64
  def infer(image_url, prompt, progress=gr.Progress(track_tqdm=True)):
65
+ initialize_llm()
66
  image = Image.open(BytesIO(requests.get(image_url).content))
67
  image = image.resize((3844, 2408))
68
  new_image_url = f"data:image/png;base64,{encode_image(image, image_format='PNG')}"
 
80
 
81
  @spaces.GPU()
82
  def compare_images(image1_url, image2_url, prompt, progress=gr.Progress(track_tqdm=True)):
83
+ initialize_llm()
84
  image1 = Image.open(BytesIO(requests.get(image1_url).content))
85
  image2 = Image.open(BytesIO(requests.get(image2_url).content))
86
  image1 = image1.resize((3844, 2408))
 
105
 
106
  @spaces.GPU()
107
  def calculate_image_similarity(image1_url, image2_url):
108
+ initialize_llm()
109
  # Load and preprocess images
110
  image1 = Image.open(BytesIO(requests.get(image1_url).content)).convert('RGB')
111
  image2 = Image.open(BytesIO(requests.get(image2_url).content)).convert('RGB')
112
  image1 = image1.resize((224, 224)) # Resize to match model input size
113
  image2 = image2.resize((224, 224))
114
 
 
115
  image1_tensor = torch.tensor(list(image1.getdata())).view(1, 3, 224, 224).float() / 255.0
116
  image2_tensor = torch.tensor(list(image2.getdata())).view(1, 3, 224, 224).float() / 255.0
117
 
 
118
  with torch.no_grad():
119
  embedding1 = llm.model.vision_encoder([image1_tensor])
120
  embedding2 = llm.model.vision_encoder([image2_tensor])
121
 
 
122
  similarity = F.cosine_similarity(embedding1.mean(dim=0), embedding2.mean(dim=0), dim=0).item()
123
 
124
  return similarity