add vllm
app.py CHANGED
```diff
@@ -10,23 +10,11 @@ import torch
 import torch.nn.functional as F
 import spaces
 import json
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from safetensors import safe_open
 import json
 import gradio as gr
 from PIL import Image
-import numpy as np
 from huggingface_hub import snapshot_download
-# from mistral_common.protocol.instruct.messages import UserMessage, TextChunk, ImageChunk
-# from mistral_common.protocol.instruct.request import ChatCompletionRequest
-# from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
 import spaces
-import math
-from typing import List, Optional, Tuple
-import gc
-# from contextlib import contextmanager
 import os
 from loadimg import load_img
 import traceback
```
```diff
@@ -53,13 +41,17 @@ with open(f'{model_path}/params.json', 'r') as f:
 with open(f'{model_path}/tekken.json', 'r') as f:
     tokenizer_config = json.load(f)
 
 
-# Initialize the LLM
-llm = LLM(model=repo_id,
-          tokenizer_mode="mistral",
-          max_model_len=65536,
-          max_num_batched_tokens=max_img_per_msg * max_tokens_per_img,
-          limit_mm_per_prompt={"image": max_img_per_msg})
+llm = None
+
+def initialize_llm():
+    global llm
+    if llm is None:
+        llm = LLM(model=repo_id,
+                  tokenizer_mode="mistral",
+                  max_model_len=65536,
+                  max_num_batched_tokens=max_img_per_msg * max_tokens_per_img,
+                  limit_mm_per_prompt={"image": max_img_per_msg})
 
 def encode_image(image: Image.Image, image_format="PNG") -> str:
     im_file = BytesIO()
```
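Context for the change above: on a ZeroGPU Space the GPU is only attached while a function decorated with `@spaces.GPU()` is executing, which is presumably why the engine construction moved out of module import and into `initialize_llm()`, cached in the module-level `llm`. A minimal sketch of that lazy-singleton pattern, with `make_engine` as a hypothetical stand-in for the real `LLM(...)` constructor:

```python
# Minimal sketch of the lazy-singleton pattern used in the diff above.
# `make_engine` is a hypothetical stand-in for the `LLM(...)` constructor.
_engine = None

def get_engine(make_engine):
    global _engine
    if _engine is None:
        _engine = make_engine()  # startup cost is paid once, on first use
    return _engine
```

Each `@spaces.GPU()` entry point below calls `initialize_llm()` first, so whichever handler runs first triggers the load and every later call reuses the cached engine.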
```diff
@@ -70,6 +62,7 @@ def encode_image(image: Image.Image, image_format="PNG") -> str:
 
 @spaces.GPU()
 def infer(image_url, prompt, progress=gr.Progress(track_tqdm=True)):
+    initialize_llm()
     image = Image.open(BytesIO(requests.get(image_url).content))
     image = image.resize((3844, 2408))
     new_image_url = f"data:image/png;base64,{encode_image(image, image_format='PNG')}"
```
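`infer` downloads the image, resizes it to 3844×2408, and inlines it as a base64 `data:` URL via `encode_image`. Only the helper's first line (`im_file = BytesIO()`) is visible in the diff context; a plausible completion, assuming it simply serializes the PIL image and base64-encodes the buffer:

```python
import base64
from io import BytesIO
from PIL import Image

def encode_image(image: Image.Image, image_format="PNG") -> str:
    # Serialize into an in-memory buffer, then base64-encode the bytes so
    # the caller can build "data:image/png;base64,<...>" URLs from them.
    im_file = BytesIO()
    image.save(im_file, format=image_format)
    return base64.b64encode(im_file.getvalue()).decode("utf-8")
```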
```diff
@@ -87,6 +80,7 @@ def infer(image_url, prompt, progress=gr.Progress(track_tqdm=True)):
 
 @spaces.GPU()
 def compare_images(image1_url, image2_url, prompt, progress=gr.Progress(track_tqdm=True)):
+    initialize_llm()
     image1 = Image.open(BytesIO(requests.get(image1_url).content))
     image2 = Image.open(BytesIO(requests.get(image2_url).content))
     image1 = image1.resize((3844, 2408))
```
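`compare_images` builds two data URLs the same way; the generation call itself falls outside these hunks. Assuming the app feeds vLLM through its OpenAI-style `LLM.chat` interface for multimodal prompts (consistent with `limit_mm_per_prompt={"image": max_img_per_msg}` above), the request would look roughly like this; `run_chat` and the sampling values are illustrative, not from the source:

```python
from vllm import SamplingParams

def run_chat(llm, prompt: str, image_urls: list[str]) -> str:
    # OpenAI chat format: one text part plus one image_url part per image.
    content = [{"type": "text", "text": prompt}] + [
        {"type": "image_url", "image_url": {"url": u}} for u in image_urls
    ]
    outputs = llm.chat(
        [{"role": "user", "content": content}],
        sampling_params=SamplingParams(max_tokens=512),
    )
    return outputs[0].outputs[0].text
```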
```diff
@@ -111,22 +105,20 @@ def compare_images(image1_url, image2_url, prompt, progress=gr.Progress(track_tqdm=True)):
 
 @spaces.GPU()
 def calculate_image_similarity(image1_url, image2_url):
+    initialize_llm()
     # Load and preprocess images
     image1 = Image.open(BytesIO(requests.get(image1_url).content)).convert('RGB')
     image2 = Image.open(BytesIO(requests.get(image2_url).content)).convert('RGB')
     image1 = image1.resize((224, 224))  # Resize to match model input size
     image2 = image2.resize((224, 224))
 
-    # Convert images to tensors
     image1_tensor = torch.tensor(list(image1.getdata())).view(1, 3, 224, 224).float() / 255.0
     image2_tensor = torch.tensor(list(image2.getdata())).view(1, 3, 224, 224).float() / 255.0
 
-    # Get image embeddings using the vision encoder
     with torch.no_grad():
         embedding1 = llm.model.vision_encoder([image1_tensor])
         embedding2 = llm.model.vision_encoder([image2_tensor])
 
-    # Calculate cosine similarity
     similarity = F.cosine_similarity(embedding1.mean(dim=0), embedding2.mean(dim=0), dim=0).item()
 
     return similarity
```
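One caveat in the unchanged context above: `image.getdata()` yields pixels row-major as an (H·W, 3) sequence, so `.view(1, 3, 224, 224)` reinterprets that buffer with the channel axis in the wrong position rather than producing a real CHW tensor. The diff leaves those lines untouched; a corrected conversion would reshape to HWC first and then permute:

```python
import torch
from PIL import Image

def to_chw_tensor(image: Image.Image) -> torch.Tensor:
    # getdata() is row-major (H*W, C); reshape to (H, W, C), then permute
    # to channels-first and add a batch axis: (1, C, H, W).
    t = torch.tensor(list(image.getdata()), dtype=torch.float32) / 255.0
    return t.view(image.height, image.width, 3).permute(2, 0, 1).unsqueeze(0)
```

Separately, `llm.model.vision_encoder` does not appear to be part of vLLM's public `LLM` API, so the mean-pooled-embedding cosine similarity here likely reaches into engine internals.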