Spaces:
Running
Running
Commit
·
2044733
1
Parent(s):
75318f4
changing model
Browse files
main.py
CHANGED
@@ -2,22 +2,25 @@ import base64
|
|
2 |
from io import BytesIO
|
3 |
|
4 |
import torch
|
5 |
-
from fastapi import FastAPI, Query
|
6 |
from PIL import Image
|
7 |
-
from fastapi import FastAPI, File, UploadFile, HTTPException
|
8 |
-
from qwen_vl_utils import process_vision_info
|
9 |
-
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, Qwen2VLForConditionalGeneration
|
10 |
-
|
11 |
-
from fastapi import FastAPI, Body
|
12 |
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
app = FastAPI()
|
15 |
|
|
|
16 |
# Define request model
|
17 |
class PredictRequest(BaseModel):
|
18 |
image_base64: str
|
19 |
prompt: str
|
20 |
|
|
|
21 |
# checkpoint = "Qwen/Qwen2-VL-2B-Instruct"
|
22 |
# min_pixels = 256 * 28 * 28
|
23 |
# max_pixels = 1280 * 28 * 28
|
@@ -31,13 +34,11 @@ class PredictRequest(BaseModel):
|
|
31 |
# # attn_implementation="flash_attention_2",
|
32 |
# )
|
33 |
|
34 |
-
checkpoint = "Qwen/Qwen2.5-VL-
|
35 |
-
min_pixels = 256*28*28
|
36 |
-
max_pixels = 1280*28*28
|
37 |
processor = AutoProcessor.from_pretrained(
|
38 |
-
checkpoint,
|
39 |
-
min_pixels=min_pixels,
|
40 |
-
max_pixels=max_pixels
|
41 |
)
|
42 |
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
43 |
checkpoint,
|
@@ -82,6 +83,7 @@ def read_root():
|
|
82 |
# print(f"❌ Error encoding image {image_path}: {e}")
|
83 |
# return None
|
84 |
|
|
|
85 |
def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
|
86 |
"""
|
87 |
Converts an image from file data to a Base64-encoded string with optimized size.
|
@@ -96,6 +98,7 @@ def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
|
|
96 |
except Exception as e:
|
97 |
raise HTTPException(status_code=500, detail=f"Error encoding image: {e}")
|
98 |
|
|
|
99 |
@app.post("/encode-image/")
|
100 |
async def upload_and_encode_image(file: UploadFile = File(...)):
|
101 |
"""
|
@@ -108,6 +111,7 @@ async def upload_and_encode_image(file: UploadFile = File(...)):
|
|
108 |
except Exception as e:
|
109 |
raise HTTPException(status_code=400, detail=f"Invalid file: {e}")
|
110 |
|
|
|
111 |
@app.post("/predict")
|
112 |
def predict(data: PredictRequest):
|
113 |
"""
|
@@ -121,7 +125,6 @@ def predict(data: PredictRequest):
|
|
121 |
str: The generated description of the image.
|
122 |
"""
|
123 |
|
124 |
-
|
125 |
# Create the input message structure
|
126 |
messages = [
|
127 |
{
|
@@ -160,6 +163,7 @@ def predict(data: PredictRequest):
|
|
160 |
|
161 |
return {"response": output_text[0] if output_text else "No description generated."}
|
162 |
|
|
|
163 |
# @app.get("/predict")
|
164 |
# def predict(image_url: str = Query(...), prompt: str = Query(...)):
|
165 |
|
|
|
2 |
from io import BytesIO
|
3 |
|
4 |
import torch
|
5 |
+
from fastapi import Body, FastAPI, File, HTTPException, Query, UploadFile
|
6 |
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
7 |
from pydantic import BaseModel
|
8 |
+
from qwen_vl_utils import process_vision_info
|
9 |
+
from transformers import (
|
10 |
+
AutoProcessor,
|
11 |
+
Qwen2_5_VLForConditionalGeneration,
|
12 |
+
Qwen2VLForConditionalGeneration,
|
13 |
+
)
|
14 |
|
15 |
app = FastAPI()
|
16 |
|
17 |
+
|
18 |
# Define request model
|
19 |
class PredictRequest(BaseModel):
|
20 |
image_base64: str
|
21 |
prompt: str
|
22 |
|
23 |
+
|
24 |
# checkpoint = "Qwen/Qwen2-VL-2B-Instruct"
|
25 |
# min_pixels = 256 * 28 * 28
|
26 |
# max_pixels = 1280 * 28 * 28
|
|
|
34 |
# # attn_implementation="flash_attention_2",
|
35 |
# )
|
36 |
|
37 |
+
checkpoint = "Qwen/Qwen2.5-VL-7B-Instruct"
|
38 |
+
min_pixels = 256 * 28 * 28
|
39 |
+
max_pixels = 1280 * 28 * 28
|
40 |
processor = AutoProcessor.from_pretrained(
|
41 |
+
checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
|
|
|
|
|
42 |
)
|
43 |
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
44 |
checkpoint,
|
|
|
83 |
# print(f"❌ Error encoding image {image_path}: {e}")
|
84 |
# return None
|
85 |
|
86 |
+
|
87 |
def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
|
88 |
"""
|
89 |
Converts an image from file data to a Base64-encoded string with optimized size.
|
|
|
98 |
except Exception as e:
|
99 |
raise HTTPException(status_code=500, detail=f"Error encoding image: {e}")
|
100 |
|
101 |
+
|
102 |
@app.post("/encode-image/")
|
103 |
async def upload_and_encode_image(file: UploadFile = File(...)):
|
104 |
"""
|
|
|
111 |
except Exception as e:
|
112 |
raise HTTPException(status_code=400, detail=f"Invalid file: {e}")
|
113 |
|
114 |
+
|
115 |
@app.post("/predict")
|
116 |
def predict(data: PredictRequest):
|
117 |
"""
|
|
|
125 |
str: The generated description of the image.
|
126 |
"""
|
127 |
|
|
|
128 |
# Create the input message structure
|
129 |
messages = [
|
130 |
{
|
|
|
163 |
|
164 |
return {"response": output_text[0] if output_text else "No description generated."}
|
165 |
|
166 |
+
|
167 |
# @app.get("/predict")
|
168 |
# def predict(image_url: str = Query(...), prompt: str = Query(...)):
|
169 |
|