Spaces:
Sleeping
Sleeping
sanbo
committed on
Commit
·
979bfc3
1
Parent(s):
3028bfb
update sth. at 2025-01-16 22:16:33
Browse files
README.md
CHANGED
@@ -24,10 +24,11 @@ You can generate embeddings by sending a POST request to one of the following en
|
|
24 |
|
25 |
Example request using `curl`:
|
26 |
|
27 |
-
```
|
28 |
curl -X POST https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings \
|
29 |
-H "Content-Type: application/json" \
|
30 |
-d '{
|
31 |
"input": "Your text string goes here",
|
32 |
"model": "jinaai/jina-embeddings-v3"
|
33 |
-
}'
|
|
|
|
24 |
|
25 |
Example request using `curl`:
|
26 |
|
27 |
+
``` bash
|
28 |
curl -X POST https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings \
|
29 |
-H "Content-Type: application/json" \
|
30 |
-d '{
|
31 |
"input": "Your text string goes here",
|
32 |
"model": "jinaai/jina-embeddings-v3"
|
33 |
+
}'
|
34 |
+
```
|
app.py
CHANGED
@@ -1,11 +1,29 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
import torch
|
|
|
|
|
|
|
|
|
|
|
5 |
from typing import List, Dict
|
|
|
|
|
6 |
import uvicorn
|
7 |
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
class EmbeddingRequest(BaseModel):
|
10 |
input: str
|
11 |
model: str = "jinaai/jina-embeddings-v3"
|
@@ -14,18 +32,81 @@ class EmbeddingResponse(BaseModel):
|
|
14 |
status: str
|
15 |
embeddings: List[List[float]]
|
16 |
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
app = FastAPI(
|
19 |
title="Jina Embeddings API",
|
20 |
description="Text embedding generation service using jina-embeddings-v3",
|
21 |
version="1.0.0"
|
22 |
)
|
23 |
|
24 |
-
#
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
28 |
|
|
|
29 |
@app.post("/generate_embeddings", response_model=EmbeddingResponse)
|
30 |
@app.post("/api/v1/embeddings", response_model=EmbeddingResponse)
|
31 |
@app.post("/hf/v1/embeddings", response_model=EmbeddingResponse)
|
@@ -33,17 +114,13 @@ model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
|
|
33 |
@app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
|
34 |
async def generate_embeddings(request: EmbeddingRequest):
|
35 |
try:
|
36 |
-
|
37 |
-
inputs = tokenizer(request.input, return_tensors="pt", truncation=True, max_length=512)
|
38 |
-
|
39 |
-
# 生成嵌入
|
40 |
-
with torch.no_grad():
|
41 |
-
embeddings = model(**inputs).last_hidden_state.mean(dim=1)
|
42 |
-
|
43 |
return EmbeddingResponse(
|
44 |
status="success",
|
45 |
-
embeddings=
|
46 |
)
|
|
|
|
|
47 |
except Exception as e:
|
48 |
raise HTTPException(status_code=500, detail=str(e))
|
49 |
|
@@ -51,9 +128,51 @@ async def generate_embeddings(request: EmbeddingRequest):
|
|
51 |
async def root():
|
52 |
return {
|
53 |
"status": "active",
|
54 |
-
"model": model_name,
|
55 |
-
"
|
|
|
56 |
}
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
if __name__ == "__main__":
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import logging
|
3 |
+
import time
|
4 |
import torch
|
5 |
+
import gradio as gr
|
6 |
+
from fastapi import FastAPI, Request, HTTPException
|
7 |
+
from fastapi.middleware.cors import CORSMiddleware
|
8 |
+
from pydantic import BaseModel, BaseSettings
|
9 |
+
from transformers import AutoTokenizer, AutoModel
|
10 |
from typing import List, Dict
|
11 |
+
from functools import lru_cache
|
12 |
+
import numpy as np
|
13 |
import uvicorn
|
14 |
|
15 |
+
class Settings(BaseSettings):
    """Runtime configuration for the embedding service.

    Defaults are declared below; the nested ``Config`` points pydantic at a
    ``.env`` file for overrides.

    NOTE(review): ``BaseSettings`` is imported from ``pydantic`` in this file.
    Under pydantic v2 it lives in the separate ``pydantic-settings`` package,
    so confirm the pinned pydantic version.
    """

    # Hugging Face model id used to load both the tokenizer and the model.
    model_name: str = "jinaai/jina-embeddings-v3"
    # Token limit passed to the tokenizer (inputs are truncated to this length).
    max_length: int = 512
    # Not read by any code visible in this file.
    batch_size: int = 32
    # Bind address and port handed to uvicorn.
    host: str = "0.0.0.0"
    port: int = 7860
    # Use CUDA when it is available; otherwise the service runs on CPU.
    enable_gpu: bool = True
    # Capacity of the service's asyncio request queue.
    queue_size: int = 100

    class Config:
        env_file = ".env"
|
26 |
+
|
27 |
class EmbeddingRequest(BaseModel):
    """Request body accepted by the embedding endpoints."""

    # Text to embed.
    input: str
    # Model identifier sent by the client; the handler in this file only
    # reads ``input``.
    model: str = "jinaai/jina-embeddings-v3"
|
|
|
32 |
status: str
|
33 |
embeddings: List[List[float]]
|
34 |
|
35 |
+
class EmbeddingService:
    """Load the embedding model and turn text into mean-pooled vectors.

    ``tokenizer`` and ``model`` stay ``None`` until :meth:`initialize` has
    run. Computed embeddings are memoized per instance, keyed by input text.
    """

    # Maximum number of texts kept in the per-instance embedding cache.
    _CACHE_SIZE = 1000

    def __init__(self, settings: "Settings"):
        """Store *settings*, pick the torch device and set up logging."""
        self.settings = settings
        self.device = torch.device("cuda" if torch.cuda.is_available() and settings.enable_gpu else "cpu")
        self.model = None
        self.tokenizer = None
        self.request_queue = asyncio.Queue(maxsize=settings.queue_size)
        # text -> embedding array; dict insertion order doubles as LRU order.
        self._embedding_cache = {}
        self.setup_logging()

    def setup_logging(self):
        """Configure root logging at INFO level and create ``self.logger``."""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        self.logger = logging.getLogger(__name__)

    async def initialize(self):
        """Load tokenizer and model onto ``self.device``.

        Does nothing when both are already loaded, so calling it from more
        than one start-up path does not load the model twice. Any loading
        error is logged and re-raised.
        """
        if self.model is not None and self.tokenizer is not None:
            return
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.settings.model_name,
                trust_remote_code=True
            )
            self.model = AutoModel.from_pretrained(
                self.settings.model_name,
                trust_remote_code=True
            ).to(self.device)
            self.model.eval()
            self.logger.info(f"模型加载成功,使用设备: {self.device}")
        except Exception as e:
            self.logger.error(f"模型初始化失败: {str(e)}")
            raise

    async def generate_embedding(self, text: str) -> np.ndarray:
        """Return the mean-pooled last hidden state for *text*.

        Results are memoized in a bounded per-instance cache.
        ``functools.lru_cache`` is deliberately not used: on a coroutine
        function it would cache the coroutine object itself, which cannot be
        awaited a second time, and it would also key on ``self``.

        Raises:
            Exception: whatever the tokenizer or model raises, after logging.
        """
        cache = self._embedding_cache
        cached = cache.pop(text, None)
        if cached is not None:
            # Re-insert so this entry becomes the most recently used one.
            cache[text] = cached
            return cached.copy()

        try:
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=self.settings.max_length
            ).to(self.device)

            with torch.no_grad():
                outputs = self.model(**inputs).last_hidden_state.mean(dim=1)
            embedding = outputs.cpu().numpy()
        except Exception as e:
            self.logger.error(f"生成嵌入向量失败: {str(e)}")
            raise

        if len(cache) >= self._CACHE_SIZE:
            # Evict the least recently used entry (first key in the dict).
            cache.pop(next(iter(cache)), None)
        cache[text] = embedding
        # Hand out a copy so callers cannot mutate the cached array.
        return embedding.copy()

    async def handle_request(self, text: str) -> np.ndarray:
        """Validate *text* and return its embedding.

        Raises:
            ValueError: if *text* is empty or only whitespace.
        """
        if not text.strip():
            raise ValueError("输入文本不能为空")
        return await self.generate_embedding(text)
|
88 |
+
|
89 |
+
# Build the configuration and the embedding service (the model itself is
# loaded later by ``initialize``).
settings = Settings()
embedding_service = EmbeddingService(settings)

# FastAPI application
app = FastAPI(
    title="Jina Embeddings API",
    description="Text embedding generation service using jina-embeddings-v3",
    version="1.0.0",
)

# CORS middleware
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; confirm this is intended for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
108 |
|
109 |
+
# FastAPI路由
|
110 |
@app.post("/generate_embeddings", response_model=EmbeddingResponse)
|
111 |
@app.post("/api/v1/embeddings", response_model=EmbeddingResponse)
|
112 |
@app.post("/hf/v1/embeddings", response_model=EmbeddingResponse)
|
|
|
114 |
@app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
|
115 |
async def generate_embeddings(request: EmbeddingRequest):
    """Embed ``request.input`` and wrap the vector in an ``EmbeddingResponse``.

    A ``ValueError`` (for example blank input rejected by the service) becomes
    HTTP 400; any other exception becomes HTTP 500 with the error text as
    detail.
    """
    try:
        vector = await embedding_service.handle_request(request.input)
        response = EmbeddingResponse(
            status="success",
            embeddings=vector.tolist(),
        )
        return response
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
126 |
|
|
|
128 |
async def root():
    """Return a small status document describing the running service."""
    return {
        "status": "active",
        "model": settings.model_name,
        # ``torch.device`` is not JSON-serializable; report its string form
        # (e.g. "cpu" or "cuda") so the response can be encoded.
        "device": str(embedding_service.device),
        "usage": "Send POST request to /generate_embeddings or use UI interface",
    }
|
135 |
|
136 |
+
# Gradio界面
|
137 |
+
def gradio_interface(text: str) -> Dict:
    """Gradio callback: embed *text* and return a JSON-friendly dict.

    Runs the async service call to completion with ``asyncio.run``. Errors
    are reported in the returned dict rather than raised.
    """
    try:
        vector = asyncio.run(embedding_service.handle_request(text))
        payload = {
            "status": "success",
            "embeddings": vector.tolist(),
        }
    except Exception as exc:
        payload = {
            "status": "error",
            "message": str(exc),
        }
    return payload
|
149 |
+
|
150 |
+
# Gradio UI: one text box in, the JSON result of ``gradio_interface`` out.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=3, label="输入文本"),
    outputs=gr.JSON(label="嵌入向量结果"),
    title="Jina Embeddings V3",
    description="使用jina-embeddings-v3模型生成文本嵌入向量",
    examples=[
        ["这是一个测试句子。"],
        ["人工智能正在改变世界。"],
    ],
)
|
161 |
+
|
162 |
+
@app.on_event("startup")
async def startup_event():
    """Load the model when the application starts, unless already loaded.

    The ``__main__`` entry point loads the model before starting uvicorn, so
    an unconditional call here would repeat the whole load.
    """
    if embedding_service.model is None or embedding_service.tokenizer is None:
        await embedding_service.initialize()
|
165 |
+
|
166 |
if __name__ == "__main__":
    # Make sure the model is loaded before serving.
    asyncio.run(embedding_service.initialize())

    # Serve the Gradio UI under /ui on the same FastAPI application.
    gr.mount_gradio_app(app, iface, path="/ui")

    # A single worker process is recommended when running on GPU.
    uvicorn.run(app, host=settings.host, port=settings.port, workers=1)
|
app.py1
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from fastapi import FastAPI, HTTPException
|
2 |
from pydantic import BaseModel
|
3 |
from transformers import AutoTokenizer, AutoModel
|
4 |
import torch
|
@@ -6,8 +6,9 @@ from typing import List, Dict
|
|
6 |
import uvicorn
|
7 |
|
8 |
# 定义请求和响应模型
|
9 |
-
class
|
10 |
-
|
|
|
11 |
|
12 |
class EmbeddingResponse(BaseModel):
|
13 |
status: str
|
@@ -26,15 +27,19 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
|
26 |
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
|
27 |
|
28 |
@app.post("/generate_embeddings", response_model=EmbeddingResponse)
|
29 |
-
|
|
|
|
|
|
|
|
|
30 |
try:
|
31 |
# 使用分词器处理输入文本
|
32 |
-
inputs = tokenizer(request.
|
33 |
-
|
34 |
# 生成嵌入
|
35 |
with torch.no_grad():
|
36 |
embeddings = model(**inputs).last_hidden_state.mean(dim=1)
|
37 |
-
|
38 |
return EmbeddingResponse(
|
39 |
status="success",
|
40 |
embeddings=embeddings.numpy().tolist()
|
@@ -47,8 +52,8 @@ async def root():
|
|
47 |
return {
|
48 |
"status": "active",
|
49 |
"model": model_name,
|
50 |
-
"usage": "Send POST request to /generate_embeddings"
|
51 |
}
|
52 |
|
53 |
if __name__ == "__main__":
|
54 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
1 |
+
from fastapi import FastAPI, HTTPException, Request
|
2 |
from pydantic import BaseModel
|
3 |
from transformers import AutoTokenizer, AutoModel
|
4 |
import torch
|
|
|
6 |
import uvicorn
|
7 |
|
8 |
# 定义请求和响应模型
|
9 |
+
class EmbeddingRequest(BaseModel):
|
10 |
+
input: str
|
11 |
+
model: str = "jinaai/jina-embeddings-v3"
|
12 |
|
13 |
class EmbeddingResponse(BaseModel):
|
14 |
status: str
|
|
|
27 |
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
|
28 |
|
29 |
@app.post("/generate_embeddings", response_model=EmbeddingResponse)
|
30 |
+
@app.post("/api/v1/embeddings", response_model=EmbeddingResponse)
|
31 |
+
@app.post("/hf/v1/embeddings", response_model=EmbeddingResponse)
|
32 |
+
@app.post("/api/v1/chat/completions", response_model=EmbeddingResponse)
|
33 |
+
@app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
|
34 |
+
async def generate_embeddings(request: EmbeddingRequest):
|
35 |
try:
|
36 |
# 使用分词器处理输入文本
|
37 |
+
inputs = tokenizer(request.input, return_tensors="pt", truncation=True, max_length=512)
|
38 |
+
|
39 |
# 生成嵌入
|
40 |
with torch.no_grad():
|
41 |
embeddings = model(**inputs).last_hidden_state.mean(dim=1)
|
42 |
+
|
43 |
return EmbeddingResponse(
|
44 |
status="success",
|
45 |
embeddings=embeddings.numpy().tolist()
|
|
|
52 |
return {
|
53 |
"status": "active",
|
54 |
"model": model_name,
|
55 |
+
"usage": "Send POST request to /generate_embeddings or /api/v1/embeddings or /hf/v1/embeddings"
|
56 |
}
|
57 |
|
58 |
if __name__ == "__main__":
|
59 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|