sanbo committed on
Commit
979bfc3
·
1 Parent(s): 3028bfb

update sth. at 2025-01-16 22:16:33

Browse files
Files changed (3) hide show
  1. README.md +3 -2
  2. app.py +139 -20
  3. app.py1 +14 -9
README.md CHANGED
@@ -24,10 +24,11 @@ You can generate embeddings by sending a POST request to one of the following en
24
 
25
  Example request using `curl`:
26
 
27
- ```sh
28
  curl -X POST https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings \
29
  -H "Content-Type: application/json" \
30
  -d '{
31
  "input": "Your text string goes here",
32
  "model": "jinaai/jina-embeddings-v3"
33
- }'
 
 
24
 
25
  Example request using `curl`:
26
 
27
+ ``` bash
28
  curl -X POST https://sanbo1200-jina-embeddings-v3.hf.space/api/v1/embeddings \
29
  -H "Content-Type: application/json" \
30
  -d '{
31
  "input": "Your text string goes here",
32
  "model": "jinaai/jina-embeddings-v3"
33
+ }'
34
+ ```
app.py CHANGED
@@ -1,11 +1,29 @@
1
- from fastapi import FastAPI, HTTPException, Request
2
- from pydantic import BaseModel
3
- from transformers import AutoTokenizer, AutoModel
4
  import torch
 
 
 
 
 
5
  from typing import List, Dict
 
 
6
  import uvicorn
7
 
8
- # 定义请求和响应模型
 
 
 
 
 
 
 
 
 
 
 
9
  class EmbeddingRequest(BaseModel):
10
  input: str
11
  model: str = "jinaai/jina-embeddings-v3"
@@ -14,18 +32,81 @@ class EmbeddingResponse(BaseModel):
14
  status: str
15
  embeddings: List[List[float]]
16
 
17
- # 创建FastAPI应用
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  app = FastAPI(
19
  title="Jina Embeddings API",
20
  description="Text embedding generation service using jina-embeddings-v3",
21
  version="1.0.0"
22
  )
23
 
24
- # 加载模型和分词器
25
- model_name = "jinaai/jina-embeddings-v3"
26
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
27
- model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
 
 
 
 
28
 
 
29
  @app.post("/generate_embeddings", response_model=EmbeddingResponse)
30
  @app.post("/api/v1/embeddings", response_model=EmbeddingResponse)
31
  @app.post("/hf/v1/embeddings", response_model=EmbeddingResponse)
@@ -33,17 +114,13 @@ model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
33
  @app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
34
  async def generate_embeddings(request: EmbeddingRequest):
35
  try:
36
- # 使用分词器处理输入文本
37
- inputs = tokenizer(request.input, return_tensors="pt", truncation=True, max_length=512)
38
-
39
- # 生成嵌入
40
- with torch.no_grad():
41
- embeddings = model(**inputs).last_hidden_state.mean(dim=1)
42
-
43
  return EmbeddingResponse(
44
  status="success",
45
- embeddings=embeddings.numpy().tolist()
46
  )
 
 
47
  except Exception as e:
48
  raise HTTPException(status_code=500, detail=str(e))
49
 
@@ -51,9 +128,51 @@ async def generate_embeddings(request: EmbeddingRequest):
51
  async def root():
52
  return {
53
  "status": "active",
54
- "model": model_name,
55
- "usage": "Send POST request to /generate_embeddings or /api/v1/embeddings or /hf/v1/embeddings"
 
56
  }
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  if __name__ == "__main__":
59
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+ import time
4
  import torch
5
+ import gradio as gr
6
+ from fastapi import FastAPI, Request, HTTPException
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from pydantic import BaseModel, BaseSettings
9
+ from transformers import AutoTokenizer, AutoModel
10
  from typing import List, Dict
11
+ from functools import lru_cache
12
+ import numpy as np
13
  import uvicorn
14
 
15
+ class Settings(BaseSettings):
16
+ model_name: str = "jinaai/jina-embeddings-v3"
17
+ max_length: int = 512
18
+ batch_size: int = 32
19
+ host: str = "0.0.0.0"
20
+ port: int = 7860
21
+ enable_gpu: bool = True
22
+ queue_size: int = 100
23
+
24
+ class Config:
25
+ env_file = ".env"
26
+
27
  class EmbeddingRequest(BaseModel):
28
  input: str
29
  model: str = "jinaai/jina-embeddings-v3"
 
32
  status: str
33
  embeddings: List[List[float]]
34
 
35
class EmbeddingService:
    """Load the embedding model and produce mean-pooled text embeddings.

    The service owns the tokenizer/model pair, the torch device they run on,
    a logger, and a small per-instance LRU cache of computed embeddings.
    """

    # Upper bound on cached embeddings; the oldest entry is evicted beyond it.
    _CACHE_MAX_ENTRIES = 1000

    def __init__(self, settings: "Settings"):
        """Store the settings and pick the device; the model is loaded later.

        Args:
            settings: Configuration object; this class reads ``enable_gpu``,
                ``queue_size``, ``model_name`` and ``max_length`` from it.
        """
        self.settings = settings
        self.device = torch.device("cuda" if torch.cuda.is_available() and settings.enable_gpu else "cpu")
        self.model = None
        self.tokenizer = None
        self.request_queue = asyncio.Queue(maxsize=settings.queue_size)
        # text -> embedding. A plain dict keeps insertion order, which is
        # enough to implement least-recently-used eviction.
        self._cache = {}
        self.setup_logging()

    def setup_logging(self):
        """Configure root logging at INFO level and create ``self.logger``."""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        self.logger = logging.getLogger(__name__)

    async def initialize(self):
        """Load the tokenizer and model onto ``self.device``.

        Calling this again after a successful load is a no-op, so it is safe
        to invoke from more than one startup path.

        Raises:
            Exception: whatever the loaders raise, after it has been logged.
        """
        if self.model is not None and self.tokenizer is not None:
            return
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.settings.model_name,
                trust_remote_code=True
            )
            self.model = AutoModel.from_pretrained(
                self.settings.model_name,
                trust_remote_code=True
            ).to(self.device)
            self.model.eval()
            self.logger.info(f"模型加载成功,使用设备: {self.device}")
        except Exception as e:
            self.logger.error(f"模型初始化失败: {str(e)}")
            raise

    async def generate_embedding(self, text: str) -> np.ndarray:
        """Return the embedding for ``text``, reusing a cached result if any.

        The embedding is the mean of ``last_hidden_state`` over ``dim=1``
        (presumably the token axis -- confirm against the model's output).
        Results are cached here rather than with ``functools.lru_cache``:
        decorating a coroutine function caches the coroutine object, which
        can only be awaited once.

        Args:
            text: Input text; it is truncated to ``settings.max_length``
                tokens by the tokenizer.

        Returns:
            The pooled embedding as a NumPy array. The cached array itself is
            returned, so callers should treat it as read-only.

        Raises:
            Exception: any tokenizer/model failure, after it has been logged.
        """
        cached = self._cache.pop(text, None)
        if cached is not None:
            # Re-insert so this key becomes the most recently used one.
            self._cache[text] = cached
            return cached

        try:
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=self.settings.max_length
            ).to(self.device)

            with torch.no_grad():
                outputs = self.model(**inputs).last_hidden_state.mean(dim=1)
            embedding = outputs.cpu().numpy()
        except Exception as e:
            self.logger.error(f"生成嵌入向量失败: {str(e)}")
            raise

        self._cache[text] = embedding
        if len(self._cache) > self._CACHE_MAX_ENTRIES:
            # The first key in iteration order is the least recently used.
            del self._cache[next(iter(self._cache))]
        return embedding

    async def handle_request(self, text: str) -> np.ndarray:
        """Validate ``text`` and return its embedding.

        Raises:
            ValueError: if ``text`` is empty or whitespace only.
        """
        if not text.strip():
            raise ValueError("输入文本不能为空")
        return await self.generate_embedding(text)
88
+
89
+ # 初始化服务
90
+ settings = Settings()
91
+ embedding_service = EmbeddingService(settings)
92
+
93
+ # FastAPI应用
94
  app = FastAPI(
95
  title="Jina Embeddings API",
96
  description="Text embedding generation service using jina-embeddings-v3",
97
  version="1.0.0"
98
  )
99
 
100
+ # CORS中间件
101
+ app.add_middleware(
102
+ CORSMiddleware,
103
+ allow_origins=["*"],
104
+ allow_credentials=True,
105
+ allow_methods=["*"],
106
+ allow_headers=["*"],
107
+ )
108
 
109
+ # FastAPI路由
110
  @app.post("/generate_embeddings", response_model=EmbeddingResponse)
111
  @app.post("/api/v1/embeddings", response_model=EmbeddingResponse)
112
  @app.post("/hf/v1/embeddings", response_model=EmbeddingResponse)
 
114
  @app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
115
  async def generate_embeddings(request: EmbeddingRequest):
116
  try:
117
+ embedding = await embedding_service.handle_request(request.input)
 
 
 
 
 
 
118
  return EmbeddingResponse(
119
  status="success",
120
+ embeddings=embedding.tolist()
121
  )
122
+ except ValueError as e:
123
+ raise HTTPException(status_code=400, detail=str(e))
124
  except Exception as e:
125
  raise HTTPException(status_code=500, detail=str(e))
126
 
 
128
async def root():
    """Report service status, the configured model name and the active device."""
    return {
        "status": "active",
        "model": settings.model_name,
        # A torch.device object cannot be encoded as JSON; send its string
        # form (e.g. "cpu" or "cuda") instead.
        "device": str(embedding_service.device),
        "usage": "Send POST request to /generate_embeddings or use UI interface"
    }
135
 
136
+ # Gradio界面
137
+ def gradio_interface(text: str) -> Dict:
138
+ try:
139
+ embedding = asyncio.run(embedding_service.handle_request(text))
140
+ return {
141
+ "status": "success",
142
+ "embeddings": embedding.tolist()
143
+ }
144
+ except Exception as e:
145
+ return {
146
+ "status": "error",
147
+ "message": str(e)
148
+ }
149
+
150
+ iface = gr.Interface(
151
+ fn=gradio_interface,
152
+ inputs=gr.Textbox(lines=3, label="输入文本"),
153
+ outputs=gr.JSON(label="嵌入向量结果"),
154
+ title="Jina Embeddings V3",
155
+ description="使用jina-embeddings-v3模型生成文本嵌入向量",
156
+ examples=[
157
+ ["这是一个测试句子。"],
158
+ ["人工智能正在改变世界。"]
159
+ ]
160
+ )
161
+
162
+ @app.on_event("startup")
163
+ async def startup_event():
164
+ await embedding_service.initialize()
165
+
166
if __name__ == "__main__":
    # The model is loaded by the FastAPI "startup" handler once uvicorn starts
    # the app. Loading it here as well (via a separate asyncio.run) would
    # initialize it twice, on an event loop that is discarded straight away.

    # Serve the Gradio UI from the same FastAPI app under /ui.
    gr.mount_gradio_app(app, iface, path="/ui")

    uvicorn.run(
        app,
        host=settings.host,
        port=settings.port,
        workers=1  # a single worker process is recommended in GPU mode
    )
app.py1 CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  from transformers import AutoTokenizer, AutoModel
4
  import torch
@@ -6,8 +6,9 @@ from typing import List, Dict
6
  import uvicorn
7
 
8
  # 定义请求和响应模型
9
- class TextRequest(BaseModel):
10
- text: str
 
11
 
12
  class EmbeddingResponse(BaseModel):
13
  status: str
@@ -26,15 +27,19 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
26
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
27
 
28
  @app.post("/generate_embeddings", response_model=EmbeddingResponse)
29
- async def generate_embeddings(request: TextRequest):
 
 
 
 
30
  try:
31
  # 使用分词器处理输入文本
32
- inputs = tokenizer(request.text, return_tensors="pt", truncation=True, max_length=512)
33
-
34
  # 生成嵌入
35
  with torch.no_grad():
36
  embeddings = model(**inputs).last_hidden_state.mean(dim=1)
37
-
38
  return EmbeddingResponse(
39
  status="success",
40
  embeddings=embeddings.numpy().tolist()
@@ -47,8 +52,8 @@ async def root():
47
  return {
48
  "status": "active",
49
  "model": model_name,
50
- "usage": "Send POST request to /generate_embeddings"
51
  }
52
 
53
  if __name__ == "__main__":
54
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ from fastapi import FastAPI, HTTPException, Request
2
  from pydantic import BaseModel
3
  from transformers import AutoTokenizer, AutoModel
4
  import torch
 
6
  import uvicorn
7
 
8
  # 定义请求和响应模型
9
+ class EmbeddingRequest(BaseModel):
10
+ input: str
11
+ model: str = "jinaai/jina-embeddings-v3"
12
 
13
  class EmbeddingResponse(BaseModel):
14
  status: str
 
27
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
28
 
29
  @app.post("/generate_embeddings", response_model=EmbeddingResponse)
30
+ @app.post("/api/v1/embeddings", response_model=EmbeddingResponse)
31
+ @app.post("/hf/v1/embeddings", response_model=EmbeddingResponse)
32
+ @app.post("/api/v1/chat/completions", response_model=EmbeddingResponse)
33
+ @app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
34
+ async def generate_embeddings(request: EmbeddingRequest):
35
  try:
36
  # 使用分词器处理输入文本
37
+ inputs = tokenizer(request.input, return_tensors="pt", truncation=True, max_length=512)
38
+
39
  # 生成嵌入
40
  with torch.no_grad():
41
  embeddings = model(**inputs).last_hidden_state.mean(dim=1)
42
+
43
  return EmbeddingResponse(
44
  status="success",
45
  embeddings=embeddings.numpy().tolist()
 
52
  return {
53
  "status": "active",
54
  "model": model_name,
55
+ "usage": "Send POST request to /generate_embeddings or /api/v1/embeddings or /hf/v1/embeddings"
56
  }
57
 
58
  if __name__ == "__main__":
59
+ uvicorn.run(app, host="0.0.0.0", port=7860)