"""HTTP service that returns text embeddings from jinaai/jina-embeddings-v3."""

from typing import Dict, List

import torch
import uvicorn
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
from transformers import AutoModel, AutoTokenizer


# Response model
class EmbeddingResponse(BaseModel):
    """Payload returned by the embedding endpoints."""

    # Outcome marker; the handlers in this file always set it to "success".
    status: str
    # One embedding vector (list of floats) per row of the model output.
    embeddings: List[List[float]]


# Create the FastAPI application
app = FastAPI(
    title="Jina Embeddings API",
    description="Text embedding generation service using jina-embeddings-v3",
    version="1.0.0",
)

# Load the model and tokenizer once at import time so every request reuses them.
model_name = "jinaai/jina-embeddings-v3"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)


async def generate_embeddings(text: str) -> EmbeddingResponse:
    """Tokenize *text*, run the model, and mean-pool the last hidden state.

    Args:
        text: Input to embed. It is truncated to at most 512 tokens.

    Returns:
        An ``EmbeddingResponse`` with ``status="success"`` and the pooled
        embeddings converted to nested Python lists.

    Raises:
        HTTPException: With status 500 when tokenization or inference fails.
    """
    # NOTE(review): inference runs synchronously inside this coroutine, so the
    # event loop is blocked for the duration of the forward pass.
    try:
        # Run the tokenizer over the input text.
        inputs = tokenizer(
            text, return_tensors="pt", truncation=True, max_length=512
        )

        # Generate the embeddings; no gradients are needed for inference.
        with torch.no_grad():
            embeddings = model(**inputs).last_hidden_state.mean(dim=1)

        return EmbeddingResponse(
            status="success",
            embeddings=embeddings.numpy().tolist(),
        )
    except HTTPException:
        # Already an HTTP error with the right status; do not re-wrap it.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.post("/api/v1/embeddings")
@app.post("/hf/v1/embeddings")
async def embedding(request: Request):
    """Return embeddings for the ``input`` field of the JSON request body.

    Responds with 400 when the body is not valid JSON, is not a JSON object,
    or has a missing/empty ``input`` field, and with 500 when embedding
    generation itself fails.
    """
    try:
        data = await request.json()
    except ValueError as e:
        # json.JSONDecodeError (and UnicodeDecodeError) subclass ValueError;
        # a malformed body is a client error, not a server error.
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON"
        ) from e

    if not isinstance(data, dict):
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object"
        )

    text = data.get('input', '')
    if not text:
        # Raised outside any broad ``except`` so the 400 reaches the client
        # instead of being converted into a 500.
        raise HTTPException(status_code=400, detail="Input text is missing")

    # generate_embeddings already maps its own failures to a 500 response.
    return await generate_embeddings(text)


@app.get("/")
async def root():
    """Report service status, the loaded model name, and a usage hint."""
    return {
        "status": "active",
        "model": model_name,
        "usage": "Send POST request to /api/v1/embeddings",
    }


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)