"""FastAPI service that serves jina-embeddings-v3 text embeddings over HTTP."""

import logging
import threading
from typing import Dict, List

import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
from transformers import AutoModel, AutoTokenizer

logger = logging.getLogger(__name__)


# Request and response models.
class TextRequest(BaseModel):
    """Request body for ``POST /generate_embeddings``."""

    # The text to embed.
    text: str


class EmbeddingResponse(BaseModel):
    """Response body for ``POST /generate_embeddings``."""

    # Set to "success" by the handler when embedding generation succeeds.
    status: str
    # Pooled embedding vectors as nested lists of floats.
    embeddings: List[List[float]]


# Create the FastAPI application.
app = FastAPI(
    title="Jina Embeddings API",
    description="Text embedding generation service using jina-embeddings-v3",
    version="1.0.0",
)

# Load the model and tokenizer once, at import time.
# NOTE(review): trust_remote_code=True executes Python code shipped with the
# model repository; keep it only while that repository is trusted.
model_name = "jinaai/jina-embeddings-v3"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

# The tokenizer and model are shared module-level objects. Inference now runs
# on worker threads, so this lock keeps it serialized, exactly as it was when
# the handler ran it inline on the event loop.
_inference_lock = threading.Lock()


def _embed_text(text: str) -> List[List[float]]:
    """Tokenize *text* and return its mean-pooled last hidden state as lists."""
    with _inference_lock:
        # Tokenize the input text, truncating to at most 512 tokens.
        inputs = tokenizer(
            text, return_tensors="pt", truncation=True, max_length=512
        )
        # Generate the embedding: average the last hidden state over dim 1.
        with torch.no_grad():
            pooled = model(**inputs).last_hidden_state.mean(dim=1)
    # .float()/.cpu() are no-ops for a float32 CPU tensor, but keep the NumPy
    # conversion working if the model yields bfloat16 or non-CPU tensors.
    return pooled.float().cpu().numpy().tolist()


@app.post("/generate_embeddings", response_model=EmbeddingResponse)
async def generate_embeddings(request: TextRequest):
    """Return the mean-pooled embedding of ``request.text``.

    The blocking tokenizer/model work is handed to a worker thread so the
    event loop can keep serving other requests while it runs.

    Raises:
        HTTPException: status 500, with the underlying error message as the
            detail, if tokenization, inference, or building the response
            fails.
    """
    try:
        vectors = await run_in_threadpool(_embed_text, request.text)
        return EmbeddingResponse(status="success", embeddings=vectors)
    except Exception as exc:
        # Record the full traceback server-side; the client only receives
        # the error message.
        logger.exception("Embedding generation failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc


@app.get("/")
async def root():
    """Report service status, the loaded model name, and a usage hint."""
    return {
        "status": "active",
        "model": model_name,
        "usage": "Send POST request to /generate_embeddings",
    }


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)