# ollama-embedding / main.py
from fastapi import FastAPI, HTTPException, Depends, Header, Request
from pydantic import BaseModel
from langchain_community.llms import Ollama
import os
import logging
import time  # used for the `created` timestamp in responses
from dotenv import load_dotenv

# Load environment variables from a local .env file
load_dotenv()
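
# Example .env layout this app expects (assumed; the key values below are
# placeholders, not real keys):
#   API_KEY_USER1=sk-example-key-1
#   API_KEY_USER2=sk-example-key-2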

# Configure logging
logging.basicConfig(level=logging.INFO)

# API keys loaded from the environment
API_KEYS = {
    "user1": os.getenv("API_KEY_USER1"),
    "user2": os.getenv("API_KEY_USER2"),
}

app = FastAPI()

# API key authentication via the X-API-Key header
def verify_api_key(request: Request, api_key: str = Header(None, alias="X-API-Key")):
    # Note: this logs all request headers, including the API key itself;
    # redact or drop this line in production
    logging.info(f"Received Headers: {request.headers}")
    if not api_key:
        raise HTTPException(status_code=401, detail="API key is missing")
    api_key = api_key.strip()
    if api_key not in API_KEYS.values():
        raise HTTPException(status_code=401, detail="Invalid API key")
    return api_key

# OpenAI-compatible request schema
class OpenAIRequest(BaseModel):
    model: str
    messages: list
    stream: bool = False  # accepted for compatibility; streaming is not implemented below
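
# Example request body matching OpenAIRequest (the model name is a
# placeholder; use any model pulled into your local Ollama):
#   {"model": "llama3", "messages": [{"role": "user", "content": "Hello"}], "stream": false}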

# Build a LangChain LLM backed by a local Ollama server
def get_llm(model_name: str):
    return Ollama(model=model_name)
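
# Note: the langchain_community Ollama wrapper targets a local server at
# http://localhost:11434 by default; pass base_url to point elsewhere, e.g.
#   Ollama(model=model_name, base_url="http://localhost:11434")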

@app.get("/")
def home():
    return {"message": "OpenAI-compatible LangChain + Ollama API is running"}

@app.post("/v1/chat/completions")
def generate_text(request: OpenAIRequest, api_key: str = Depends(verify_api_key)):
    try:
        llm = get_llm(request.model)
        # Extract the most recent user message from the conversation
        user_message = next(
            (msg["content"] for msg in reversed(request.messages) if msg["role"] == "user"),
            None,
        )
        if not user_message:
            raise HTTPException(status_code=400, detail="User message is required")
        response_text = llm.invoke(user_message)
        # OpenAI-like response format
        response = {
            "id": "chatcmpl-123",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": request.model,
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": response_text},
                    "finish_reason": "stop",
                }
            ],
            # Rough whitespace-based token counts, not a real tokenizer
            "usage": {
                "prompt_tokens": len(user_message.split()),
                "completion_tokens": len(response_text.split()),
                "total_tokens": len(user_message.split()) + len(response_text.split()),
            },
        }
        return response
    except HTTPException:
        # Re-raise deliberate HTTP errors (e.g. the 400 above) so the generic
        # handler below does not mask them as 500s
        raise
    except Exception as e:
        logging.error(f"Error generating response: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
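
# A minimal local run sketch (assumes uvicorn is installed and an Ollama
# server is running with the requested model pulled):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example call (illustrative; host, port, key, and model are placeholders):
#   curl http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -H "X-API-Key: sk-example-key-1" \
#     -d '{"model": "llama3", "messages": [{"role": "user", "content": "Hello"}]}'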