from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
import requests
import json

app = FastAPI()

# Ollama internal URL (Ollama runs alongside the proxy inside the Space)
OLLAMA_BASE_URL = "http://localhost:11434"
# Generic GET proxy for the other Ollama API endpoints
# (route decorator added here; the exact path pattern is assumed, since the original listing omits it)
@app.get("/api/{path}")
async def ollama_proxy_get(path: str, query: str | None = None):
    url = f"{OLLAMA_BASE_URL}/api/{path}"
    params = {"query": query} if query else {}
    try:
        response = requests.get(url, params=params)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=500, detail=str(e))
# Handle /api/chat specifically, with streaming
# (route decorator added here; assumed, since the original listing omits it)
@app.post("/api/chat")
async def ollama_chat(body: dict):
    url = f"{OLLAMA_BASE_URL}/api/chat"
    try:
        # Forward the request to Ollama with streaming enabled
        response = requests.post(url, json=body, stream=True)
        response.raise_for_status()

        # Stream Ollama's newline-delimited JSON chunks back to the client
        def generate():
            for chunk in response.iter_lines():
                if chunk:
                    yield chunk + b"\n"

        return StreamingResponse(generate(), media_type="text/event-stream")
    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=500, detail=f"Ollama error: {str(e)}")
# Simple health-check route (decorator assumed, as above)
@app.get("/")
async def root():
    return {"message": "Ollama API proxy running on Hugging Face Spaces!"}