from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from huggingface_hub import InferenceClient
import os

# Hugging Face Inference API credentials and target model
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_ID = "google/gemma-2b-it"

client = InferenceClient(token=HF_TOKEN)

app = FastAPI()

# Allow CORS for all origins (for development and Netlify)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # For production, specify the frontend domain
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class ChatRequest(BaseModel):
    message: str


@app.get("/")
def root():
    return {"message": "Gemma Chat API is running 🚀. Use POST /chat to send messages."}


@app.post("/chat")
async def chat(req: ChatRequest):
    try:
        # Forward the user's message to the model as a single-turn chat
        messages = [{"role": "user", "content": req.message}]
        response = client.chat_completion(
            model=MODEL_ID,
            messages=messages,
            temperature=0.7,
        )
        return {"response": response.choices[0].message.content}
    except Exception as e:
        return {"error": str(e)}
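
# Usage sketch (assumptions: this file is saved as main.py and uvicorn is installed;
# adjust the module name and port to your setup):
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Example request once the server is running:
#
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello, Gemma!"}'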