Spaces: Runtime error
import os
import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import uvicorn
# Define a Pydantic model for request validation
class Query(BaseModel):
    text: str
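# An illustrative request body this model validates (field name must be "text"):
#   {"text": "What is an ETF?"}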
# Initialize FastAPI app
app = FastAPI(title="Financial Chatbot API")
# Load your fine-tuned model and tokenizer
model_name = "Phoenix21/meta-llama-Llama-3.2-3B-2025-03-13-checkpoints"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
# Create a text-generation pipeline
chat_pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,  # without this, transformers ignores temperature/top_p
    temperature=0.7,
    top_p=0.95,
)
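# Optional startup sanity check (a sketch, not required for serving): running
# one prompt through the pipeline confirms the model and tokenizer loaded
# correctly before traffic arrives.
#   print(chat_pipe("Question: What is compound interest?\nAnswer: ")[0]["generated_text"])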
# Define an endpoint for generating responses
# (the "/generate" route path is an assumption; the original file had no route decorator)
@app.post("/generate")
def generate(query: Query):
    prompt = f"Question: {query.text}\nAnswer: "
    # generated_text includes the prompt; pass return_full_text=False to omit it
    response = chat_pipe(prompt)[0]["generated_text"]
    return {"response": response}
# Run the app using uvicorn. Hugging Face Spaces sets the PORT environment variable.
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
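# Example client call (a sketch, assuming the app is reachable on localhost
# port 8000 and the endpoint is mounted at the "/generate" path used above):
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8000/generate",
#       json={"text": "How does compound interest work?"},
#   )
#   print(resp.json()["response"])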