from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from datasets import load_dataset
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss
import json
import os
from huggingface_hub import login

app = FastAPI()

# Log in to Hugging Face using an environment variable
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set")
login(hf_token)

# Load the dataset and prepare the knowledge base
ds = load_dataset("codeparrot/apps", "all", split="train")
os.makedirs("knowledge_base", exist_ok=True)
for i, example in enumerate(ds.select(range(100))):  # limited to 100 examples for the free tier
    # 'solutions' is stored as a JSON-encoded list of solution strings; parse it and fall back gracefully
    try:
        solutions = json.loads(example["solutions"]) if example["solutions"] else []
    except (json.JSONDecodeError, TypeError):
        solutions = [example["solutions"]]
    solution = solutions[0] if solutions else "No solution available"
    with open(f"knowledge_base/doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(f"### Problem\n{example['question']}\n\n### Solution\n{solution}")

documents = SimpleDirectoryReader("knowledge_base").load_data()

# Set up RAG: embed documents with MiniLM and store the vectors in a FAISS index
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
Settings.embed_model = embed_model
d = 384  # embedding dimension of all-MiniLM-L6-v2
faiss_index = faiss.IndexFlatL2(d)
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# Load the Llama model
model_name = "meta-llama/Llama-3.2-1B-Instruct"
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
# bitsandbytes 4-bit quantization needs a CUDA device; fall back to an unquantized model on CPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config if device == "cuda" else None,
    device_map="auto" if device == "cuda" else None,
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


@app.get("/solve")
async def solve_problem(problem: str, top_k: int = 1):
    # Retrieve the most similar problem/solution documents to use as context
    retriever = index.as_retriever(similarity_top_k=top_k)
    retrieved_nodes = retriever.retrieve(problem)
    context = retrieved_nodes[0].text if retrieved_nodes else "No relevant context found."
    prompt = (
        f"Given the following competitive programming problem:\n\n{problem}\n\n"
        f"Relevant context:\n{context}\n\n"
        "Generate a solution in Python:"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )
    # Decode only the newly generated tokens so the prompt is not echoed back in the response
    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    solution = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return {"solution": solution, "context": context}
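

# Local entry point (example)
# A minimal sketch of how this app could be served and exercised locally; the host,
# port, and sample query below are illustrative assumptions, not part of the original setup.
#
# Example request once the server is running:
#   curl "http://localhost:8000/solve?problem=Read%20two%20integers%20and%20print%20their%20sum&top_k=1"
if __name__ == "__main__":
    import uvicorn  # uvicorn is the usual ASGI server for FastAPI apps

    uvicorn.run(app, host="0.0.0.0", port=8000)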