from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from datasets import load_dataset
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss
import json
import os
from huggingface_hub import login

app = FastAPI()

# Log in to Hugging Face using an environment variable
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not set")
login(hf_token)

# Load the dataset and prepare the knowledge base
ds = load_dataset("codeparrot/apps", "all", split="train")
os.makedirs("knowledge_base", exist_ok=True)
for i, example in enumerate(ds.select(range(100))):  # limited to 100 examples for the free tier
    # 'solutions' is stored as a JSON-encoded list of solution strings; parse it and fall back gracefully
    try:
        solutions = json.loads(example["solutions"]) if example["solutions"] else []
    except (json.JSONDecodeError, TypeError):
        solutions = [example["solutions"]]
    solution = solutions[0] if solutions else "No solution available"
    with open(f"knowledge_base/doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(f"### Problem\n{example['question']}\n\n### Solution\n{solution}")

documents = SimpleDirectoryReader("knowledge_base").load_data()

# Set up RAG: embed documents with MiniLM and store the vectors in a FAISS index
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
Settings.embed_model = embed_model
d = 384  # embedding dimension of all-MiniLM-L6-v2
faiss_index = faiss.IndexFlatL2(d)
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# Load the Llama model
model_name = "meta-llama/Llama-3.2-1B-Instruct"
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
# bitsandbytes 4-bit quantization needs a CUDA device; fall back to an unquantized model on CPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config if device == "cuda" else None,
    device_map="auto" if device == "cuda" else None,
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


@app.get("/solve")
async def solve_problem(problem: str, top_k: int = 1):
    # Retrieve the most similar problem/solution documents to use as context
    retriever = index.as_retriever(similarity_top_k=top_k)
    retrieved_nodes = retriever.retrieve(problem)
    context = retrieved_nodes[0].text if retrieved_nodes else "No relevant context found."
    prompt = (
        f"Given the following competitive programming problem:\n\n{problem}\n\n"
        f"Relevant context:\n{context}\n\n"
        "Generate a solution in Python:"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )
    # Decode only the newly generated tokens so the prompt is not echoed back in the response
    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    solution = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return {"solution": solution, "context": context}
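

# Local entry point (example)
# A minimal sketch of how this app could be served and exercised locally; the host,
# port, and sample query below are illustrative assumptions, not part of the original setup.
#
# Example request once the server is running:
#   curl "http://localhost:8000/solve?problem=Read%20two%20integers%20and%20print%20their%20sum&top_k=1"
if __name__ == "__main__":
    import uvicorn  # uvicorn is the usual ASGI server for FastAPI apps

    uvicorn.run(app, host="0.0.0.0", port=8000)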