# Quick-start alternative: serve the hosted model directly via Gradio.
# import gradio as gr
# gr.load("models/HuggingFaceH4/zephyr-7b-alpha").launch()

import os
import numpy as np
import gradio as gr
import faiss
from transformers import AutoModelForCausalLM, AutoTokenizer

# Step 1: Load Precomputed Embeddings and Metadata
def load_embeddings(embeddings_folder='embeddings'):
    """Load every .npy file in the folder and build parallel metadata.

    Each file holds a stack of chunk embeddings for one course; the
    filename (without extension) is used as the metadata label for
    every chunk it contains.
    """
    all_embeddings = []
    metadata = []
    for file in os.listdir(embeddings_folder):
        if file.endswith('.npy'):
            embedding_path = os.path.join(embeddings_folder, file)
            embedding = np.load(embedding_path)  # Shape: (27, 384)
            all_embeddings.append(embedding)
            # Metadata corresponds to each .npy file
            meta_info = file.replace('.npy', '')  # Example: 'course_1'
            metadata.extend([meta_info] * embedding.shape[0])  # Repeat metadata for each sub-embedding
    # Stack the per-file arrays into shape (n * 27, 384); FAISS expects float32
    all_embeddings = np.vstack(all_embeddings).astype('float32')
    return all_embeddings, metadata

embeddings, metadata = load_embeddings()

# Step 2: Set Up FAISS Index with Flattened Embeddings
dimension = embeddings.shape[1]  # Should be 384
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 3: Load the Language Model
# Unquantized alternative (needs substantially more VRAM):
# model_name = "HuggingFaceH4/zephyr-7b-alpha"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)
model_name = "TheBloke/zephyr-7B-beta-GPTQ"  # GPTQ weights typically require optimum and auto-gptq
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="balanced",  # spread layers across available devices (requires accelerate)
    trust_remote_code=False,
)

# Step 4: Define the Retrieval Function
def retrieve_documents(query, top_k=3):
    # Build a query embedding from the stored vectors whose metadata label
    # contains the query string. Note this is a keyword heuristic over the
    # labels; no separate query encoder is loaded here.
    matched_embeddings = [embeddings[i] for i in range(len(metadata))
                          if query.lower() in metadata[i].lower()]
    if matched_embeddings:
        query_embedding = np.mean(matched_embeddings, axis=0)
    else:
        # Fallback: use the mean of all embeddings as a default query embedding
        query_embedding = np.mean(embeddings, axis=0)
        print("No exact matches found for query. Using default query embedding.")
    # Reshape to the (1, d) float32 shape FAISS expects
    query_embedding = np.asarray(query_embedding, dtype='float32').reshape(1, -1)
    # Perform the search
    distances, indices = index.search(query_embedding, top_k)
    # Map the returned row indices back to document metadata
    retrieved_docs = [metadata[idx] for idx in indices[0]]
    return retrieved_docs

# Step 5: Define the Response Generation Function
def generate_response(query):
    retrieved_docs = retrieve_documents(query)
    context = " ".join(retrieved_docs)
    input_text = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    # Move inputs onto the same device as the model
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    # max_new_tokens bounds only the generated continuation; max_length would
    # count the prompt tokens against the budget as well
    output = model.generate(**inputs, max_new_tokens=512)
    # Decode only the newly generated tokens so the prompt is not echoed back
    answer = tokenizer.decode(output[0][inputs["input_ids"].shape[1]:],
                              skip_special_tokens=True)
    return answer

# Step 6: Create Gradio Interface
def gradio_interface(query):
    return generate_response(query)

iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs="text",
    title="RAG-based Course Search",
    description="Enter a query to search for relevant courses using Retrieval Augmented Generation.",
)

if __name__ == "__main__":
    iface.launch()