"""Streamlit Q&A app comparing plain LLM answers with RAG-augmented answers.

Loads a pre-built FAISS index and its document chunks from
``rag_model.joblib``, embeds user questions with the OpenAI embeddings
API, retrieves the top-k most similar chunks, and answers with
gpt-3.5-turbo in one of three modes (no RAG / RAG / RAG + expert prompt).
"""

import os

import faiss  # noqa: F401 -- kept so joblib can unpickle the FAISS index
import joblib
import numpy as np
import streamlit as st
from openai import OpenAI

# Initialize the OpenAI client. The key comes from a custom Hugging Face
# Space secret named "POCJujitsu" (presumably holding an OpenAI API key --
# TODO confirm; if unset, getenv returns None and every API call fails).
client = OpenAI(api_key=os.getenv("POCJujitsu"))

# Load the serialized document chunks and their FAISS index.
chunks, index = joblib.load("rag_model.joblib")


def embed_query(text):
    """Embed *text* with OpenAI and return a (1, dim) float32 array for FAISS."""
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    return np.array(response.data[0].embedding, dtype=np.float32).reshape(1, -1)


def search(query, k=3):
    """Return up to *k* document chunks most similar to *query*.

    Uses the standard FAISS Python API: ``index.search(x, k)`` returns a
    ``(distances, labels)`` tuple. The previous code passed preallocated
    ``distances``/``labels`` arrays positionally, which does not match the
    Python binding's signature (output arrays are keyword-only ``D=``/``I=``
    where supported) and raised a TypeError at runtime.
    """
    query_vec = embed_query(query)
    _distances, labels = index.search(query_vec, k)
    # FAISS pads labels with -1 when fewer than k neighbors exist;
    # without the filter, chunks[-1] would silently return the last chunk.
    return [chunks[i] for i in labels[0] if i != -1]


def chat_no_rag(question):
    """Answer *question* with gpt-3.5-turbo alone, no retrieved context."""
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": question}],
        temperature=0.5,
        max_tokens=300,
    )
    return response.choices[0].message.content


def chat_with_rag(question, context_chunks):
    """Answer *question* grounded in the retrieved *context_chunks*."""
    context = "\n".join(context_chunks)
    prompt = f"Ayudate en el siguiente contexto para responder la pregunta y usa tus conocimientos en caso de ser necesario: \n\n{context}\n\nPregunta: {question}"
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
        max_tokens=300,
    )
    return response.choices[0].message.content


def chat_with_rag_enhanced(question, context_chunks):
    """Like chat_with_rag, but with an expert persona and lower temperature."""
    context = "\n".join(context_chunks)
    prompt = (
        "Eres un experto en historia marcial. "
        "Usa el siguiente contexto como referencia para responder la pregunta. "
        "Puedes complementar con tus propios conocimientos si es necesario.\n\n"
        f"Contexto:\n{context}\n\n"
        f"Pregunta: {question}\nRespuesta:"
    )
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
        max_tokens=300,
    )
    return response.choices[0].message.content


# --------------------------------------------------------------- Streamlit UI
st.set_page_config(page_title="RAG JuJutsu Q&A")
st.title("🤖 JuJutsu AI - Ask Anything")
st.markdown("Ask a question about jujutsu history, techniques, or philosophy.")

question = st.text_input("❓ Enter your question:")
mode = st.radio(
    "Choose response mode:",
    ["No RAG", "With RAG", "With RAG + Expert Prompt"],
)

if st.button("Get Answer") and question:
    if mode == "No RAG":
        answer = chat_no_rag(question)
    else:
        # Both RAG modes share the same retrieval step.
        retrieved = search(question)
        if mode == "With RAG":
            answer = chat_with_rag(question, retrieved)
        else:
            answer = chat_with_rag_enhanced(question, retrieved)

    st.markdown("### 🧠 Answer")
    st.write(answer)