import streamlit as st
import joblib
import numpy as np
import faiss
import os
from openai import OpenAI
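# Hedged sketch: fail fast with a clear message if the Space secret is missing.
# This assumes the secret is literally named "POCJujitsu" and holds an OpenAI API key.
if os.getenv("POCJujitsu") is None:
    raise RuntimeError("Secret 'POCJujitsu' is not set; add it in the Space settings.")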
# Initialize OpenAI client using custom Hugging Face secret
client = OpenAI(api_key=os.getenv("POCJujitsu"))
# Load serialized FAISS index and document chunks
chunks, index = joblib.load("rag_model.joblib")
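# Hedged sanity note: text-embedding-3-small returns 1536-dimensional vectors by
# default, and FAISS exposes the indexed dimension as `index.d`; the two must
# match for search to work. For reference, a minimal sketch (assumed; the build
# script is not part of this Space) of how the artifact could have been produced:
#   vecs = np.vstack([embed_query(c) for c in chunks])
#   index = faiss.IndexFlatL2(vecs.shape[1])
#   index.add(vecs)
#   joblib.dump((chunks, index), "rag_model.joblib")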
# Embed query text using the OpenAI embedding API
def embed_query(text):
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=text
    )
    # FAISS expects a 2-D float32 array, hence the reshape to (1, dim)
    return np.array(response.data[0].embedding, dtype=np.float32).reshape(1, -1)
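# Note: client.embeddings.create also accepts a list of strings, so chunk
# embeddings can be batched in one call when building the index, e.g.:
#   resp = client.embeddings.create(model="text-embedding-3-small", input=chunks)
#   vecs = np.array([d.embedding for d in resp.data], dtype=np.float32)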
# Semantic search: return the k chunks nearest to the query in the FAISS index
def search(query, k=3):
    query_vec = embed_query(query)  # already float32, shape (1, dim)
    # The FAISS Python API returns the distance and label arrays directly
    distances, labels = index.search(query_vec, k)
    # FAISS emits -1 labels when fewer than k results are available
    return [chunks[i] for i in labels[0] if i != -1]
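# Example (assumed typical usage): search("Who founded Kodokan judo?") returns
# the 3 chunks whose embeddings are closest (L2 distance) to the query embedding.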
# Baseline: answer directly from the model, with no retrieved context
def chat_no_rag(question):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": question}],
        temperature=0.5,
        max_tokens=300
    )
    return response.choices[0].message.content
# RAG: prepend the retrieved chunks to the question as context
def chat_with_rag(question, context_chunks):
    context = "\n".join(context_chunks)
    prompt = (
        "Use the following context to help answer the question, and draw on "
        f"your own knowledge if necessary:\n\n{context}\n\nQuestion: {question}"
    )
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
        max_tokens=300
    )
    return response.choices[0].message.content
# RAG with an expert persona and a more structured prompt
def chat_with_rag_enhanced(question, context_chunks):
    context = "\n".join(context_chunks)
    prompt = (
        "You are an expert in martial arts history. "
        "Use the following context as a reference to answer the question. "
        "You may supplement it with your own knowledge if necessary.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\nAnswer:"
    )
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
        max_tokens=300
    )
    return response.choices[0].message.content
# Streamlit UI
st.set_page_config(page_title="RAG JuJutsu Q&A")
st.title("🤖 JuJutsu AI - Ask Anything")
st.markdown("Ask a question about jujutsu history, techniques, or philosophy.")
question = st.text_input("❓ Enter your question:")
mode = st.radio("Choose response mode:", ["No RAG", "With RAG", "With RAG + Expert Prompt"])
if st.button("Get Answer") and question:
if mode == "No RAG":
answer = chat_no_rag(question)
else:
retrieved = search(question)
if mode == "With RAG":
answer = chat_with_rag(question, retrieved)
else:
answer = chat_with_rag_enhanced(question, retrieved)
st.markdown("### 🧠 Answer")
st.write(answer)
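# To run outside the Space (assumed standard Streamlit invocation):
#   POCJujitsu=<openai-api-key> streamlit run app.py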