import streamlit as st
import joblib
import numpy as np
import faiss
import os
from openai import OpenAI

# Initialize the OpenAI client; the API key is read from a custom
# Hugging Face Space secret named "POCJujitsu"
client = OpenAI(api_key=os.getenv("POCJujitsu"))

# Load serialized FAISS index and document chunks
chunks, index = joblib.load("rag_model.joblib")

# Embed query using OpenAI embedding API
def embed_query(text):
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=text
    )
    return np.array(response.data[0].embedding, dtype=np.float32).reshape(1, -1)
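
# Not executed by the app: a minimal sketch of how rag_model.joblib is assumed
# to have been built, mirroring the joblib.load call above. The chunking of the
# source documents is not shown, and this helper is hypothetical.
def build_index(doc_chunks):
    # Embed each chunk and stack into an (n_chunks, 1536) float32 matrix
    # (text-embedding-3-small produces 1536-dimensional vectors)
    vectors = np.vstack([embed_query(chunk) for chunk in doc_chunks])
    # Exact L2 index, matching the IndexFlatL2-style search used below
    idx = faiss.IndexFlatL2(vectors.shape[1])
    idx.add(vectors)
    # Persist (chunks, index) together so the app can load both in one call
    joblib.dump((doc_chunks, idx), "rag_model.joblib")
    return idx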

# Semantic search: retrieve the k most similar chunks from the FAISS index
def search(query, k=3):
    query_vec = embed_query(query)  # float32, shape (1, embedding_dim)

    # index.search returns (distances, labels), each of shape (1, k)
    distances, labels = index.search(query_vec, k)

    return [chunks[i] for i in labels[0]]
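
# Illustrative usage (results depend on the indexed corpus):
#   search("origins of jujutsu")  # -> list of the 3 most relevant chunks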

# Baseline: ask the chat model directly, without any retrieved context
def chat_no_rag(question):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": question}],
        temperature=0.5,
        max_tokens=300
    )
    return response.choices[0].message.content

# RAG: prepend the retrieved chunks to the question as grounding context
def chat_with_rag(question, context_chunks):
    context = "\n".join(context_chunks)
    prompt = f"Use the following context to help answer the question, and draw on your own knowledge if necessary:\n\n{context}\n\nQuestion: {question}"

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
        max_tokens=300
    )
    return response.choices[0].message.content

# RAG with an expert persona prompt and a lower temperature for more
# consistent answers
def chat_with_rag_enhanced(question, context_chunks):
    context = "\n".join(context_chunks)
    prompt = (
        "You are an expert in martial arts history. "
        "Use the following context as a reference to answer the question. "
        "You may supplement it with your own knowledge if necessary.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\nAnswer:"
    )
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
        max_tokens=300
    )
    return response.choices[0].message.content

# Streamlit UI
st.set_page_config(page_title="RAG JuJutsu Q&A")
st.title("🤖 JuJutsu AI - Ask Anything")
st.markdown("Ask a question about jujutsu history, techniques, or philosophy.")

question = st.text_input("❓ Enter your question:")
mode = st.radio("Choose response mode:", ["No RAG", "With RAG", "With RAG + Expert Prompt"])

if st.button("Get Answer") and question:
    if mode == "No RAG":
        answer = chat_no_rag(question)
    else:
        retrieved = search(question)
        if mode == "With RAG":
            answer = chat_with_rag(question, retrieved)
        else:
            answer = chat_with_rag_enhanced(question, retrieved)

    st.markdown("### 🧠 Answer")
    st.write(answer)