Spaces:
Sleeping
Sleeping
FauziIsyrinApridal
committed on
Commit
·
daa81fb
1
Parent(s):
4ab9ddf
revisi 1
Browse files- .gitignore +3 -1
- app.py +39 -127
- app/auth.py +190 -0
- app/chat.py +235 -4
- app/config.py +3 -0
- app/data_loader.py +2 -0
- app/rag.py +42 -0
- app/session.py +42 -0
- app/ui.py +128 -0
- app/vector_store.py +61 -0
- app/workflows.py +49 -0
- assets/favicon.ico +0 -0
- assets/pnp-logo.png +0 -0
.gitignore
CHANGED
@@ -4,4 +4,6 @@
|
|
4 |
/tests/__pycache__
|
5 |
/app/scrapping/*.py
|
6 |
/vector_store_data
|
7 |
-
/hasil_evaluasi
|
|
|
|
|
|
4 |
/tests/__pycache__
|
5 |
/app/scrapping/*.py
|
6 |
/vector_store_data
|
7 |
+
/hasil_evaluasi
|
8 |
+
/app/data
|
9 |
+
/cache_tts
|
app.py
CHANGED
@@ -2,150 +2,62 @@ import streamlit as st
|
|
2 |
import os
|
3 |
from dotenv import load_dotenv
|
4 |
from langsmith import traceable
|
5 |
-
from datetime import datetime
|
6 |
-
from typing import List, Dict, Optional
|
7 |
|
8 |
-
from app.chat import
|
9 |
-
|
|
|
|
|
|
|
10 |
from app.document_processor import process_documents, save_vector_store_to_supabase, load_vector_store_from_supabase
|
11 |
-
from app.prompts import sahabat_prompt
|
12 |
from app.db import supabase
|
13 |
-
from
|
14 |
-
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
load_dotenv()
|
19 |
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
BUCKET_NAME = "pnp-bot-storage-archive"
|
24 |
-
VECTOR_STORE_PREFIX = "vector_store"
|
25 |
-
|
26 |
-
|
27 |
-
# UTILITY
|
28 |
-
|
29 |
-
def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
|
30 |
-
"""Get the latest timestamp from files in a Supabase storage bucket."""
|
31 |
-
files = list_all_files(bucket_name)
|
32 |
-
latest_time = 0.0
|
33 |
-
for file in files:
|
34 |
-
iso_time = file.get("updated_at") or file.get("created_at")
|
35 |
-
if iso_time:
|
36 |
-
try:
|
37 |
-
timestamp = datetime.fromisoformat(iso_time.replace('Z', '+00:00')).timestamp()
|
38 |
-
latest_time = max(latest_time, timestamp)
|
39 |
-
except Exception as e:
|
40 |
-
print(f"Gagal parsing waktu dari {file.get('name')}: {e}")
|
41 |
-
return latest_time
|
42 |
-
|
43 |
-
|
44 |
-
def get_supabase_vector_store_timestamp() -> Optional[str]:
|
45 |
-
"""Get the latest timestamp of vector store files in the Supabase storage."""
|
46 |
-
try:
|
47 |
-
response = supabase.storage.from_(BUCKET_NAME).list()
|
48 |
-
timestamps = []
|
49 |
-
for file in response:
|
50 |
-
if file["name"].startswith(VECTOR_STORE_PREFIX) and (
|
51 |
-
file["name"].endswith(".faiss") or file["name"].endswith(".pkl")
|
52 |
-
):
|
53 |
-
timestamps.append(file["updated_at"])
|
54 |
-
if len(timestamps) >= 2:
|
55 |
-
return max(timestamps)
|
56 |
-
return None
|
57 |
-
except Exception as e:
|
58 |
-
print(f"Error getting Supabase timestamp: {e}")
|
59 |
-
return None
|
60 |
-
|
61 |
-
|
62 |
-
def vector_store_is_outdated() -> bool:
|
63 |
-
"""Check if vector store needs to be updated based on files in Supabase storage."""
|
64 |
-
supabase_timestamp = get_supabase_vector_store_timestamp()
|
65 |
-
if supabase_timestamp is None:
|
66 |
-
return True
|
67 |
-
supabase_time = datetime.fromisoformat(supabase_timestamp.replace("Z", "+00:00")).timestamp()
|
68 |
-
data_time = get_latest_data_timestamp_from_files("pnp-bot-storage")
|
69 |
-
|
70 |
-
return data_time > supabase_time
|
71 |
-
|
72 |
-
|
73 |
-
def reorder_embedding(docs):
|
74 |
-
"""Reorder documents for long context retrieval."""
|
75 |
-
reordering = LongContextReorder()
|
76 |
-
return reordering.transform_documents(docs)
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
# RAG CHAIN
|
81 |
-
|
82 |
-
@traceable(name="Create RAG Conversational Chain")
|
83 |
-
def create_conversational_chain(vector_store):
|
84 |
-
"""Create a Conversational Retrieval Chain for RAG."""
|
85 |
-
llm = Replicate(
|
86 |
-
model="fauzi3007/sahabat-ai-replicate:c3fc398f441379bd3fb6a4498950f9302aa75b7a95e76978a689ceb5c4b4bf09",
|
87 |
-
model_kwargs={"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 10000}
|
88 |
-
)
|
89 |
-
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="answer")
|
90 |
-
chain = ConversationalRetrievalChain.from_llm(
|
91 |
-
llm,
|
92 |
-
retriever=vector_store.as_retriever(search_kwargs={"k": 6}),
|
93 |
-
combine_docs_chain_kwargs={"prompt": sahabat_prompt},
|
94 |
-
return_source_documents=True,
|
95 |
-
memory=memory,
|
96 |
-
)
|
97 |
-
return chain
|
98 |
-
|
99 |
-
|
100 |
-
def get_rag_chain(vector_store):
|
101 |
-
"""Return a Conversational Retrieval Chain for external use."""
|
102 |
-
return create_conversational_chain(vector_store)
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
# MAIN FUNCTION
|
107 |
|
108 |
@traceable(name="Main Chatbot RAG App")
|
109 |
def main():
|
|
|
|
|
|
|
|
|
110 |
initialize_session_state()
|
111 |
st.set_page_config(
|
112 |
page_title="PNP-Bot",
|
113 |
-
page_icon="
|
114 |
)
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
else:
|
133 |
-
print("⚠️ Folder 'data/' kosong. Chatbot tetap bisa digunakan, tetapi tanpa konteks dokumen.")
|
134 |
-
vector_store = None
|
135 |
-
else:
|
136 |
-
with st.spinner("Memuat vector store dari Supabase..."):
|
137 |
-
vector_store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
|
138 |
-
if vector_store:
|
139 |
-
print("✅ Vector store berhasil dimuat dari Supabase!")
|
140 |
-
else:
|
141 |
-
print("❌ Gagal memuat vector store dari Supabase.")
|
142 |
-
else:
|
143 |
-
vector_store = st.session_state.get("vector_store") or load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
|
144 |
-
|
145 |
st.session_state["vector_store"] = vector_store
|
146 |
|
147 |
if st.session_state["vector_store"] is not None:
|
148 |
-
chain = create_conversational_chain(st.session_state["vector_store"])
|
149 |
display_chat_history(chain)
|
150 |
|
151 |
|
|
|
2 |
import os
|
3 |
from dotenv import load_dotenv
|
4 |
from langsmith import traceable
|
|
|
|
|
5 |
|
6 |
+
from app.chat import (
|
7 |
+
initialize_session_state,
|
8 |
+
display_chat_history,
|
9 |
+
)
|
10 |
+
from app.data_loader import get_data, load_docs
|
11 |
from app.document_processor import process_documents, save_vector_store_to_supabase, load_vector_store_from_supabase
|
|
|
12 |
from app.db import supabase
|
13 |
+
from app.config import Config
|
14 |
+
# Modularized helpers
|
15 |
+
import app.rag as rag
|
16 |
+
import app.vector_store as vs
|
17 |
+
import app.auth as auth
|
18 |
+
import app.ui as ui
|
19 |
+
from app.session import restore_user_session_if_needed, ensure_active_session
|
20 |
+
from app.workflows import prepare_vector_store_if_needed
|
21 |
+
from uuid import uuid4
|
22 |
|
23 |
load_dotenv()
|
24 |
|
25 |
|
26 |
+
BUCKET_NAME = Config.BUCKET_NAME
|
27 |
+
VECTOR_STORE_PREFIX = Config.VECTOR_STORE_PREFIX
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
@traceable(name="Main Chatbot RAG App")
def main():
    """Run one Streamlit script pass of the PNP-Bot chat application.

    Steps, in order: reload environment variables, initialise the chat
    session state, configure the page, restore or request authentication,
    make sure a chat session is active, render the sidebar, prepare the
    vector store and finally render the chat UI when a vector store exists.
    """
    try:
        load_dotenv()
    except Exception as e:
        # Best-effort reload: a broken .env must not take the app down,
        # but the failure should at least be visible in the logs.
        print(f"Gagal memuat .env: {e}")

    initialize_session_state()

    # The icon is shipped under assets/ (same folder the login page reads
    # its logo from). Fall back to the bare file name when it is missing.
    icon_path = os.path.join("assets", "favicon.ico")
    st.set_page_config(
        page_title="PNP-Bot",
        page_icon=icon_path if os.path.exists(icon_path) else "favicon.ico",
    )

    # Try to restore the Supabase session if no user is stored yet
    restore_user_session_if_needed()

    # Authentication gate: unauthenticated visitors only see the auth view
    user = st.session_state.get("user")
    if not user:
        auth.auth_view()
        return

    # Ensure we have an active chat session
    ensure_active_session(user["id"])

    # Sidebar: delegated completely to the UI module
    with st.sidebar:
        ui.render_sidebar_sessions()

    # Vector store orchestration (delegated)
    vector_store = prepare_vector_store_if_needed(len(st.session_state["history"]))
    st.session_state["vector_store"] = vector_store

    if st.session_state["vector_store"] is not None:
        chain = rag.create_conversational_chain(st.session_state["vector_store"])
        display_chat_history(chain)
|
62 |
|
63 |
|
app/auth.py
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import base64
|
3 |
+
import streamlit as st
|
4 |
+
from app.db import supabase
|
5 |
+
|
6 |
+
|
7 |
+
# Inline script that copies Supabase's URL-hash parameters into the query
# string and reloads once, so the Python side can read them.
# NOTE(review): confirm that the deployed Streamlit version actually executes
# <script> tags injected through st.markdown.
_HASH_MIGRATION_SCRIPT = """
<script>
(function(){
const hash = window.location.hash.startsWith('#') ? window.location.hash.substring(1) : window.location.hash;
if (hash && !window.__hashMigrated) {
const h = new URLSearchParams(hash);
const qp = new URLSearchParams(window.location.search);
for (const [k,v] of h.entries()) { qp.set(k, v); }
const newUrl = window.location.pathname + '?' + qp.toString();
window.history.replaceState(null, '', newUrl);
window.location.hash = '';
window.__hashMigrated = true;
window.location.reload();
}
})();
</script>
"""

# Inline script that removes the recovery parameters from the address bar.
_CLEAR_RECOVERY_PARAMS_SCRIPT = """
<script>
(function(){
const qp = new URLSearchParams(window.location.search);
["type","access_token","refresh_token","expires_in","expires_at","token_type"].forEach(k=>qp.delete(k));
const newUrl = window.location.pathname + (qp.toString()?('?'+qp.toString()):'');
window.history.replaceState(null, '', newUrl);
})();
</script>
"""

# Minimum password length enforced by the reset and register forms.
_MIN_PASSWORD_LENGTH = 6


def _load_logo_base64(path):
    """Return the file at *path* base64-encoded, or None when it cannot be read."""
    try:
        with open(path, "rb") as f:
            return base64.b64encode(f.read()).decode()
    except OSError as e:
        # A missing logo must not prevent users from logging in.
        print(f"Gagal memuat logo: {e}")
        return None


def _render_auth_header():
    """Render the centered logo (when available) and the page title."""
    encoded_logo = _load_logo_base64(os.path.join("assets", "pnp-logo.png"))
    parts = ['<div style="text-align:center;">']
    if encoded_logo:
        # Embed the image directly so no static file serving is needed
        parts.append(f'<img src="data:image/png;base64,{encoded_logo}" width="100">')
    parts.append('<h1 style="margin-top:0.25rem;">PNP bot</h1>')
    parts.append('</div>')
    st.markdown("\n".join(parts), unsafe_allow_html=True)


def _migrate_hash_params_once():
    """Emit the hash-to-query migration script once per Streamlit session."""
    if st.session_state.get("_hash_migrated"):
        return
    st.markdown(_HASH_MIGRATION_SCRIPT, unsafe_allow_html=True)
    st.session_state["_hash_migrated"] = True


def _query_param_getter():
    """Return a ``get(key)`` callable over the current URL query parameters."""
    try:
        params = st.query_params  # Streamlit >= 1.30
    except AttributeError:
        legacy_params = st.experimental_get_query_params()

        def _get_legacy(key):
            # The legacy API returns a list of values per key.
            value = legacy_params.get(key, None)
            if isinstance(value, list):
                return value[0] if value else None
            return value

        return _get_legacy

    def _get(key):
        return params.get(key, None)

    return _get


def _auth_redirect_url():
    """Return the e-mail redirect URL: explicit env, then site URL, then localhost."""
    return os.getenv(
        "SUPABASE_EMAIL_REDIRECT",
        os.getenv("NEXT_PUBLIC_SITE_URL", "http://localhost:8501"),
    )


def _passwords_match(candidate, expected):
    """Compare two passwords in constant time (safe for non-ASCII input)."""
    import hmac

    return hmac.compare_digest(
        str(candidate).encode("utf-8"), str(expected).encode("utf-8")
    )


def _render_password_recovery(get_q):
    """Render the "set new password" form used after a Supabase recovery redirect."""
    st.info("Reset password: silakan masukkan password baru Anda.")
    access_token = get_q("access_token")
    refresh_token = get_q("refresh_token")
    with st.form("reset_password_form"):
        npw = st.text_input("Password Baru", type="password")
        npw2 = st.text_input("Konfirmasi Password Baru", type="password")
        submit_reset = st.form_submit_button("Set Password Baru")
    if not submit_reset:
        return

    if not npw or len(npw) < _MIN_PASSWORD_LENGTH:
        st.error("Password minimal 6 karakter.")
        return
    if npw != npw2:
        st.error("Konfirmasi password tidak sama.")
        return
    if not access_token or not refresh_token:
        st.error("Token pemulihan tidak ditemukan. Coba klik ulang tautan dari email.")
        return

    try:
        # Set the current session using the tokens from the redirect,
        # then update the password of that session's user.
        supabase.auth.set_session(access_token, refresh_token)
        supabase.auth.update_user({"password": npw})
    except Exception as e:
        st.error(f"Gagal mengubah password: {e}")
        return

    st.success("Password berhasil diubah. Silakan login kembali.")
    # Best-effort: drop the recovery tokens from the address bar
    st.markdown(_CLEAR_RECOVERY_PARAMS_SCRIPT, unsafe_allow_html=True)


def _render_login_tab():
    """Render the login form and handle demo-password and Supabase logins."""
    with st.form("login_form"):
        email = st.text_input("Email")
        password = st.text_input("Password", type="password")
        submitted = st.form_submit_button("Login")
    if not submitted:
        return

    # Demo password fallback
    shared_pw = os.getenv("APP_DEMO_PASSWORD")
    if shared_pw and _passwords_match(password, shared_pw):
        # NOTE(review): the fallback address below looks like an e-mail
        # obfuscation artifact; confirm the intended default value.
        st.session_state["user"] = {"id": "demo-user", "email": email or "[email protected]"}
        st.success("Login demo berhasil")
        st.rerun()
        return

    user = None
    try:
        auth_res = supabase.auth.sign_in_with_password({
            "email": email,
            "password": password,
        })
        user = getattr(auth_res, "user", None)
        if user:
            st.session_state["user"] = {"id": user.id, "email": getattr(user, "email", email)}
            session_obj = getattr(auth_res, "session", None)
            if session_obj:
                # Keep the tokens so the session can be restored later
                st.session_state["sb_session"] = {
                    "access_token": getattr(session_obj, "access_token", None),
                    "refresh_token": getattr(session_obj, "refresh_token", None),
                }
    except Exception as e:
        st.error(f"Gagal login: {e}")
        return

    if not user:
        st.error("Email atau password salah.")
        return
    st.success("Login berhasil")
    # Rerun outside the try block so the rerun signal is never intercepted
    st.rerun()


def _render_register_tab():
    """Render the registration form and create the account via Supabase."""
    st.caption("Buat akun baru. Anda akan menerima email konfirmasi.")
    with st.form("register_form"):
        r_email = st.text_input("Email", key="reg_email")
        r_password = st.text_input("Password", type="password", key="reg_password")
        r_password2 = st.text_input("Konfirmasi Password", type="password", key="reg_password2")
        submitted_r = st.form_submit_button("Register")
    if not submitted_r:
        return

    if not r_email or not r_email.strip():
        st.error("Email wajib diisi.")
        return
    # Same minimum length as the password-reset form
    if not r_password or len(r_password) < _MIN_PASSWORD_LENGTH:
        st.error("Password minimal 6 karakter.")
        return
    if r_password != r_password2:
        st.error("Password tidak sama.")
        return

    try:
        supabase.auth.sign_up({
            "email": r_email,
            "password": r_password,
            "options": {"email_redirect_to": _auth_redirect_url()}
        })
    except Exception as e:
        st.error(f"Gagal registrasi: {e}")
        return
    st.success("Registrasi berhasil. Silakan cek email untuk konfirmasi.")


def _render_forgot_tab():
    """Render the "forgot password" form and request a reset e-mail."""
    st.caption("Kirim tautan reset password ke email Anda.")
    with st.form("forgot_form"):
        f_email = st.text_input("Email", key="forgot_email")
        submitted_f = st.form_submit_button("Kirim Link Reset")
    if not submitted_f:
        return

    if not f_email or not f_email.strip():
        st.error("Email wajib diisi.")
        return

    try:
        supabase.auth.reset_password_for_email(f_email, {"redirect_to": _auth_redirect_url()})
    except Exception as e:
        st.error(f"Gagal mengirim email reset password: {e}")
        return
    st.success("Email reset password telah dikirim. Periksa kotak masuk Anda.")


def auth_view():
    """Render Supabase authentication with Login, Register, and Forgot Password tabs.

    When the URL carries ``type=recovery`` only the password-reset form is
    shown; otherwise the three authentication tabs are rendered. All content
    is placed in the middle column of a 1:2:1 layout.
    """
    _, center, _ = st.columns([1, 2, 1])
    with center:
        _render_auth_header()

        # Password recovery handler (Supabase redirect):
        # 1) move hash params to query params on first load
        _migrate_hash_params_once()

        # 2) read query params for the recovery flow
        get_q = _query_param_getter()
        if get_q("type") == "recovery":
            _render_password_recovery(get_q)
            # In recovery mode the login/register tabs are not rendered
            return

        tab_login, tab_register, tab_forgot = st.tabs(["Login", "Register", "Forgot Password"])
        with tab_login:
            _render_login_tab()
        with tab_register:
            _render_register_tab()
        with tab_forgot:
            _render_forgot_tab()
|
app/chat.py
CHANGED
@@ -12,6 +12,7 @@ import os
|
|
12 |
import glob
|
13 |
import time
|
14 |
from dotenv import load_dotenv
|
|
|
15 |
|
16 |
load_dotenv()
|
17 |
|
@@ -53,6 +54,207 @@ def initialize_session_state():
|
|
53 |
st.session_state['tts_output'] = ""
|
54 |
if 'tts_played' not in st.session_state:
|
55 |
st.session_state['tts_played'] = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
# edge-tts fallback (cadangan)
|
58 |
async def generate_audio_edge(text, path, voice="id-ID-GadisNeural"):
|
@@ -95,9 +297,28 @@ def text_to_speech(text):
|
|
95 |
return ""
|
96 |
|
97 |
def conversation_chat(query, chain, history):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
result = chain({"question": query, "chat_history": history})
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
def display_chat_history(chain):
|
103 |
reply_container = st.container()
|
@@ -113,7 +334,7 @@ def display_chat_history(chain):
|
|
113 |
help="Aktifkan/Nonaktifkan Text-to-Speech",
|
114 |
use_container_width=True):
|
115 |
st.session_state['should_speak'] = not st.session_state['should_speak']
|
116 |
-
st.
|
117 |
|
118 |
# Tombol Input Suara
|
119 |
with col3:
|
@@ -129,7 +350,7 @@ def display_chat_history(chain):
|
|
129 |
# Jika ada STT
|
130 |
if stt_text:
|
131 |
st.session_state.input_text = stt_text
|
132 |
-
st.
|
133 |
|
134 |
# Ambil input user
|
135 |
user_input = user_input_obj or st.session_state.get("input_text", "")
|
@@ -147,6 +368,16 @@ def display_chat_history(chain):
|
|
147 |
st.session_state['tts_output'] = output
|
148 |
st.session_state['tts_played'] = False
|
149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
# Tampilkan Riwayat Chat
|
151 |
if st.session_state['generated']:
|
152 |
with reply_container:
|
|
|
12 |
import glob
|
13 |
import time
|
14 |
from dotenv import load_dotenv
|
15 |
+
from uuid import uuid4
|
16 |
|
17 |
load_dotenv()
|
18 |
|
|
|
54 |
st.session_state['tts_output'] = ""
|
55 |
if 'tts_played' not in st.session_state:
|
56 |
st.session_state['tts_played'] = True
|
57 |
+
if 'session_id' not in st.session_state:
|
58 |
+
st.session_state['session_id'] = str(uuid4())
|
59 |
+
|
60 |
+
def _current_user():
    """Return the ``'user'`` entry of the Streamlit session state, or None when unset."""
    state = st.session_state
    return state.get('user')
|
62 |
+
|
63 |
+
def save_message_to_supabase(user_id: str, session_id: str, role: str, content: str):
    """Insert one chat message row into the ``chat_messages`` table.

    Args:
        user_id: Owner of the message.
        session_id: Chat session the message belongs to.
        role: Sender role stored with the message.
        content: Message text.

    Any exception raised by the insert is printed to the console and
    swallowed, so a storage failure never interrupts the UI.
    """
    record = {
        "user_id": user_id,
        "session_id": session_id,
        "role": role,
        "content": content,
    }
    try:
        messages_table = supabase.table("chat_messages")
        messages_table.insert(record).execute()
    except Exception as err:
        # Non-fatal in the UI; log to console only
        print(f"Gagal menyimpan pesan: {err}")
|
74 |
+
|
75 |
+
def load_history_from_supabase(user_id: str, session_id: str, limit: int = 100):
    """Load the most recent messages of a chat session from Supabase.

    Args:
        user_id: Owner of the session.
        session_id: Session whose messages are loaded.
        limit: Maximum number of message rows to fetch (newest ones win).

    Returns:
        A ``(past, generated, history)`` tuple: the user messages, the
        assistant messages, and a list of ``(question, answer)`` pairs, all
        in chronological order. Three empty lists are returned when the
        session has no messages or when loading fails (the error is printed).
    """
    try:
        res = (
            supabase
            .table("chat_messages")
            .select("role, content, created_at")
            .eq("user_id", user_id)
            .eq("session_id", session_id)
            # Newest first, so that ``limit`` keeps the latest messages of a
            # long session instead of its oldest ones.
            .order("created_at", desc=True)
            .limit(limit)
            .execute()
        )
        rows = list(getattr(res, 'data', None) or [])
        # Back to chronological order for display and pairing
        rows.reverse()

        past, generated, history = [], [], []
        pending_question = None
        for r in rows:
            role, content = r["role"], r["content"]
            if role == "user":
                past.append(content)
                pending_question = content
            elif role == "assistant":
                if not past:
                    # Reply whose question fell outside the fetched window
                    continue
                generated.append(content)
                if pending_question is not None:
                    # Pair each answer with the question right before it, so
                    # an unanswered question cannot shift later pairs.
                    history.append((pending_question, content))
                    pending_question = None
        return past, generated, history
    except Exception as e:
        print(f"Gagal memuat riwayat: {e}")
        return [], [], []
|
105 |
+
|
106 |
+
def create_chat_session(user_id: str, title: str = None) -> str:
    """Insert a new row into ``chat_sessions`` and return its id.

    The title is only sent when it is non-empty. When the insert fails, or
    the response carries no id, a freshly generated UUID string is returned
    instead (failures are printed to the console).
    """
    try:
        record = {"user_id": user_id, **({"title": title} if title else {})}
        response = supabase.table("chat_sessions").insert(record).execute()
        returned = getattr(response, 'data', None) or []

        new_id = None
        if isinstance(returned, list):
            if returned:
                new_id = returned[0].get('id')
        elif isinstance(returned, dict):
            new_id = returned.get('id')
        # Fall back to a local id when the response did not provide one
        return new_id or str(uuid4())
    except Exception as err:
        print(f"Gagal membuat sesi: {err}")
        return str(uuid4())
|
127 |
+
|
128 |
+
def list_chat_sessions(user_id: str, limit: int = 20):
    """Return up to *limit* chat sessions of a user, newest first.

    Each entry carries ``id``, ``title`` and ``created_at``. An empty list
    is returned when nothing is found or the query fails (the error is
    printed to the console).
    """
    try:
        query = supabase.table("chat_sessions").select("id, title, created_at")
        query = query.eq("user_id", user_id)
        query = query.order("created_at", desc=True).limit(limit)
        response = query.execute()
        return getattr(response, 'data', None) or []
    except Exception as err:
        print(f"Gagal memuat daftar sesi: {err}")
        return []
|
145 |
+
|
146 |
+
def delete_chat_session(user_id: str, session_id: str) -> bool:
    """Delete one chat session of a user together with its messages.

    Returns True when the session row delete ran without raising, False
    otherwise. A failure while deleting the messages is only logged and
    does not abort the session delete.
    """
    try:
        # Messages go first so the session row is no longer referenced
        try:
            messages = supabase.table("chat_messages").delete()
            messages.eq("user_id", user_id).eq("session_id", session_id).execute()
        except Exception as msg_err:
            # Continue even if the messages could not be removed
            print(f"Info: hapus pesan sesi gagal/abaikan: {msg_err}")

        sessions = supabase.table("chat_sessions").delete()
        sessions.eq("user_id", user_id).eq("id", session_id).execute()
    except Exception as err:
        print(f"Gagal menghapus sesi: {err}")
        return False
    else:
        return True
|
161 |
+
|
162 |
+
def delete_all_chat_sessions(user_id: str) -> bool:
    """Delete every chat session and every chat message of a user.

    Returns True when the session delete ran without raising, False
    otherwise. A failure while deleting the messages is only logged.
    """
    try:
        # Remove the user's messages before the sessions themselves
        try:
            messages = supabase.table("chat_messages").delete()
            messages.eq("user_id", user_id).execute()
        except Exception as msg_err:
            print(f"Info: hapus semua pesan gagal/abaikan: {msg_err}")

        sessions = supabase.table("chat_sessions").delete()
        sessions.eq("user_id", user_id).execute()
    except Exception as err:
        print(f"Gagal menghapus semua sesi: {err}")
        return False
    else:
        return True
|
176 |
+
|
177 |
+
def ensure_chat_session(user_id: str, session_id: str, title: str = None) -> str:
    """Ensure a chat session with the given id exists; create it if missing.

    Args:
        user_id: Owner used when the session has to be created.
        session_id: Id that should exist in ``chat_sessions``.
        title: Optional title stored when the session is created.

    Returns:
        The session id: the id reported by the insert when available,
        otherwise *session_id* itself (also when the insert fails).
    """
    try:
        # Check existence
        chk = (
            supabase
            .table("chat_sessions")
            .select("id")
            .eq("id", session_id)
            .limit(1)
            .execute()
        )
        data = getattr(chk, 'data', None) or []
        if (isinstance(data, list) and data) or (isinstance(data, dict) and data.get('id')):
            return session_id
    except Exception as e:
        # Keep going and try to create the session, but leave a trace of
        # why the lookup failed instead of hiding it.
        print(f"Gagal memeriksa sesi: {e}")

    # Create with explicit id
    try:
        payload = {"id": session_id, "user_id": user_id}
        if title:
            payload["title"] = title
        ins = (
            supabase
            .table("chat_sessions")
            .insert(payload)
            .execute()
        )
        data = getattr(ins, 'data', None) or []
        if isinstance(data, list) and data:
            return data[0].get('id', session_id)
        if isinstance(data, dict) and data.get('id'):
            return data.get('id', session_id)
        return session_id
    except Exception as e:
        print(f"Gagal memastikan sesi: {e}")
        return session_id
|
214 |
+
|
215 |
+
def _generate_session_title_from_text(text: str, max_len: int = 60) -> str:
|
216 |
+
"""Generate a concise session title from the first user message."""
|
217 |
+
if not text:
|
218 |
+
return "Percakapan Baru"
|
219 |
+
# Normalize whitespace and strip
|
220 |
+
t = " ".join(text.strip().split())
|
221 |
+
# Remove surrounding quotes or trailing punctuation if too noisy
|
222 |
+
t = t.strip('"\'\u201c\u201d')
|
223 |
+
if len(t) > max_len:
|
224 |
+
t = t[:max_len - 1].rstrip() + "…"
|
225 |
+
return t or "Percakapan Baru"
|
226 |
+
|
227 |
+
def update_chat_session_title_if_empty(user_id: str, session_id: str, candidate_title: str) -> None:
    """Give a session a title derived from *candidate_title* when it has none.

    The session row is looked up by id and user; when the stored title is
    missing or empty, a title generated from *candidate_title* is written.
    Any failure is printed to the console and otherwise ignored.
    """
    try:
        lookup = (
            supabase.table("chat_sessions")
            .select("id, title")
            .eq("id", session_id)
            .eq("user_id", user_id)
            .limit(1)
            .execute()
        )
        found = getattr(lookup, 'data', None) or []

        if isinstance(found, list) and found:
            current_title = found[0].get("title")
        elif isinstance(found, dict):
            current_title = found.get("title")
        else:
            current_title = None

        if current_title:
            # Already titled; nothing to do
            return

        new_title = _generate_session_title_from_text(candidate_title)
        (
            supabase.table("chat_sessions")
            .update({"title": new_title})
            .eq("id", session_id)
            .eq("user_id", user_id)
            .execute()
        )
    except Exception as err:
        # Non-fatal; just log
        print(f"Gagal mengatur judul sesi: {err}")
|
258 |
|
259 |
# edge-tts fallback (cadangan)
|
260 |
async def generate_audio_edge(text, path, voice="id-ID-GadisNeural"):
|
|
|
297 |
return ""
|
298 |
|
299 |
def conversation_chat(query, chain, history):
    """Send *query* to the RAG chain, record the exchange and return the answer.

    For a logged-in user the question and the answer are stored in Supabase
    and the session gets a title derived from the question when it has
    none. Persistence problems are logged and never interrupt the chat.

    Args:
        query: The user's question.
        chain: Callable chain invoked with ``question`` and ``chat_history``.
        history: List of ``(question, answer)`` tuples; the new pair is
            appended to it in place.

    Returns:
        The ``"answer"`` entry of the chain result.
    """
    user = _current_user()
    session_id = st.session_state.get('session_id')

    def _persist(role, content):
        # Store one message for the logged-in user; failures are only logged.
        if not user:
            return
        try:
            save_message_to_supabase(user_id=user["id"], session_id=session_id, role=role, content=content)
        except Exception as e:
            print(f"Gagal menyimpan pesan ({role}): {e}")

    # Save the user message first
    _persist("user", query)

    # Try to auto-name the session on the first user message
    if user:
        try:
            update_chat_session_title_if_empty(user_id=user["id"], session_id=session_id, candidate_title=query)
        except Exception as e:
            print(f"Gagal mengatur judul sesi: {e}")

    result = chain({"question": query, "chat_history": history})
    answer = result["answer"]
    history.append((query, answer))

    # Save the assistant reply
    _persist("assistant", answer)
    return answer
|
322 |
|
323 |
def display_chat_history(chain):
|
324 |
reply_container = st.container()
|
|
|
334 |
help="Aktifkan/Nonaktifkan Text-to-Speech",
|
335 |
use_container_width=True):
|
336 |
st.session_state['should_speak'] = not st.session_state['should_speak']
|
337 |
+
st.rerun()
|
338 |
|
339 |
# Tombol Input Suara
|
340 |
with col3:
|
|
|
350 |
# Jika ada STT
|
351 |
if stt_text:
|
352 |
st.session_state.input_text = stt_text
|
353 |
+
st.rerun()
|
354 |
|
355 |
# Ambil input user
|
356 |
user_input = user_input_obj or st.session_state.get("input_text", "")
|
|
|
368 |
st.session_state['tts_output'] = output
|
369 |
st.session_state['tts_played'] = False
|
370 |
|
371 |
+
# If user just logged in and no local history, try loading from DB
|
372 |
+
if not st.session_state['history']:
|
373 |
+
user = _current_user()
|
374 |
+
if user:
|
375 |
+
past, generated, history = load_history_from_supabase(user_id=user['id'], session_id=st.session_state.get('session_id'))
|
376 |
+
if past or generated:
|
377 |
+
st.session_state['past'] = past or st.session_state['past']
|
378 |
+
st.session_state['generated'] = generated or st.session_state['generated']
|
379 |
+
st.session_state['history'] = history or st.session_state['history']
|
380 |
+
|
381 |
# Tampilkan Riwayat Chat
|
382 |
if st.session_state['generated']:
|
383 |
with reply_container:
|
app/config.py
CHANGED
@@ -7,3 +7,6 @@ class Config:
|
|
7 |
SUPABASE_URL = os.getenv('SUPABASE_URL')
|
8 |
SUPABASE_KEY = os.getenv('SUPABASE_KEY')
|
9 |
REPLICATE_API_TOKEN = os.getenv('REPLICATE_API_TOKEN')
|
|
|
|
|
|
|
|
7 |
SUPABASE_URL = os.getenv('SUPABASE_URL')
|
8 |
SUPABASE_KEY = os.getenv('SUPABASE_KEY')
|
9 |
REPLICATE_API_TOKEN = os.getenv('REPLICATE_API_TOKEN')
|
10 |
+
# Storage/vector configs
|
11 |
+
BUCKET_NAME = os.getenv('BUCKET_NAME', 'pnp-bot-storage-archive')
|
12 |
+
VECTOR_STORE_PREFIX = os.getenv('VECTOR_STORE_PREFIX', 'vector_store')
|
app/data_loader.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
import os
|
|
|
2 |
from app.db import supabase
|
3 |
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
|
4 |
|
|
|
5 |
def list_all_files(bucket_name, limit_per_page=1000):
|
6 |
all_files = []
|
7 |
offset = 0
|
|
|
1 |
import os
|
2 |
+
import streamlit as st
|
3 |
from app.db import supabase
|
4 |
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
|
5 |
|
6 |
+
@st.cache_data(ttl=60, show_spinner=False)
|
7 |
def list_all_files(bucket_name, limit_per_page=1000):
|
8 |
all_files = []
|
9 |
offset = 0
|
app/rag.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
from langsmith import traceable
|
4 |
+
from langchain_community.llms import Replicate
|
5 |
+
from langchain.memory import ConversationBufferWindowMemory
|
6 |
+
from langchain.chains import ConversationalRetrievalChain
|
7 |
+
from langchain_community.document_transformers import LongContextReorder
|
8 |
+
from app.prompts import sahabat_prompt
|
9 |
+
|
10 |
+
|
11 |
+
def reorder_embedding(docs):
    """Run *docs* through LangChain's ``LongContextReorder`` and return the result."""
    return LongContextReorder().transform_documents(docs)
|
15 |
+
|
16 |
+
|
17 |
+
@traceable(name="Create RAG Conversational Chain")
def create_conversational_chain(vector_store):
    """Assemble the ConversationalRetrievalChain used to answer over ``vector_store``.

    The chain wires together a Replicate-hosted model, an MMR retriever built
    from ``vector_store``, the ``sahabat_prompt`` template, and a windowed
    conversation memory. Source documents are returned alongside the answer.
    """
    generation_params = {"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 3000}
    model = Replicate(
        model="fauzi3007/sahabat-ai-replicate:c3fc398f441379bd3fb6a4498950f9302aa75b7a95e76978a689ceb5c4b4bf09",
        model_kwargs=generation_params,
    )

    # output_key is set explicitly because the chain also returns source
    # documents, so the memory must be told which output to record.
    window_memory = ConversationBufferWindowMemory(
        k=4,
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )

    mmr_retriever = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 4, "fetch_k": 20},
    )

    return ConversationalRetrievalChain.from_llm(
        model,
        retriever=mmr_retriever,
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=window_memory,
    )
|
38 |
+
|
39 |
+
|
40 |
+
def get_rag_chain(vector_store):
    """Build and hand back the RAG chain for ``vector_store`` (external entry point)."""
    rag_chain = create_conversational_chain(vector_store)
    return rag_chain
|
app/session.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from app.db import supabase
|
3 |
+
from app.chat import list_chat_sessions, create_chat_session, ensure_chat_session
|
4 |
+
|
5 |
+
|
6 |
+
def restore_user_session_if_needed():
    """Try to restore the Supabase user into ``st.session_state['user']`` if missing.

    Does nothing when a user is already stored. Otherwise it first asks the
    Supabase client for its current session; failing that, it re-applies the
    access/refresh tokens kept in ``st.session_state['sb_session']`` and looks
    the user up again. The function is best-effort: any failure is logged and
    the session state is left without a user.

    NOTE(review): ``supabase`` is imported from ``app.db``. If that object is a
    single client shared by every Streamlit session in the process,
    ``get_session()`` may return another visitor's login -- confirm before
    relying on the first branch.
    """
    import logging  # local import so the module's import header stays untouched

    if st.session_state.get("user"):
        return
    try:
        current = supabase.auth.get_session()
        if current and getattr(current, "user", None):
            u = current.user
            st.session_state["user"] = {"id": u.id, "email": getattr(u, "email", None)}
            return

        toks = st.session_state.get("sb_session")
        if toks and toks.get("access_token") and toks.get("refresh_token"):
            # set_session may refresh the tokens as a side effect.
            supabase.auth.set_session(toks["access_token"], toks["refresh_token"])
            ures = supabase.auth.get_user()
            user_obj = getattr(ures, "user", None)
            if user_obj:
                st.session_state["user"] = {
                    "id": user_obj.id,
                    "email": getattr(user_obj, "email", None),
                }
    except Exception as exc:
        # Still best-effort (the caller falls back to the login flow), but the
        # failure is now visible in the logs instead of being dropped silently.
        logging.getLogger(__name__).warning(
            "Could not restore Supabase session (%s): %s", type(exc).__name__, exc
        )
|
26 |
+
|
27 |
+
|
28 |
+
def ensure_active_session(user_id: str):
    """Guarantee ``st.session_state['session_id']`` is set and return it.

    When no session id is stored yet, the first session returned by
    ``list_chat_sessions(user_id, limit=1)`` is adopted; if there is none, a
    new chat session titled "Percakapan Baru" is created.
    """
    if not st.session_state.get("session_id"):
        recent = list_chat_sessions(user_id, limit=1)
        st.session_state["session_id"] = (
            recent[0]["id"]
            if recent
            else create_chat_session(user_id, title="Percakapan Baru")
        )

    # Best-effort check that a session row exists in the DB; failures are ignored.
    try:
        ensure_chat_session(user_id)
    except Exception:
        pass

    return st.session_state["session_id"]
|
app/ui.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from app.chat import (
|
3 |
+
create_chat_session,
|
4 |
+
list_chat_sessions,
|
5 |
+
ensure_chat_session,
|
6 |
+
delete_chat_session,
|
7 |
+
delete_all_chat_sessions,
|
8 |
+
)
|
9 |
+
from app.db import supabase
|
10 |
+
|
11 |
+
|
12 |
+
def _drop_session_keys(keys):
    """Delete each of ``keys`` from ``st.session_state`` when present."""
    for key in keys:
        if key in st.session_state:
            del st.session_state[key]


def render_sidebar_sessions():
    """Render the sidebar: sessions list/search/pagination, actions, and logout.

    Assumes st.session_state['user'] exists.

    Widgets rendered, in order: a "New Chat" button, a search box, a page-size
    selector, previous/next page buttons, one select + delete button pair per
    session on the current page, a "delete all" expander, and a logout button.
    Every action that changes the active session clears the locally cached
    conversation keys and triggers ``st.rerun()``.
    """
    st.sidebar.header("Percakapan")
    if st.sidebar.button("➕ New Chat", use_container_width=True):
        uid = st.session_state["user"]["id"]
        new_sid = create_chat_session(uid, title="Percakapan Baru")
        st.session_state["session_id"] = new_sid
        _drop_session_keys(["history", "generated", "past", "input_text", "tts_output", "tts_played"])
        st.rerun()

    # Search and pagination controls
    search_query = st.sidebar.text_input("Cari sesi", value=st.session_state.get("session_search", ""))
    st.session_state["session_search"] = search_query

    page_size = st.sidebar.selectbox("Jumlah per halaman", [5, 10, 20, 50], index=1, key="session_page_size")
    current_page = st.session_state.get("session_page", 0)

    # Fetch sessions
    try:
        all_sessions = list_chat_sessions(st.session_state["user"]["id"], limit=1000)
    except Exception as e:
        st.sidebar.error(f"Tidak dapat memuat sesi: {e}")
        all_sessions = []

    # Filter by search (title or id, case-insensitive). The id is coerced to
    # str so a non-string id cannot break the search.
    sessions = all_sessions
    if search_query:
        sq = search_query.lower()
        sessions = [
            s for s in all_sessions
            if sq in (s.get("title") or "").lower() or sq in str(s.get("id") or "").lower()
        ]

    # Pagination. The stored page index can be stale after a search, a page
    # size change, or a deletion, so clamp it to the pages that exist now;
    # otherwise the list renders empty and the label reads e.g. "Halaman 5 / 1".
    total = len(sessions)
    page_count = max(1, (total + page_size - 1) // page_size)
    current_page = min(max(current_page, 0), page_count - 1)
    st.session_state["session_page"] = current_page
    start = current_page * page_size
    end = start + page_size
    page_sessions = sessions[start:end]

    cols = st.sidebar.columns([1, 3, 1])
    disable_prev = current_page <= 0
    disable_next = end >= total
    if cols[0].button("←", disabled=disable_prev):
        st.session_state["session_page"] = max(0, current_page - 1)
        st.rerun()
    cols[1].markdown(f"Halaman {current_page + 1} / {page_count}")
    if cols[2].button("→", disabled=disable_next):
        st.session_state["session_page"] = current_page + 1
        st.rerun()

    # List sessions with select and delete
    for s in page_sessions:
        sid = s["id"]
        title = s.get("title") or sid
        row = st.sidebar.container()
        subcols = row.columns([6, 1])
        if subcols[0].button(title, key=f"select_{sid}"):
            st.session_state["session_id"] = sid
            ensure_chat_session(st.session_state["user"]["id"])  # make sure exists
            # Clear local caches if switching
            _drop_session_keys(["history", "generated", "past"])
            st.rerun()
        if subcols[1].button("🗑️", key=f"del_{sid}"):
            try:
                ok = delete_chat_session(st.session_state["user"]["id"], sid)
                if ok:
                    # If deleting active, choose next one or create new
                    if st.session_state.get("session_id") == sid:
                        # Choose from the unfiltered list so an active search
                        # filter cannot force an unnecessary brand-new chat.
                        remaining = [x for x in all_sessions if x["id"] != sid]
                        if remaining:
                            st.session_state["session_id"] = remaining[0]["id"]
                        else:
                            st.session_state["session_id"] = create_chat_session(
                                st.session_state["user"]["id"], title="Percakapan Baru"
                            )
                        _drop_session_keys(["history", "generated", "past"])
                    st.success("Sesi dihapus")
                    st.rerun()
                else:
                    st.error("Gagal menghapus sesi")
            except Exception as e:
                st.error(f"Gagal menghapus: {e}")

    st.sidebar.divider()
    with st.sidebar.expander("Hapus Semua Percakapan", expanded=False):
        confirm = st.checkbox("Saya yakin ingin menghapus semua percakapan", key="confirm_delete_all")
        if st.button("Hapus Semua", type="primary", disabled=not confirm):
            try:
                ok = delete_all_chat_sessions(st.session_state["user"]["id"])
                if ok:
                    st.session_state["session_id"] = create_chat_session(
                        st.session_state["user"]["id"], title="Percakapan Baru"
                    )
                    _drop_session_keys(["history", "generated", "past"])
                    st.success("Semua percakapan dihapus")
                    st.rerun()
                else:
                    st.error("Gagal menghapus semua percakapan")
            except Exception as e:
                st.error(f"Gagal menghapus semua: {e}")

    st.sidebar.divider()
    if st.sidebar.button("Logout"):
        # Sign-out is best-effort; local state is cleared regardless.
        try:
            supabase.auth.sign_out()
        except Exception:
            pass
        _drop_session_keys([
            "user", "history", "generated", "past", "input_text",
            "tts_output", "tts_played", "vector_store", "session_id", "sb_session",
        ])
        st.success("Anda telah logout")
        st.rerun()
|
app/vector_store.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import datetime
|
2 |
+
import streamlit as st
|
3 |
+
from typing import Optional
|
4 |
+
from app.data_loader import list_all_files
|
5 |
+
from app.db import supabase
|
6 |
+
from app.document_processor import load_vector_store_from_supabase
|
7 |
+
from app.config import Config
|
8 |
+
|
9 |
+
# Defaults for bucket/prefix if not present in Config
|
10 |
+
BUCKET_NAME = getattr(Config, "BUCKET_NAME", "pnp-bot-storage-archive")
|
11 |
+
VECTOR_STORE_PREFIX = getattr(Config, "VECTOR_STORE_PREFIX", "vector_store")
|
12 |
+
|
13 |
+
|
14 |
+
def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
    """Return the newest file timestamp (epoch seconds) found in ``bucket_name``.

    Each file's ``updated_at`` is used, falling back to ``created_at``. Files
    without either field are skipped; unparsable values are reported via
    ``print`` and skipped. Returns ``0.0`` when nothing could be parsed.
    """
    newest = 0.0
    for entry in list_all_files(bucket_name):
        stamp = entry.get("updated_at") or entry.get("created_at")
        if not stamp:
            continue
        try:
            # Rewrite a trailing 'Z' as an explicit offset before parsing.
            parsed = datetime.fromisoformat(stamp.replace('Z', '+00:00')).timestamp()
            newest = max(newest, parsed)
        except Exception as e:
            print(f"Gagal parsing waktu dari {entry.get('name')}: {e}")
    return newest
|
27 |
+
|
28 |
+
|
29 |
+
def get_supabase_vector_store_timestamp() -> Optional[str]:
    """Return the latest ``updated_at`` among the stored vector store files.

    Only bucket entries whose name starts with ``VECTOR_STORE_PREFIX`` and ends
    in ``.faiss`` or ``.pkl`` are considered. ``None`` is returned when fewer
    than two such entries exist, or when listing the bucket fails.
    """
    try:
        listing = supabase.storage.from_(BUCKET_NAME).list()
        stamps = [
            item["updated_at"]
            for item in listing
            if item["name"].startswith(VECTOR_STORE_PREFIX)
            and item["name"].endswith((".faiss", ".pkl"))
        ]
        return max(stamps) if len(stamps) >= 2 else None
    except Exception as e:
        print(f"Error getting Supabase timestamp: {e}")
        return None
|
45 |
+
|
46 |
+
|
47 |
+
def vector_store_is_outdated() -> bool:
    """Report whether the source files are newer than the stored vector store.

    Returns ``True`` when no vector store timestamp is available, or when the
    newest file in the "pnp-bot-storage" bucket is more recent than it.
    """
    stored_stamp = get_supabase_vector_store_timestamp()
    if stored_stamp is None:
        return True

    store_epoch = datetime.fromisoformat(stored_stamp.replace("Z", "+00:00")).timestamp()
    newest_data_epoch = get_latest_data_timestamp_from_files("pnp-bot-storage")
    return newest_data_epoch > store_epoch
|
56 |
+
|
57 |
+
|
58 |
+
@st.cache_resource(show_spinner=False)
def get_cached_vector_store():
    """Load the vector store from Supabase once and reuse it across Streamlit reruns."""
    loaded_store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
    return loaded_store
|
app/workflows.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from app.data_loader import get_data, load_docs
|
3 |
+
from app.document_processor import process_documents, save_vector_store_to_supabase
|
4 |
+
from app.db import supabase
|
5 |
+
from app.config import Config
|
6 |
+
import app.vector_store as vs
|
7 |
+
import app.rag as rag
|
8 |
+
|
9 |
+
BUCKET_NAME = Config.BUCKET_NAME
|
10 |
+
VECTOR_STORE_PREFIX = Config.VECTOR_STORE_PREFIX
|
11 |
+
|
12 |
+
|
13 |
+
def _reset_vector_store_cache():
    """Drop the cached vector store so the next load fetches it again."""
    clear = getattr(vs.get_cached_vector_store, "clear", None)
    if callable(clear):
        clear()


def prepare_vector_store_if_needed(history_len: int):
    """
    Orchestrates vector store availability.
    - If no local history and vector store is outdated: sync data, process, and upload.
    - Else: load cached vector store from Supabase.
    Returns a vector_store or None.

    Args:
        history_len: Number of entries in the local chat history. The freshness
            check only runs when this is 0.
    """
    if history_len != 0:
        # Mid-conversation: reuse what the session already holds, if anything.
        return st.session_state.get("vector_store") or vs.get_cached_vector_store()

    if not vs.vector_store_is_outdated():
        with st.spinner("Memuat vector store dari Supabase..."):
            vector_store = vs.get_cached_vector_store()
        if vector_store:
            print("Vector store berhasil dimuat dari Supabase!")
        else:
            print("Gagal memuat vector store dari Supabase.")
            # Do not keep a failed (empty) load cached; let the next run retry.
            _reset_vector_store_cache()
        return vector_store

    # Outdated: rebuild from the source documents. The cached copy is not
    # loaded first, because it would be discarded anyway.
    with st.spinner("Memuat dan memproses dokumen..."):
        get_data()
        docs = load_docs()
        if not docs:
            print("Folder 'data/' kosong. Chatbot tetap bisa digunakan, tetapi tanpa konteks dokumen.")
            return None

        reordered_docs = rag.reorder_embedding(docs)
        vector_store = process_documents(reordered_docs)
        with st.spinner("Mengunggah vector store ke Supabase..."):
            success = save_vector_store_to_supabase(vector_store, supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
        if success:
            # The cached resource still holds the previous store; clear it so
            # later loads pick up the one just uploaded.
            _reset_vector_store_cache()
            print("Vector store berhasil diunggah ke Supabase!")
        else:
            print("Gagal mengunggah vector store ke Supabase.")

    return vector_store
|
assets/favicon.ico
ADDED
|
assets/pnp-logo.png
ADDED
![]() |