pnp-chatbot-v1 / app.py
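"""
Streamlit entry point for pnp-chatbot-v1.

Pipeline: documents are loaded from the data/ folder, embedded into a local
vector store (rebuilt only when the data changes), and queried through a
Replicate-hosted LLM via a LangChain ConversationalRetrievalChain. LangSmith
tracing is enabled with the @traceable decorators.
"""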
import os

import streamlit as st
from dotenv import load_dotenv
from langsmith import traceable
from langchain_community.llms import Replicate
from langchain_community.document_transformers import LongContextReorder
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

from app.chat import initialize_session_state, display_chat_history
from app.data_loader import get_data, load_docs
from app.document_processor import process_documents, save_vector_store, load_vector_store
from app.prompts import sahabat_prompt

# Load environment variables (REPLICATE_API_TOKEN for the Replicate LLM and,
# if tracing is enabled, the LangSmith credentials).
load_dotenv()

VECTOR_STORE_PATH = "vector_store_data"
DATA_DIR = "data"

@traceable(name="Create RAG Conversational Chain")
def create_conversational_chain(vector_store):
    """Build a ConversationalRetrievalChain around the Replicate-hosted Sahabat-AI model."""
    llm = Replicate(
        model="fauziisyrinapridal/sahabat-ai-v1:afb9fa89fe786362f619fd4fef34bd1f7a4a4da23073d8a6fbf54dcbe458f216",
        model_kwargs={"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 6000},
    )
    # Keep the running conversation in memory; "answer" is the key the chain writes to.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 6}),  # retrieve the top 6 chunks
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=memory,
    )
    return chain

def reorder_embedding(docs):
    """Reorder documents with LongContextReorder to mitigate the "lost in the middle" effect."""
    reordering = LongContextReorder()
    return reordering.transform_documents(docs)

def get_latest_data_timestamp(folder):
    """Return the newest modification time (epoch seconds) of any file under `folder`."""
    latest_time = 0
    for root, _, files in os.walk(folder):
        for file in files:
            path = os.path.join(root, file)
            file_time = os.path.getmtime(path)
            latest_time = max(latest_time, file_time)
    return latest_time

def vector_store_is_outdated():
    """True when the saved vector store is missing or older than the newest file in DATA_DIR."""
    if not os.path.exists(VECTOR_STORE_PATH):
        return True
    vector_store_time = os.path.getmtime(VECTOR_STORE_PATH)
    data_time = get_latest_data_timestamp(DATA_DIR)
    return data_time > vector_store_time

@traceable(name="Main Chatbot RAG App")
def main():
    initialize_session_state()
    get_data()

    # On a fresh session, rebuild the vector store only when the data folder has
    # changed since it was last saved; otherwise load the saved store from disk.
    if len(st.session_state['history']) == 0:
        if vector_store_is_outdated():
            docs = load_docs()
            reordered_docs = reorder_embedding(docs)
            vector_store = process_documents(reordered_docs)
            save_vector_store(vector_store)
        else:
            vector_store = load_vector_store()
        st.session_state['vector_store'] = vector_store

    if st.session_state['vector_store'] is not None:
        chain = create_conversational_chain(st.session_state['vector_store'])
        display_chat_history(chain)


if __name__ == "__main__":
    main()
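
# To run locally (assuming the usual Streamlit workflow and a .env file that
# provides REPLICATE_API_TOKEN plus any LangSmith credentials):
#   streamlit run app.py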