Spaces:
Running
Running
| import streamlit as st | |
| import os | |
| from dotenv import load_dotenv | |
| from langsmith import traceable | |
| from app.chat import initialize_session_state, display_chat_history | |
| from app.data_loader import get_data, load_docs | |
| from app.document_processor import process_documents, save_vector_store, load_vector_store | |
| from app.prompts import sahabat_prompt | |
| from langchain_community.llms import Replicate | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain_community.document_transformers import LongContextReorder | |
# Load environment variables (e.g. Replicate/LangSmith API tokens) from .env.
load_dotenv()

# Where the persisted vector store lives on disk.
VECTOR_STORE_PATH = "vector_store_data"
# Folder whose documents are indexed for retrieval.
DATA_DIR = "data"
def create_conversational_chain(vector_store):
    """Build a retrieval-augmented conversational chain over *vector_store*.

    Wires the Replicate-hosted Sahabat-AI model to a retriever drawn from
    the given vector store, with a buffer memory tracking the chat history.

    Args:
        vector_store: any store exposing ``as_retriever``.

    Returns:
        A ``ConversationalRetrievalChain`` configured to also return the
        source documents it retrieved.
    """
    # Low temperature keeps answers grounded; large token budget allows
    # long generated responses.
    language_model = Replicate(
        model="fauziisyrinapridal/sahabat-ai-v1:afb9fa89fe786362f619fd4fef34bd1f7a4a4da23073d8a6fbf54dcbe458f216",
        model_kwargs={"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 6000},
    )

    # output_key='answer' tells the memory which chain output to record,
    # since the chain also emits source documents.
    chat_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key='answer',
    )

    return ConversationalRetrievalChain.from_llm(
        language_model,
        retriever=vector_store.as_retriever(search_kwargs={"k": 6}),
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=chat_memory,
    )
def reorder_embedding(docs):
    """Reorder *docs* so the highest-relevance ones sit at the start and end.

    Uses LangChain's ``LongContextReorder`` to mitigate the "lost in the
    middle" effect when many documents are stuffed into one LLM context.
    """
    return LongContextReorder().transform_documents(docs)
def get_latest_data_timestamp(folder):
    """Return the newest modification time (epoch seconds) of any file under *folder*.

    Walks the tree recursively. Returns 0 when the folder is empty or does
    not exist (``os.walk`` simply yields nothing for a missing path), so the
    result can be compared safely against another mtime.

    Args:
        folder: path of the directory tree to scan.

    Returns:
        float: the latest mtime found, or 0 if no files were seen.
    """
    latest_time = 0
    for root, _, files in os.walk(folder):
        for file in files:
            path = os.path.join(root, file)
            try:
                latest_time = max(latest_time, os.path.getmtime(path))
            except OSError:
                # File vanished between walk() and the stat, or is a broken
                # symlink: skip it rather than crash the freshness check.
                continue
    return latest_time
def vector_store_is_outdated():
    """Return True when the persisted vector store is missing or stale.

    Stale means any file under DATA_DIR was modified after the store at
    VECTOR_STORE_PATH was last written.
    """
    if not os.path.exists(VECTOR_STORE_PATH):
        # No persisted index yet — it must be (re)built.
        return True
    store_mtime = os.path.getmtime(VECTOR_STORE_PATH)
    return get_latest_data_timestamp(DATA_DIR) > store_mtime
def main():
    """Streamlit entry point: prepare the vector store, then run the chat UI."""
    initialize_session_state()
    get_data()

    # Build or load the index only on a fresh session (empty chat history);
    # afterwards the store is cached in st.session_state.
    if len(st.session_state['history']) == 0:
        if vector_store_is_outdated():
            docs = load_docs()
            if docs:
                store = process_documents(reorder_embedding(docs))
                save_vector_store(store)
            else:
                # Keep the chatbot usable even without document context.
                st.warning("Tidak ada dokumen ditemukan di folder 'data/'. Chatbot tetap bisa digunakan, tapi tanpa konteks dokumen.")
                store = None
        else:
            store = load_vector_store()
        st.session_state['vector_store'] = store

    if st.session_state['vector_store'] is not None:
        chain = create_conversational_chain(st.session_state['vector_store'])
        display_chat_history(chain)
# Script entry point when run directly (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()