Spaces:

Yozora721
/

pnp-chatbot-v1

Running

FauziIsyrinApridal committed on 21 days ago

Commit

111ed77

1 Parent(s): 542a0cb

back to

Files changed (2) hide show

app.py CHANGED Viewed

@@ -17,10 +17,15 @@ from langchain_community.document_transformers import LongContextReorder
 load_dotenv()
 BUCKET_NAME = "pnp-bot-storage-archive"
 VECTOR_STORE_PREFIX = "vector_store"
 def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
     """Get the latest timestamp from files in a Supabase storage bucket."""
     files = list_all_files(bucket_name)
@@ -71,6 +76,9 @@ def reorder_embedding(docs):
     return reordering.transform_documents(docs)
 @traceable(name="Create RAG Conversational Chain")
 def create_conversational_chain(vector_store):
     """Create a Conversational Retrieval Chain for RAG."""
@@ -93,6 +101,10 @@ def get_rag_chain(vector_store):
     """Return a Conversational Retrieval Chain for external use."""
     return create_conversational_chain(vector_store)
 @traceable(name="Main Chatbot RAG App")
 def main():
     initialize_session_state()

 load_dotenv()
+# ---------------------------------------------------------
+# ⚡️ CONFIG
+# ---------------------------------------------------------
 BUCKET_NAME = "pnp-bot-storage-archive"
 VECTOR_STORE_PREFIX = "vector_store"
+# ---------------------------------------------------------
+# ⚡️ UTILITY
+# ---------------------------------------------------------
 def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
     """Get the latest timestamp from files in a Supabase storage bucket."""
     files = list_all_files(bucket_name)
     return reordering.transform_documents(docs)
+# ---------------------------------------------------------
+# ⚡️ RAG CHAIN
+# ---------------------------------------------------------
 @traceable(name="Create RAG Conversational Chain")
 def create_conversational_chain(vector_store):
     """Create a Conversational Retrieval Chain for RAG."""
     """Return a Conversational Retrieval Chain for external use."""
     return create_conversational_chain(vector_store)
+# ---------------------------------------------------------
+# ⚡️ MAIN FUNCTION
+# ---------------------------------------------------------
 @traceable(name="Main Chatbot RAG App")
 def main():
     initialize_session_state()

app/document_processor.py CHANGED Viewed

@@ -103,12 +103,11 @@ def process_documents(docs):
         encode_kwargs={"normalize_embeddings": True}
     )
-    text_splitter = RecursiveCharacterTextSplitter(
-    chunk_size=1500,
-    chunk_overlap=300,
-    separators=["\n\n", "\n", ".", " ", ""]
     )
     text_chunks = text_splitter.split_documents(docs)
     vector_store = FAISS.from_documents(text_chunks, embeddings)

         encode_kwargs={"normalize_embeddings": True}
     )
+    text_splitter = RecursiveCharacterTextSplitter.from_language("id")(
+        chunk_size=1500,
+        chunk_overlap=300
     )
     text_chunks = text_splitter.split_documents(docs)
     vector_store = FAISS.from_documents(text_chunks, embeddings)