FauziIsyrinApridal committed on
Commit
111ed77
·
1 Parent(s): 542a0cb
Files changed (2) hide show
  1. app.py +13 -1
  2. app/document_processor.py +4 -5
app.py CHANGED
@@ -17,10 +17,15 @@ from langchain_community.document_transformers import LongContextReorder
17
 
18
  load_dotenv()
19
 
20
-
 
 
21
  BUCKET_NAME = "pnp-bot-storage-archive"
22
  VECTOR_STORE_PREFIX = "vector_store"
23
 
 
 
 
24
  def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
25
  """Get the latest timestamp from files in a Supabase storage bucket."""
26
  files = list_all_files(bucket_name)
@@ -71,6 +76,9 @@ def reorder_embedding(docs):
71
  return reordering.transform_documents(docs)
72
 
73
 
 
 
 
74
  @traceable(name="Create RAG Conversational Chain")
75
  def create_conversational_chain(vector_store):
76
  """Create a Conversational Retrieval Chain for RAG."""
@@ -93,6 +101,10 @@ def get_rag_chain(vector_store):
93
  """Return a Conversational Retrieval Chain for external use."""
94
  return create_conversational_chain(vector_store)
95
 
 
 
 
 
96
  @traceable(name="Main Chatbot RAG App")
97
  def main():
98
  initialize_session_state()
 
17
 
18
  load_dotenv()
19
 
20
+ # ---------------------------------------------------------
21
+ # ⚡️ CONFIG
22
+ # ---------------------------------------------------------
23
  BUCKET_NAME = "pnp-bot-storage-archive"
24
  VECTOR_STORE_PREFIX = "vector_store"
25
 
26
+ # ---------------------------------------------------------
27
+ # ⚡️ UTILITY
28
+ # ---------------------------------------------------------
29
  def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
30
  """Get the latest timestamp from files in a Supabase storage bucket."""
31
  files = list_all_files(bucket_name)
 
76
  return reordering.transform_documents(docs)
77
 
78
 
79
+ # ---------------------------------------------------------
80
+ # ⚡️ RAG CHAIN
81
+ # ---------------------------------------------------------
82
  @traceable(name="Create RAG Conversational Chain")
83
  def create_conversational_chain(vector_store):
84
  """Create a Conversational Retrieval Chain for RAG."""
 
101
  """Return a Conversational Retrieval Chain for external use."""
102
  return create_conversational_chain(vector_store)
103
 
104
+
105
+ # ---------------------------------------------------------
106
+ # ⚡️ MAIN FUNCTION
107
+ # ---------------------------------------------------------
108
  @traceable(name="Main Chatbot RAG App")
109
  def main():
110
  initialize_session_state()
app/document_processor.py CHANGED
@@ -103,12 +103,11 @@ def process_documents(docs):
103
  encode_kwargs={"normalize_embeddings": True}
104
  )
105
 
106
- text_splitter = RecursiveCharacterTextSplitter(
107
- chunk_size=1500,
108
- chunk_overlap=300,
109
- separators=["\n\n", "\n", ".", " ", ""]
110
  )
111
-
112
  text_chunks = text_splitter.split_documents(docs)
113
  vector_store = FAISS.from_documents(text_chunks, embeddings)
114
 
 
103
  encode_kwargs={"normalize_embeddings": True}
104
  )
105
 
106
+ text_splitter = RecursiveCharacterTextSplitter.from_language("id")(
107
+ chunk_size=1500,
108
+ chunk_overlap=300
109
+
110
  )
 
111
  text_chunks = text_splitter.split_documents(docs)
112
  vector_store = FAISS.from_documents(text_chunks, embeddings)
113