Spaces:
Running
Running
FauziIsyrinApridal
committed on
Commit
·
111ed77
1
Parent(s):
542a0cb
back to
Browse files
- app.py +13 -1
- app/document_processor.py +4 -5
app.py
CHANGED
@@ -17,10 +17,15 @@ from langchain_community.document_transformers import LongContextReorder
|
|
17 |
|
18 |
load_dotenv()
|
19 |
|
20 |
-
|
|
|
|
|
21 |
BUCKET_NAME = "pnp-bot-storage-archive"
|
22 |
VECTOR_STORE_PREFIX = "vector_store"
|
23 |
|
|
|
|
|
|
|
24 |
def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
|
25 |
"""Get the latest timestamp from files in a Supabase storage bucket."""
|
26 |
files = list_all_files(bucket_name)
|
@@ -71,6 +76,9 @@ def reorder_embedding(docs):
|
|
71 |
return reordering.transform_documents(docs)
|
72 |
|
73 |
|
|
|
|
|
|
|
74 |
@traceable(name="Create RAG Conversational Chain")
|
75 |
def create_conversational_chain(vector_store):
|
76 |
"""Create a Conversational Retrieval Chain for RAG."""
|
@@ -93,6 +101,10 @@ def get_rag_chain(vector_store):
|
|
93 |
"""Return a Conversational Retrieval Chain for external use."""
|
94 |
return create_conversational_chain(vector_store)
|
95 |
|
|
|
|
|
|
|
|
|
96 |
@traceable(name="Main Chatbot RAG App")
|
97 |
def main():
|
98 |
initialize_session_state()
|
|
|
17 |
|
18 |
load_dotenv()
|
19 |
|
20 |
+
# ---------------------------------------------------------
|
21 |
+
# ⚡️ CONFIG
|
22 |
+
# ---------------------------------------------------------
|
23 |
BUCKET_NAME = "pnp-bot-storage-archive"
|
24 |
VECTOR_STORE_PREFIX = "vector_store"
|
25 |
|
26 |
+
# ---------------------------------------------------------
|
27 |
+
# ⚡️ UTILITY
|
28 |
+
# ---------------------------------------------------------
|
29 |
def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
|
30 |
"""Get the latest timestamp from files in a Supabase storage bucket."""
|
31 |
files = list_all_files(bucket_name)
|
|
|
76 |
return reordering.transform_documents(docs)
|
77 |
|
78 |
|
79 |
+
# ---------------------------------------------------------
|
80 |
+
# ⚡️ RAG CHAIN
|
81 |
+
# ---------------------------------------------------------
|
82 |
@traceable(name="Create RAG Conversational Chain")
|
83 |
def create_conversational_chain(vector_store):
|
84 |
"""Create a Conversational Retrieval Chain for RAG."""
|
|
|
101 |
"""Return a Conversational Retrieval Chain for external use."""
|
102 |
return create_conversational_chain(vector_store)
|
103 |
|
104 |
+
|
105 |
+
# ---------------------------------------------------------
|
106 |
+
# ⚡️ MAIN FUNCTION
|
107 |
+
# ---------------------------------------------------------
|
108 |
@traceable(name="Main Chatbot RAG App")
|
109 |
def main():
|
110 |
initialize_session_state()
|
app/document_processor.py
CHANGED
@@ -103,12 +103,11 @@ def process_documents(docs):
|
|
103 |
encode_kwargs={"normalize_embeddings": True}
|
104 |
)
|
105 |
|
106 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
)
|
111 |
-
|
112 |
text_chunks = text_splitter.split_documents(docs)
|
113 |
vector_store = FAISS.from_documents(text_chunks, embeddings)
|
114 |
|
|
|
103 |
encode_kwargs={"normalize_embeddings": True}
|
104 |
)
|
105 |
|
106 |
+
text_splitter = RecursiveCharacterTextSplitter.from_language("id")(
|
107 |
+
chunk_size=1500,
|
108 |
+
chunk_overlap=300
|
109 |
+
|
110 |
)
|
|
|
111 |
text_chunks = text_splitter.split_documents(docs)
|
112 |
vector_store = FAISS.from_documents(text_chunks, embeddings)
|
113 |
|