Spaces:

Yozora721
/

pnp-chatbot-v1

Sleeping

App Files Files Community

FauziIsyrinApridal committed on Jul 30

Commit

542a0cb

1 Parent(s): 4d1c64a

..

Browse files

Files changed (2) hide show

app.py +49 -2
app/document_processor.py +2 -45

app.py CHANGED Viewed

@@ -2,9 +2,12 @@ import streamlit as st
 import os
 from dotenv import load_dotenv
 from langsmith import traceable
 from app.chat import initialize_session_state, display_chat_history
-from app.data_loader import get_data, load_docs
-from app.document_processor import process_documents, save_vector_store_to_supabase, load_vector_store_from_supabase, get_latest_data_timestamp_from_files,get_supabase_vector_store_timestamp, vector_store_is_outdated
 from app.prompts import sahabat_prompt
 from app.db import supabase
 from langchain_community.llms import Replicate
@@ -14,9 +17,53 @@ from langchain_community.document_transformers import LongContextReorder
 load_dotenv()
 BUCKET_NAME = "pnp-bot-storage-archive"
 VECTOR_STORE_PREFIX = "vector_store"
 def reorder_embedding(docs):
     """Reorder documents for long context retrieval."""

 import os
 from dotenv import load_dotenv
 from langsmith import traceable
+from datetime import datetime
+from typing import List, Dict, Optional
 from app.chat import initialize_session_state, display_chat_history
+from app.data_loader import get_data, list_all_files, load_docs
+from app.document_processor import process_documents, save_vector_store_to_supabase, load_vector_store_from_supabase
 from app.prompts import sahabat_prompt
 from app.db import supabase
 from langchain_community.llms import Replicate
 load_dotenv()
 BUCKET_NAME = "pnp-bot-storage-archive"
 VECTOR_STORE_PREFIX = "vector_store"
+def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
+    """Return the newest modification time found in a Supabase storage bucket.
+
+    Each entry returned by ``list_all_files(bucket_name)`` is inspected for an
+    ``updated_at`` value, falling back to ``created_at``. The ISO-8601 string
+    is converted to a POSIX timestamp and the largest one is returned; ``0.0``
+    is returned when no entry yields a parseable time.
+    """
+    newest = 0.0
+    for entry in list_all_files(bucket_name):
+        raw_time = entry.get("updated_at") or entry.get("created_at")
+        if not raw_time:
+            continue
+        try:
+            # A trailing "Z" is rewritten as an explicit UTC offset before
+            # parsing, since fromisoformat() only accepts "Z" on Python 3.11+.
+            parsed = datetime.fromisoformat(raw_time.replace('Z', '+00:00'))
+            newest = max(newest, parsed.timestamp())
+        except Exception as e:
+            # Best effort: report the bad entry and keep scanning the rest.
+            print(f"Gagal parsing waktu dari {entry.get('name')}: {e}")
+    return newest
+def get_supabase_vector_store_timestamp() -> Optional[str]:
+    """Return the newest ``updated_at`` value among the stored vector-store files.
+
+    The bucket ``BUCKET_NAME`` is listed via ``.list()`` with no path argument,
+    and only objects whose name starts with ``VECTOR_STORE_PREFIX`` and ends in
+    ``.faiss`` or ``.pkl`` are considered. ``None`` is returned when fewer than
+    two such objects exist, or when anything in the lookup raises (the error
+    is printed).
+    """
+    try:
+        listing = supabase.storage.from_(BUCKET_NAME).list()
+        stamps = [
+            entry["updated_at"]
+            for entry in listing
+            if entry["name"].startswith(VECTOR_STORE_PREFIX)
+            and entry["name"].endswith((".faiss", ".pkl"))
+        ]
+        # Fewer than two matching objects is treated as "no usable store".
+        if len(stamps) < 2:
+            return None
+        # The values are compared as raw strings, not as parsed datetimes.
+        return max(stamps)
+    except Exception as e:
+        print(f"Error getting Supabase timestamp: {e}")
+        return None
+def vector_store_is_outdated(data_bucket: str = "pnp-bot-storage") -> bool:
+    """Report whether the stored vector store is older than the source data.
+
+    Args:
+        data_bucket: Name of the bucket holding the source documents. Defaults
+            to the bucket name that was previously hard-coded here.
+
+    Returns:
+        ``True`` when no vector-store timestamp is available, when that
+        timestamp cannot be parsed, or when the newest file in ``data_bucket``
+        is more recent than the vector store; ``False`` otherwise.
+    """
+    supabase_timestamp = get_supabase_vector_store_timestamp()
+    if supabase_timestamp is None:
+        return True
+    try:
+        supabase_time = datetime.fromisoformat(
+            supabase_timestamp.replace("Z", "+00:00")
+        ).timestamp()
+    except ValueError as e:
+        # An unreadable timestamp gives no evidence the store is current, so
+        # treat it like a missing one instead of crashing the caller.
+        print(f"Error parsing Supabase timestamp {supabase_timestamp!r}: {e}")
+        return True
+    return get_latest_data_timestamp_from_files(data_bucket) > supabase_time
 def reorder_embedding(docs):
     """Reorder documents for long context retrieval."""

app/document_processor.py CHANGED Viewed

@@ -4,8 +4,7 @@ from langchain_community.vectorstores import FAISS
 import os
 import tempfile
 import streamlit as st
-from datetime import datetime
-from data_loader import list_all_files
 def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
@@ -113,46 +112,4 @@ def process_documents(docs):
     text_chunks = text_splitter.split_documents(docs)
     vector_store = FAISS.from_documents(text_chunks, embeddings)
-    return vector_store
-def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
-    """Get the latest timestamp from files in a Supabase storage bucket."""
-    files = list_all_files(bucket_name)
-    latest_time = 0.0
-    for file in files:
-        iso_time = file.get("updated_at") or file.get("created_at")
-        if iso_time:
-            try:
-                timestamp = datetime.fromisoformat(iso_time.replace('Z', '+00:00')).timestamp()
-                latest_time = max(latest_time, timestamp)
-            except Exception as e:
-                print(f"Gagal parsing waktu dari {file.get('name')}: {e}")
-    return latest_time
-def get_supabase_vector_store_timestamp() -> Optional[str]:
-    """Get the latest timestamp of vector store files in the Supabase storage."""
-    try:
-        response = supabase.storage.from_(BUCKET_NAME).list()
-        timestamps = []
-        for file in response:
-            if file["name"].startswith(VECTOR_STORE_PREFIX) and (
-                file["name"].endswith(".faiss") or file["name"].endswith(".pkl")
-            ):
-                timestamps.append(file["updated_at"])
-        if len(timestamps) >= 2:
-            return max(timestamps)
-        return None
-    except Exception as e:
-        print(f"Error getting Supabase timestamp: {e}")
-        return None
-def vector_store_is_outdated() -> bool:
-    """Check if vector store needs to be updated based on files in Supabase storage."""
-    supabase_timestamp = get_supabase_vector_store_timestamp()
-    if supabase_timestamp is None:
-        return True
-    supabase_time = datetime.fromisoformat(supabase_timestamp.replace("Z", "+00:00")).timestamp()
-    data_time = get_latest_data_timestamp_from_files("pnp-bot-storage")
-    return data_time > supabase_time

 import os
 import tempfile
 import streamlit as st
+from langchain.schema import Document
 def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
     text_chunks = text_splitter.split_documents(docs)
     vector_store = FAISS.from_documents(text_chunks, embeddings)
+    return vector_store