Spaces:
Sleeping
Sleeping
| from datetime import datetime | |
| import streamlit as st | |
| from typing import Optional | |
| from app.data_loader import list_all_files | |
| from app.db import supabase | |
| from app.document_processor import load_vector_store_from_supabase | |
| from app.config import Config | |
# Storage location of the serialized vector store. Config may define either
# attribute; the literals below are the fallbacks used when it does not.
BUCKET_NAME = getattr(Config, "BUCKET_NAME", "pnp-bot-storage-archive")
VECTOR_STORE_PREFIX = getattr(Config, "VECTOR_STORE_PREFIX", "vector_store")
def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
    """Return the newest modification time among the files in a storage bucket.

    Each entry returned by ``list_all_files(bucket_name)`` is inspected for an
    ``updated_at`` value, falling back to ``created_at``. The value is parsed
    with ``datetime.fromisoformat`` after rewriting ``Z`` to ``+00:00`` and
    converted to a POSIX timestamp.

    Args:
        bucket_name: Name of the bucket whose files are listed.

    Returns:
        The largest timestamp found, in seconds since the epoch, or ``0.0``
        when no entry carries a parseable time.
    """
    newest = 0.0
    for entry in list_all_files(bucket_name):
        raw_time = entry.get("updated_at") or entry.get("created_at")
        if not raw_time:
            # Entry has no usable time field; nothing to compare.
            continue
        try:
            parsed = datetime.fromisoformat(raw_time.replace('Z', '+00:00'))
            stamp = parsed.timestamp()
        except Exception as e:
            # Best effort: an unparseable entry is reported and skipped so one
            # bad record does not abort the whole scan.
            print(f"Gagal parsing waktu dari {entry.get('name')}: {e}")
        else:
            newest = max(newest, stamp)
    return newest
def get_supabase_vector_store_timestamp() -> Optional[str]:
    """Return the latest ``updated_at`` value of the stored vector store files.

    Lists ``BUCKET_NAME`` and keeps the entries whose name starts with
    ``VECTOR_STORE_PREFIX`` and ends in ``.faiss`` or ``.pkl``.

    Returns:
        The greatest ``updated_at`` value among the matching entries (the
        values are compared as-is, without parsing), or ``None`` when fewer
        than two entries match or when any step of the lookup raises.
    """
    try:
        listing = supabase.storage.from_(BUCKET_NAME).list()
        stamps = [
            item["updated_at"]
            for item in listing
            if item["name"].startswith(VECTOR_STORE_PREFIX)
            and item["name"].endswith((".faiss", ".pkl"))
        ]
        # At least two matching files are required; presumably the index and
        # its pickle companion must both be present — confirm.
        return max(stamps) if len(stamps) >= 2 else None
    except Exception as e:
        # Any failure (listing, missing keys, comparison) is reported and
        # surfaced to the caller as "no timestamp available".
        print(f"Error getting Supabase timestamp: {e}")
        return None
def vector_store_is_outdated(data_bucket: str = "pnp-bot-storage") -> bool:
    """Report whether the stored vector store is older than the source files.

    Compares the timestamp returned by
    ``get_supabase_vector_store_timestamp()`` with the newest file time
    returned by ``get_latest_data_timestamp_from_files(data_bucket)``.

    Args:
        data_bucket: Bucket holding the source data files. Defaults to
            ``"pnp-bot-storage"``, the bucket this check has always used.

    Returns:
        ``True`` when no vector store timestamp is available, when that
        timestamp cannot be parsed, or when a data file is newer than the
        vector store; ``False`` otherwise.
    """
    supabase_timestamp = get_supabase_vector_store_timestamp()
    if supabase_timestamp is None:
        return True

    try:
        supabase_time = datetime.fromisoformat(
            supabase_timestamp.replace("Z", "+00:00")
        ).timestamp()
    except (AttributeError, TypeError, ValueError) as e:
        # The value comes straight from the storage listing. If it is not a
        # valid ISO string, freshness cannot be verified, so treat the store
        # as outdated (same outcome as a missing timestamp) instead of
        # letting the exception escape the check.
        print(f"Error parsing Supabase timestamp {supabase_timestamp!r}: {e}")
        return True

    return get_latest_data_timestamp_from_files(data_bucket) > supabase_time
def get_cached_vector_store():
    """Load the vector store saved in Supabase storage.

    Delegates to ``load_vector_store_from_supabase`` with the module's
    ``supabase`` client, ``BUCKET_NAME`` and ``VECTOR_STORE_PREFIX``, and
    returns whatever that call returns.

    NOTE(review): despite the name, nothing in this function as visible here
    caches the result — every call goes to the loader. Presumably a cache
    decorator or a caller-side cache is intended; confirm.
    """
    vector_store = load_vector_store_from_supabase(
        supabase,
        BUCKET_NAME,
        VECTOR_STORE_PREFIX,
    )
    return vector_store