File size: 4,234 Bytes
3e0b264 14c46c2 f98ed18 3e0b264 14c46c2 3e0b264 14c46c2 3e0b264 14c46c2 3e0b264 14c46c2 3e0b264 14c46c2 3e0b264 14c46c2 3e0b264 14c46c2 3e0b264 14c46c2 3e0b264 14c46c2 3e0b264 14c46c2 0f8eb9f 14c46c2 3e0b264 14c46c2 0f8eb9f 14c46c2 ebdb41f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import json
import os
import shutil

import streamlit as st
from huggingface_hub import hf_hub_download, Repository
from langchain_community.docstore.document import Document
from langchain_community.vectorstores.faiss import FAISS
class HFVectorDB:
    """FAISS vector store persisted in a Hugging Face Hub repository.

    The index lives as two files, ``index.faiss`` and ``index.pkl``, in the
    repo ``hf_repo_id``. On construction the files are downloaded
    (best-effort) into ``local_index_dir`` and loaded; ``save_index`` writes
    them back locally and pushes them to the repo. All status reporting goes
    through ``st.write`` (streamlit), so instances are intended to live
    inside a running streamlit app.
    """

    def __init__(self, hf_repo_id, hf_token, local_index_dir="/tmp/vector_index", embedding_model=None):
        # hf_repo_id: Hub repo that stores index.faiss / index.pkl.
        # hf_token: auth token used for both download and git push.
        # local_index_dir: scratch directory for the FAISS files.
        # embedding_model: LangChain embeddings object handed to FAISS.
        self.hf_repo_id = hf_repo_id
        self.hf_token = hf_token
        self.local_index_dir = local_index_dir
        self.embedding_model = embedding_model
        os.makedirs(self.local_index_dir, exist_ok=True)
        # None means "no index yet"; similarity_search treats that as empty.
        self.index = None
        # Download index files from HF repo (if exist)
        self._download_index_files()
        self._load_index()

    def _download_index_files(self):
        """Best-effort fetch of index.faiss / index.pkl into local_index_dir.

        Any failure (missing files on a fresh repo, auth/network errors) is
        reported via st.write and swallowed, so construction never fails here.
        """
        try:
            # NOTE(review): `use_auth_token` is deprecated in recent
            # huggingface_hub releases in favour of `token=` — confirm the
            # pinned version still accepts it.
            faiss_path = hf_hub_download(repo_id=self.hf_repo_id, filename="index.faiss", use_auth_token=self.hf_token)
            pkl_path = hf_hub_download(repo_id=self.hf_repo_id, filename="index.pkl", use_auth_token=self.hf_token)
            # hf_hub_download returns paths inside the HF cache; copy the
            # files into our working directory so FAISS.load_local sees them.
            shutil.copy(faiss_path, os.path.join(self.local_index_dir, "index.faiss"))
            shutil.copy(pkl_path, os.path.join(self.local_index_dir, "index.pkl"))
            st.write("✅ Downloaded FAISS index files from HF repo")
        except Exception as e:
            st.write(f"⚠️ Could not download FAISS index files: {e}")

    def _load_index(self):
        """Load the FAISS index from local_index_dir; fall back to None."""
        try:
            # NOTE(review): langchain >= 0.1 requires
            # allow_dangerous_deserialization=True here; without it
            # load_local raises and we silently fall through to the
            # empty-index branch even when the files downloaded fine —
            # verify against the pinned langchain_community version.
            self.index = FAISS.load_local(self.local_index_dir, self.embedding_model)
            st.write("✅ Loaded FAISS index from local")
        except Exception:
            st.write("ℹ️ No local FAISS index found, starting with empty index")
            self.index = None

    def save_index(self):
        """Write the index to local_index_dir and push it to the HF repo."""
        if self.index is not None:
            self.index.save_local(self.local_index_dir)
            st.write("✅ Saved FAISS index locally")
            self._upload_index_files()
        else:
            st.write("⚠️ No FAISS index to save")

    def _upload_index_files(self):
        """Clone the HF repo, copy the index files in, commit and push.

        NOTE(review): `Repository` is deprecated in huggingface_hub; the
        recommended replacement is `HfApi.upload_file` — consider migrating.
        """
        repo_local_path = "/tmp/hf_dataset_repo_clone"
        # Start from a clean clone so stale state never leaks into the commit.
        if os.path.exists(repo_local_path):
            shutil.rmtree(repo_local_path)
        repo = Repository(local_dir=repo_local_path, clone_from=self.hf_repo_id, use_auth_token=self.hf_token)
        shutil.copy(os.path.join(self.local_index_dir, "index.faiss"), os.path.join(repo_local_path, "index.faiss"))
        shutil.copy(os.path.join(self.local_index_dir, "index.pkl"), os.path.join(repo_local_path, "index.pkl"))
        # auto_lfs_track so the binary .faiss file is stored via git-lfs.
        repo.git_add(auto_lfs_track=True)
        repo.git_commit("Update FAISS index files")
        repo.git_push()
        st.write("✅ Uploaded FAISS index files to HF repo")

    def add_documents(self, docs):
        """Add LangChain documents, creating the index on first use.

        Persists (locally and to the Hub) after every call via save_index.
        """
        if self.index is None:
            self.index = FAISS.from_documents(docs, self.embedding_model)
            st.write("✅ Created new FAISS index")
        else:
            self.index.add_documents(docs)
            st.write("✅ Added documents to FAISS index")
        self.save_index()

    def similarity_search(self, query, k=5):
        """Return the k most similar documents, or [] when no index exists."""
        if self.index is None:
            st.write("⚠️ No index found, returning empty results")
            return []
        return self.index.similarity_search(query, k=k)
def save_top_picks_json(top_picks, date, path="top_picks.jsonl"):
    """Append one ``{"date": ..., "top_picks": ...}`` record to a JSONL file.

    Args:
        top_picks: JSON-serializable payload (list of pick dicts).
        date: date/datetime object exposing ``isoformat()``.
        path: target JSONL file; created on first write, appended thereafter.
    """
    record = {
        "date": date.isoformat(),
        "top_picks": top_picks
    }
    # Fix: `json` was used here without ever being imported in this file,
    # so the first call raised NameError. Explicit utf-8 also keeps the
    # file's encoding independent of the platform's locale default.
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(record) + "\n")
    st.write(f"✅ Saved top picks to {path}")
def add_top_picks_to_vector_db(vector_db, top_picks, date):
    """Convert pick dicts into Documents and store them in the vector DB.

    Args:
        vector_db: object exposing ``add_documents(list[Document])``
            (e.g. HFVectorDB above).
        top_picks: list of dicts with keys 'company', 'ticker', 'sentiment',
            'critical_news', 'impact_summary', 'action', 'reason'.
        date: date/datetime object exposing ``isoformat()``; stamped into
            each document's metadata.
    """
    docs = []
    for pick in top_picks:
        # Flatten the pick into a single searchable text blob...
        content = (
            f"{pick['company']} ({pick['ticker']}):\n"
            f"Sentiment: {pick['sentiment']}\n"
            f"Critical News: {pick['critical_news']}\n"
            f"Impact: {pick['impact_summary']}\n"
            f"Action: {pick['action']}\n"
            f"Reason: {pick['reason']}"
        )
        # ...while keeping the structured fields filterable via metadata.
        metadata = {
            "ticker": pick["ticker"],
            "company": pick["company"],
            "sentiment": pick["sentiment"],
            "action": pick["action"],
            "date": date.isoformat()
        }
        # Fix: `Document` was never imported in this file (NameError on
        # first call); it is now imported at the top of the module.
        docs.append(Document(page_content=content, metadata=metadata))
    # Fix: guard the empty case — FAISS.from_documents([]) (reached via
    # vector_db.add_documents on a fresh index) raises on an empty batch.
    if not docs:
        st.write("⚠️ No top picks to add")
        return
    vector_db.add_documents(docs)
    st.write("✅ Added top picks to vector DB")
|