import os
import json
from typing import List, Optional
from datetime import datetime

# On newer LangChain installs these classes live in langchain_community
# (e.g. from langchain_community.vectorstores import FAISS).
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document


class StockVectorDB:
    """Stores daily stock picks as embedded documents in a FAISS index and
    mirrors each entry to a JSON log for plain-text auditing."""

    def __init__(
        self,
        index_path: str = "vector_index",
        log_path: str = "vector_db_log.json",
        embedding_model: Optional[str] = None,
    ):
        self.index_path = index_path
        self.log_path = log_path
        model = embedding_model or "sentence-transformers/all-MiniLM-L6-v2"
        self.embedding_model = HuggingFaceEmbeddings(model_name=model)

        if os.path.exists(index_path):
            print(f"Loading existing FAISS index from '{index_path}'")
            # allow_dangerous_deserialization is required on recent LangChain
            # releases to load a pickled local index; omit it on versions that
            # predate the flag.
            self.index = FAISS.load_local(
                index_path,
                self.embedding_model,
                allow_dangerous_deserialization=True,
            )
        else:
            print(f"No index found at '{index_path}'; it will be created on first store.")
            # FAISS.from_documents([]) raises because the embedding dimension
            # cannot be inferred from an empty batch, so defer index creation
            # until the first documents arrive in store_top_picks.
            self.index = None

        if os.path.exists(self.log_path):
            with open(self.log_path, "r") as f:
                self.log_data = json.load(f)
        else:
            self.log_data = []

    def store_top_picks(self, top_picks: List[dict], date: Optional[datetime] = None):
        """Embed one document per pick and persist both index and log.

        Each pick dict is expected to carry: ticker, company, sentiment,
        critical_news, impact_summary, action, reason.
        """
        date = date or datetime.now()
        formatted_date = date.strftime("%Y-%m-%d")
        docs = []

        for stock in top_picks:
            content = (
                f"{stock['ticker']} {stock['company']} is {stock['sentiment']} "
                f"due to: {stock['critical_news']}. "
                f"Impact: {stock['impact_summary']}. "
                f"Action: {stock['action']}. Reason: {stock['reason']}"
            )
            metadata = {
                "date": formatted_date,
                "ticker": stock["ticker"],
                "company": stock["company"],
                "sentiment": stock["sentiment"],
                "action": stock["action"],
            }
            docs.append(Document(page_content=content, metadata=metadata))

            # Append inside the loop so every pick is logged, not just the last.
            self.log_data.append({
                "ticker": stock["ticker"],
                "company": stock["company"],
                "date": formatted_date,
                "sentiment": stock["sentiment"],
                "action": stock["action"],
                "reason": stock["reason"],
            })

        if docs:
            if self.index is None:
                # First write: build the index from the initial batch.
                self.index = FAISS.from_documents(docs, self.embedding_model)
            else:
                self.index.add_documents(docs)
            self.save_index()
            self.save_log()
            print(f"Stored {len(docs)} documents for {formatted_date}")
        else:
            print("No valid documents to store.")

    def save_index(self):
        if self.index is not None:
            self.index.save_local(self.index_path)

    def save_log(self):
        with open(self.log_path, "w") as f:
            json.dump(self.log_data, f, indent=2)

    def search(self, query: str, k: int = 5):
        if self.index is None:
            print("Index is empty; nothing to search.")
            return []
        print(f"Searching for: '{query}' (top {k})")
        results = self.index.similarity_search(query, k=k)
        for res in results:
            print(
                f"\nTicker: {res.metadata.get('ticker')} | "
                f"Sentiment: {res.metadata.get('sentiment')} | "
                f"Date: {res.metadata.get('date')}"
            )
            print(res.page_content)
            print("-" * 80)
        return results

    def backup(self, backup_dir: str = "vector_backups"):
        os.makedirs(backup_dir, exist_ok=True)
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = os.path.join(backup_dir, f"vector_index_{ts}")
        if self.index is None:
            print("Index is empty; nothing to back up.")
            return
        self.index.save_local(backup_path)
        print(f"Backup saved to {backup_path}")