# StockMarketInsights / stock_vector_db.py
import os
import json
from typing import List, Optional
from datetime import datetime
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
class StockVectorDB:
    """Persist daily stock "top pick" summaries in a FAISS vector index.

    Each stored pick becomes one embedded Document (searchable by similarity)
    plus one entry in a human-readable JSON audit log. The FAISS index and the
    log are both persisted to disk on every successful store.
    """

    # Keys every pick dict must carry to be embedded/logged.
    _REQUIRED_KEYS = (
        "ticker", "company", "sentiment", "critical_news",
        "impact_summary", "action", "reason",
    )

    def __init__(self, index_path: str = "vector_index",
                 log_path: str = "vector_db_log.json",
                 embedding_model: Optional[str] = None):
        """Load (or lazily create) the index and the JSON log.

        Args:
            index_path: directory for the persisted FAISS index.
            log_path: path of the JSON audit log.
            embedding_model: HuggingFace model name; defaults to MiniLM.
        """
        self.index_path = index_path
        self.log_path = log_path
        model = embedding_model or "sentence-transformers/all-MiniLM-L6-v2"
        self.embedding_model = HuggingFaceEmbeddings(model_name=model)
        if os.path.exists(index_path):
            print(f"\U0001F50D Loading existing FAISS index from '{index_path}'")
            # NOTE(review): recent langchain versions require
            # allow_dangerous_deserialization=True here — confirm against the
            # pinned langchain version before upgrading.
            self.index = FAISS.load_local(index_path, self.embedding_model)
        else:
            print(f"\U0001F195 Creating new FAISS index at '{index_path}'")
            # BUG FIX: FAISS.from_documents([], ...) raises on an empty list,
            # so a brand-new DB crashed on construction. Defer index creation
            # until the first batch of documents arrives.
            self.index = None
        # Load existing log or start fresh (also fresh on a corrupt log file,
        # rather than crashing __init__).
        self.log_data = []
        if os.path.exists(self.log_path):
            try:
                with open(self.log_path, "r") as f:
                    self.log_data = json.load(f)
            except (json.JSONDecodeError, OSError):
                print(f"\u26A0\uFE0F Could not read log '{self.log_path}', starting fresh.")

    def store_top_picks(self, top_picks: List[dict], date: Optional[datetime] = None):
        """Embed and persist a batch of picks; append them to the audit log.

        Malformed entries (missing any required key) are skipped with a
        warning instead of aborting the batch mid-way — previously a single
        bad dict raised KeyError after some log rows were already appended.
        """
        date = date or datetime.now()
        formatted_date = date.strftime("%Y-%m-%d")
        docs = []
        new_log_entries = []
        for stock in top_picks:
            missing = [k for k in self._REQUIRED_KEYS if k not in stock]
            if missing:
                print(f"\u26A0\uFE0F Skipping pick missing keys {missing}: {stock!r}")
                continue
            content = (
                f"{stock['ticker']} {stock['company']} is {stock['sentiment']} "
                f"due to: {stock['critical_news']}. Impact: {stock['impact_summary']}. "
                f"Action: {stock['action']}. Reason: {stock['reason']}"
            )
            metadata = {
                "date": formatted_date,
                "ticker": stock["ticker"],
                "company": stock["company"],
                "sentiment": stock["sentiment"],
                "action": stock["action"],
            }
            docs.append(Document(page_content=content, metadata=metadata))
            new_log_entries.append({
                "ticker": stock["ticker"],
                "company": stock["company"],
                "date": formatted_date,
                "sentiment": stock["sentiment"],
                "action": stock["action"],
                "reason": stock["reason"],
            })
        if docs:
            if self.index is None:
                # First-ever store: build the index from this batch.
                self.index = FAISS.from_documents(docs, self.embedding_model)
            else:
                self.index.add_documents(docs)
            # Log entries are committed only alongside a successful embed,
            # keeping index and log consistent.
            self.log_data.extend(new_log_entries)
            self.save_index()
            self.save_log()
            print(f"\u2705 Stored {len(docs)} documents for {formatted_date}")
        else:
            print("\u26A0\uFE0F No valid documents to store.")

    def save_index(self):
        """Persist the FAISS index to ``self.index_path`` (no-op if empty)."""
        if self.index is not None:
            self.index.save_local(self.index_path)

    def save_log(self):
        """Write the JSON audit log to ``self.log_path``."""
        with open(self.log_path, "w") as f:
            json.dump(self.log_data, f, indent=2)

    def search(self, query: str, k: int = 5):
        """Similarity-search the index, print matches, and return them.

        Returns an empty list when nothing has been stored yet (previously
        this crashed on a fresh, empty DB).
        """
        print(f"\U0001F50D Searching for: '{query}' (top {k})")
        if self.index is None:
            print("\u26A0\uFE0F Index is empty — nothing stored yet.")
            return []
        results = self.index.similarity_search(query, k=k)
        for res in results:
            print(f"\n\U0001F4CC Ticker: {res.metadata.get('ticker')} | "
                  f"Sentiment: {res.metadata.get('sentiment')} | "
                  f"Date: {res.metadata.get('date')}")
            print(res.page_content)
            print("-" * 80)
        return results

    def backup(self, backup_dir: str = "vector_backups"):
        """Snapshot the current index into a timestamped subdirectory."""
        if self.index is None:
            print("\u26A0\uFE0F Nothing to back up — index is empty.")
            return
        os.makedirs(backup_dir, exist_ok=True)
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = os.path.join(backup_dir, f"vector_index_{ts}")
        self.index.save_local(backup_path)
        print(f"\U0001F4E6 Backup saved to {backup_path}")