# Sigrid De los Santos
# Add matplotlib to requirements
# 0d9c76e
import os
import sys
from datetime import datetime
from dotenv import load_dotenv
import pandas as pd
from md_html import convert_single_md_to_html as convert_md_to_html
from news_analysis import fetch_deep_news, generate_value_investor_report
from csv_utils import detect_changes
# === Setup Paths ===
# Resolve __file__ to an absolute path before walking up two levels. With a
# relative __file__ such as "main.py", both dirname() calls return "" and
# every path below would silently become relative to the current working
# directory instead of the parent of this file's directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.path.join(BASE_DIR, "data")  # generated Markdown reports
HTML_DIR = os.path.join(BASE_DIR, "html")  # HTML conversions of the reports
CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")  # default input CSV

# Create the output directories up front so later writes cannot fail on a
# missing folder.
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(HTML_DIR, exist_ok=True)

# === Load .env ===
load_dotenv()
def build_metrics_box(topic, num_articles):
    """Return a Markdown blockquote header summarising one report.

    Args:
        topic: Topic label shown in the header.
        num_articles: Number of articles shown in the header.

    Returns:
        A string that starts with a newline, followed by three quoted lines
        (topic, article count, generation time formatted as
        ``YYYY-MM-DD HH:MM`` from ``datetime.now()``) and a closing bare
        ``>`` line.
    """
    generated_at = f"{datetime.now():%Y-%m-%d %H:%M}"
    return (
        "\n"
        f"> Topic: `{topic}`\n"
        f"> Articles Collected: `{num_articles}`\n"
        f"> Generated: `{generated_at}`\n"
        ">\n"
    )
def _emit_progress(message, progress_callback=None):
    """Print *message* and forward it to *progress_callback* when one is given."""
    print(message)
    if progress_callback:
        progress_callback(message)


def _unique_markdown_path(topic):
    """Return a not-yet-existing ``.md`` path in DATA_DIR for today's report on *topic*."""
    # Path separators and characters that are reserved on common filesystems
    # would make open() fail or write outside DATA_DIR, so map them to "_".
    unsafe_chars = '\\/:*?"<>|'
    slug = "".join(
        "_" if ch in unsafe_chars else ch
        for ch in topic.replace(" ", "_").lower()
    )
    base_filename = f"{slug}_{datetime.now().strftime('%Y-%m-%d')}"
    filepath = os.path.join(DATA_DIR, base_filename + ".md")
    counter = 1
    # Append _1, _2, ... until the name is free so earlier reports are kept.
    while os.path.exists(filepath):
        filepath = os.path.join(DATA_DIR, f"{base_filename}_{counter}.md")
        counter += 1
    return filepath


def run_value_investing_analysis(csv_path, progress_callback=None):
    """Generate Markdown reports for the topics in *csv_path* that changed.

    The CSV is compared against the snapshot ``investing_topics_prev.csv`` in
    BASE_DIR via ``detect_changes``; when no snapshot exists every row is
    processed. For each selected row, news is fetched with
    ``fetch_deep_news``, a report is generated with
    ``generate_value_investor_report``, and the result is written to a new
    file in DATA_DIR. Afterwards the current CSV is saved as the new snapshot.

    Args:
        csv_path: Path of the topics CSV. Rows are read for a ``topic`` value
            and an optional ``timespan_days`` value (default 7).
        progress_callback: Optional callable that receives each status
            message as a string. Messages are also printed to stdout.

    Returns:
        List of paths of the Markdown files written during this call; empty
        when ``detect_changes`` reports no changed rows.
    """
    current_df = pd.read_csv(csv_path)
    prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")

    if os.path.exists(prev_path):
        previous_df = pd.read_csv(prev_path)
        changed_df = detect_changes(current_df, previous_df)
        if changed_df.empty:
            _emit_progress(
                "✅ No changes detected. Skipping processing.", progress_callback
            )
            return []
    else:
        # First run: no snapshot to diff against, so process everything.
        changed_df = current_df

    new_md_files = []
    for _, row in changed_df.iterrows():
        raw_topic = row.get("topic")
        # An empty CSV cell is read as NaN; it cannot be searched for or
        # turned into a filename, so skip the row instead of crashing.
        if raw_topic is None or pd.isna(raw_topic) or not str(raw_topic).strip():
            _emit_progress("⚠️ Skipping row without a topic.", progress_callback)
            continue
        topic = str(raw_topic)

        timespan = row.get("timespan_days", 7)
        # The column may exist while this particular cell is empty (NaN).
        if pd.isna(timespan):
            timespan = 7

        _emit_progress(f"🔍 Processing: {topic} ({timespan} days)", progress_callback)

        news = fetch_deep_news(topic, timespan)
        if not news:
            _emit_progress(f"⚠️ No news found for: {topic}", progress_callback)
            continue

        report_body = generate_value_investor_report(topic, news)
        full_md = build_metrics_box(topic, len(news)) + report_body

        filepath = _unique_markdown_path(topic)
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(full_md)
        new_md_files.append(filepath)

    _emit_progress(f"✅ Markdown saved to: {DATA_DIR}", progress_callback)
    # Persist the current CSV as the snapshot for the next run's comparison.
    current_df.to_csv(prev_path, index=False)
    return new_md_files
def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
    """Run the analysis for *csv_path* and convert each new report to HTML.

    Args:
        csv_path: Path of the topics CSV handed to
            ``run_value_investing_analysis``.
        tavily_api_key: Value stored in the ``TAVILY_API_KEY`` environment
            variable before the analysis starts.
        progress_callback: Optional callable forwarded to the analysis step.

    Returns:
        List of HTML paths inside HTML_DIR, one per Markdown file produced.
    """
    os.environ["TAVILY_API_KEY"] = tavily_api_key

    html_outputs = []
    for markdown_file in run_value_investing_analysis(csv_path, progress_callback):
        convert_md_to_html(markdown_file, HTML_DIR)
        # NOTE(review): presumably this mirrors the name convert_md_to_html
        # writes; replace() also rewrites any ".md" inside the stem — confirm.
        html_name = os.path.basename(markdown_file).replace(".md", ".html")
        html_outputs.append(os.path.join(HTML_DIR, html_name))
    return html_outputs
if __name__ == "__main__":
    # Script entry point: analyse the default CSV, then convert every report
    # written during this run to HTML.
    md_files = run_value_investing_analysis(CSV_PATH)
    for md in md_files:
        convert_md_to_html(md, HTML_DIR)
    # The emoji was stored as mojibake; restored to the intended character.
    print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
# import os
# import sys
# from datetime import datetime
# from dotenv import load_dotenv
# from image_search import search_unsplash_image
# from md_html import convert_single_md_to_html as convert_md_to_html
# from news_analysis import fetch_deep_news, generate_value_investor_report
# import pandas as pd
# from csv_utils import detect_changes
# # Setup paths
# BASE_DIR = os.path.dirname(os.path.dirname(__file__)) # one level up from src/
# DATA_DIR = os.path.join(BASE_DIR, "data")
# HTML_DIR = os.path.join(BASE_DIR, "html")
# CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
# os.makedirs(DATA_DIR, exist_ok=True)
# os.makedirs(HTML_DIR, exist_ok=True)
# # Load .env
# load_dotenv()
# def build_metrics_box(topic, num_articles):
# now = datetime.now().strftime("%Y-%m-%d %H:%M")
# return f"""
# > Topic: `{topic}`
# > Articles Collected: `{num_articles}`
# > Generated: `{now}`
# >
# """
# def run_value_investing_analysis(csv_path):
# current_df = pd.read_csv(csv_path)
# prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
# if os.path.exists(prev_path):
# previous_df = pd.read_csv(prev_path)
# changed_df = detect_changes(current_df, previous_df)
# if changed_df.empty:
# print("✅ No changes detected. Skipping processing.")
# return []
# else:
# changed_df = current_df
# new_md_files = []
# for _, row in changed_df.iterrows():
# topic = row.get("topic")
# timespan = row.get("timespan_days", 7)
# print(f"\n🔍 Processing: {topic} ({timespan} days)")
# news = fetch_deep_news(topic, timespan)
# if not news:
# print(f"⚠️ No news found for: {topic}")
# continue
# report_body = generate_value_investor_report(topic, news)
# from image_search import search_unsplash_image
# # Later inside your loop
# image_url, image_credit = search_unsplash_image(topic)
# #image_url, image_credit = search_unsplash_image(topic, os.getenv("OPENAI_API_KEY"))
# metrics_md = build_metrics_box(topic, len(news))
# full_md = metrics_md + report_body
# base_filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}"
# filename = base_filename + ".md"
# filepath = os.path.join(DATA_DIR, filename)
# counter = 1
# while os.path.exists(filepath):
# filename = f"{base_filename}_{counter}.md"
# filepath = os.path.join(DATA_DIR, filename)
# counter += 1
# with open(filepath, "w", encoding="utf-8") as f:
# f.write(full_md)
# new_md_files.append(filepath)
# print(f"✅ Markdown saved to: {DATA_DIR}")
# current_df.to_csv(prev_path, index=False)
# return new_md_files
# def run_pipeline(csv_path, tavily_api_key):
# os.environ["TAVILY_API_KEY"] = tavily_api_key
# new_md_files = run_value_investing_analysis(csv_path)
# new_html_paths = []
# for md_path in new_md_files:
# convert_md_to_html(md_path, HTML_DIR)
# html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
# new_html_paths.append(html_path)
# return new_html_paths
# if __name__ == "__main__":
# md_files = run_value_investing_analysis(CSV_PATH)
# for md in md_files:
# convert_md_to_html(md, HTML_DIR)
# print(f"🌐 All reports converted to HTML at: {HTML_DIR}")