Spaces: Running

Sigrid De los Santos committed · a9b1809
Parent(s): 3e4bf85

Remove remaining binary file for Hugging Face

Browse files: src/main.py (+62 -50)

src/main.py CHANGED
@@ -3,14 +3,16 @@ import sys
 from datetime import datetime
 from dotenv import load_dotenv
 import pandas as pd
+from io import BytesIO
+import base64
+import matplotlib.pyplot as plt
 
-from image_search import search_unsplash_image
 from md_html import convert_single_md_to_html as convert_md_to_html
 from news_analysis import fetch_deep_news, generate_value_investor_report
 from csv_utils import detect_changes
 
-# Setup
-BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+# === Setup Paths ===
+BASE_DIR = os.path.dirname(os.path.dirname(__file__))
 DATA_DIR = os.path.join(BASE_DIR, "data")
 HTML_DIR = os.path.join(BASE_DIR, "html")
 CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
@@ -18,7 +20,7 @@ CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
 os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(HTML_DIR, exist_ok=True)
 
-# Load .env
+# === Load .env ===
 load_dotenv()
 
 def build_metrics_box(topic, num_articles):
@@ -30,16 +32,33 @@ def build_metrics_box(topic, num_articles):
     >
     """
 
-def run_value_investing_analysis(csv_path, progress_callback=None):
+def create_sentiment_chart_md(topic):
+    # Placeholder dummy chart
+    dates = pd.date_range(end=datetime.today(), periods=7)
+    values = [100 + i * 3 for i in range(7)]
+
+    plt.figure(figsize=(6, 3))
+    plt.plot(dates, values, marker='o')
+    plt.title(f"📈 Sentiment Trend: {topic}")
+    plt.xlabel("Date")
+    plt.ylabel("Sentiment")
+    plt.grid(True)
+
+    buffer = BytesIO()
+    plt.savefig(buffer, format='png')
+    plt.close()
+    buffer.seek(0)
+    encoded = base64.b64encode(buffer.read()).decode("utf-8")
+    return f""
+
+def run_value_investing_analysis(csv_path):
     current_df = pd.read_csv(csv_path)
     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
-
     if os.path.exists(prev_path):
         previous_df = pd.read_csv(prev_path)
         changed_df = detect_changes(current_df, previous_df)
         if changed_df.empty:
-            if progress_callback:
-                progress_callback("✅ No changes detected. Skipping processing.")
+            print("✅ No changes detected. Skipping processing.")
             return []
     else:
         changed_df = current_df
@@ -49,27 +68,18 @@ def run_value_investing_analysis(csv_path, progress_callback=None):
     for _, row in changed_df.iterrows():
         topic = row.get("topic")
         timespan = row.get("timespan_days", 7)
-
-        if progress_callback:
-            progress_callback(f"🔍 Processing: {topic} ({timespan} days)")
+        print(f"\n🔍 Processing: {topic} ({timespan} days)")
 
         news = fetch_deep_news(topic, timespan)
        if not news:
-            if progress_callback:
-                progress_callback(f"⚠️ No news found for: {topic}")
+            print(f"⚠️ No news found for: {topic}")
             continue
 
-        if progress_callback:
-            progress_callback(f"🧠 Analyzing news for: {topic}")
-
         report_body = generate_value_investor_report(topic, news)
-
-        # Use placeholder image instead of API call
-        image_url = "https://via.placeholder.com/1281x721?text=No+Image"
-        image_credit = "Image unavailable"
+        chart_md = create_sentiment_chart_md(topic)
 
         metrics_md = build_metrics_box(topic, len(news))
-        full_md = metrics_md + report_body
+        full_md = metrics_md + report_body + "\n\n" + chart_md
 
         base_filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}"
         filename = base_filename + ".md"
@@ -81,30 +91,22 @@ def run_value_investing_analysis(csv_path, progress_callback=None):
             filepath = os.path.join(DATA_DIR, filename)
             counter += 1
 
-        if progress_callback:
-            progress_callback(f"📝 Saving markdown for: {topic}")
-
         with open(filepath, "w", encoding="utf-8") as f:
             f.write(full_md)
 
         new_md_files.append(filepath)
 
-    if progress_callback:
-        progress_callback(f"✅ Markdown reports saved to: `{DATA_DIR}`")
-
+    print(f"✅ Markdown saved to: {DATA_DIR}")
     current_df.to_csv(prev_path, index=False)
     return new_md_files
 
-def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
+def run_pipeline(csv_path, tavily_api_key):
    os.environ["TAVILY_API_KEY"] = tavily_api_key
 
-    new_md_files = run_value_investing_analysis(csv_path, progress_callback)
+    new_md_files = run_value_investing_analysis(csv_path)
     new_html_paths = []
 
     for md_path in new_md_files:
-        if progress_callback:
-            progress_callback(f"🌐 Converting to HTML: {os.path.basename(md_path)}")
-
         convert_md_to_html(md_path, HTML_DIR)
         html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
         new_html_paths.append(html_path)
@@ -117,19 +119,18 @@ if __name__ == "__main__":
         convert_md_to_html(md, HTML_DIR)
     print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
 
+
 # import os
 # import sys
 # from datetime import datetime
 # from dotenv import load_dotenv
+# import pandas as pd
 
 # from image_search import search_unsplash_image
 # from md_html import convert_single_md_to_html as convert_md_to_html
 # from news_analysis import fetch_deep_news, generate_value_investor_report
-
-# import pandas as pd
 # from csv_utils import detect_changes
 
-
 # # Setup paths
 # BASE_DIR = os.path.dirname(os.path.dirname(__file__))  # one level up from src/
 # DATA_DIR = os.path.join(BASE_DIR, "data")
@@ -151,14 +152,16 @@ if __name__ == "__main__":
 #     >
 #     """
 
-# def run_value_investing_analysis(csv_path):
+# def run_value_investing_analysis(csv_path, progress_callback=None):
 #     current_df = pd.read_csv(csv_path)
 #     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
+
 #     if os.path.exists(prev_path):
 #         previous_df = pd.read_csv(prev_path)
 #         changed_df = detect_changes(current_df, previous_df)
 #         if changed_df.empty:
-#
+#             if progress_callback:
+#                 progress_callback("✅ No changes detected. Skipping processing.")
 #             return []
 #     else:
 #         changed_df = current_df
@@ -168,20 +171,24 @@ if __name__ == "__main__":
 #     for _, row in changed_df.iterrows():
 #         topic = row.get("topic")
 #         timespan = row.get("timespan_days", 7)
-
+
+#         if progress_callback:
+#             progress_callback(f"🔍 Processing: {topic} ({timespan} days)")
 
 #         news = fetch_deep_news(topic, timespan)
 #         if not news:
-#
+#             if progress_callback:
+#                 progress_callback(f"⚠️ No news found for: {topic}")
 #             continue
 
-#
-#
+#         if progress_callback:
+#             progress_callback(f"🧠 Analyzing news for: {topic}")
 
-#
-#         image_url, image_credit = search_unsplash_image(topic)
+#         report_body = generate_value_investor_report(topic, news)
 
-#         #
+#         # Use placeholder image instead of API call
+#         image_url = "https://via.placeholder.com/1281x721?text=No+Image"
+#         image_credit = "Image unavailable"
 
 #         metrics_md = build_metrics_box(topic, len(news))
 #         full_md = metrics_md + report_body
@@ -196,34 +203,39 @@ if __name__ == "__main__":
 #             filepath = os.path.join(DATA_DIR, filename)
 #             counter += 1
 
+#         if progress_callback:
+#             progress_callback(f"📝 Saving markdown for: {topic}")
+
 #         with open(filepath, "w", encoding="utf-8") as f:
 #             f.write(full_md)
 
 #         new_md_files.append(filepath)
 
-#
+#     if progress_callback:
+#         progress_callback(f"✅ Markdown reports saved to: `{DATA_DIR}`")
+
 #     current_df.to_csv(prev_path, index=False)
 #     return new_md_files
 
-
-# def run_pipeline(csv_path, tavily_api_key):
+# def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
 #     os.environ["TAVILY_API_KEY"] = tavily_api_key
 
-#     new_md_files = run_value_investing_analysis(csv_path)
+#     new_md_files = run_value_investing_analysis(csv_path, progress_callback)
 #     new_html_paths = []
 
 #     for md_path in new_md_files:
+#         if progress_callback:
+#             progress_callback(f"🌐 Converting to HTML: {os.path.basename(md_path)}")
+
 #         convert_md_to_html(md_path, HTML_DIR)
 #         html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
 #         new_html_paths.append(html_path)
 
 #     return new_html_paths
 
-
 # if __name__ == "__main__":
 #     md_files = run_value_investing_analysis(CSV_PATH)
 #     for md in md_files:
 #         convert_md_to_html(md, HTML_DIR)
 #         print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
 
-
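A note on the new chart helper: `create_sentiment_chart_md` plots a dummy 7-point series, writes the PNG into an in-memory buffer, and base64-encodes it, but the Markdown string it returns is not fully visible in the diff above. The sketch below shows the usual data-URI embedding pattern such a helper returns; the function name, sample data, and the `![...](...)` return string are illustrative assumptions, not the committed code.

```python
import base64
from io import BytesIO

import matplotlib
matplotlib.use("Agg")  # headless backend; assumed appropriate for a Hugging Face Space
import matplotlib.pyplot as plt


def chart_as_markdown(title, values):
    """Render a small line chart and return it as a Markdown image with a base64 data URI."""
    plt.figure(figsize=(6, 3))
    plt.plot(range(len(values)), values, marker="o")
    plt.title(title)
    plt.grid(True)

    buffer = BytesIO()
    plt.savefig(buffer, format="png", bbox_inches="tight")
    plt.close()
    buffer.seek(0)

    encoded = base64.b64encode(buffer.read()).decode("utf-8")
    # Markdown renderers that allow data URIs will show the chart inline in the report.
    return f"![{title}](data:image/png;base64,{encoded})"


if __name__ == "__main__":
    md = chart_as_markdown("Sentiment Trend (demo)", [100, 103, 106, 109, 112, 115, 118])
    print(md[:80] + "...")  # the full string embeds the whole PNG, so it is long
```

One trade-off of inlining the image this way: the PNG bytes land inside every generated .md and .html report, so file size grows with chart resolution, which may be why the figure is kept small at figsize=(6, 3).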
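With the progress_callback plumbing moved into the commented-out block, `run_pipeline(csv_path, tavily_api_key)` is the whole public entry point after this commit. A minimal usage sketch, assuming `src/` is on the import path and the Tavily key is read from the environment (both assumptions, not part of the diff):

```python
import os

from main import run_pipeline  # src/main.py; assumes src/ is on PYTHONPATH

# run_pipeline sets TAVILY_API_KEY itself, so any source for the key works here.
html_paths = run_pipeline(
    csv_path="investing_topics.csv",
    tavily_api_key=os.environ.get("TAVILY_API_KEY", ""),
)

for path in html_paths:
    print("Generated:", path)
```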