Spaces:

sigridveronica
/

ai-news-analyzer

Running

App Files Files Community

Sigrid De los Santos committed on Jul 5

Commit

3e4bf85

1 Parent(s): 8143a2a

Remove remaining binary file for Hugging Face

Browse files

Files changed (2) hide show

app.py +70 -52
src/main.py +56 -68

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import tempfile
 import streamlit as st
 import pandas as pd
 from io import StringIO
-import contextlib
 # Add 'src' to Python path so we can import main.py
 sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
@@ -24,7 +23,7 @@ topics_data = []
 with st.form("topics_form"):
     topic_count = st.number_input("How many topics?", min_value=1, max_value=10, value=1, step=1)
     for i in range(topic_count):
         col1, col2 = st.columns(2)
         with col1:
@@ -48,43 +47,33 @@ if submitted:
             df.to_csv(tmp_csv.name, index=False)
             csv_path = tmp_csv.name
-        progress_placeholder = st.empty()
-        log_output = st.empty()
-        string_buffer = StringIO()
-        def write_log(msg):
-            print(msg)  # Will go to final log
-            progress_placeholder.markdown(f"🔄 {msg}")
-        with contextlib.redirect_stdout(string_buffer):
-            write_log("🚀 Starting analysis...")
-            output_path = run_pipeline(csv_path, tavily_api_key)
-            write_log("✅ Finished analysis.")
-        logs = string_buffer.getvalue()
-        progress_placeholder.empty()  # Clear ephemeral log
-        log_output.code(logs)         # Show final full log
-        if output_path and isinstance(output_path, list):
-            st.success("✅ Analysis complete!")
-            for path in output_path:
-                if os.path.exists(path):
-                    with open(path, 'r', encoding='utf-8') as file:
-                        html_content = file.read()
-                        filename = os.path.basename(path)
-                        st.download_button(
-                            label=f"📥 Download {filename}",
-                            data=html_content,
-                            file_name=filename,
-                            mime="text/html"
-                        )
-                        st.components.v1.html(html_content, height=600, scrolling=True)
-        else:
-            st.error("❌ No reports were generated.")
 # import os
@@ -92,12 +81,15 @@ if submitted:
 # import tempfile
 # import streamlit as st
 # import pandas as pd
 # # Add 'src' to Python path so we can import main.py
 # sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
 # from main import run_pipeline
-# st.title("AI-Powered Investing News Analyzer")
 # # === API Key Input ===
 # st.subheader("🔐 API Keys")
@@ -105,45 +97,71 @@ if submitted:
 # tavily_api_key = st.text_input("Tavily API Key", type="password").strip()
 # # === Topic Input ===
-# st.subheader("📰 Topics of Interest")
 # topics_data = []
 # with st.form("topics_form"):
-#     topic_count = st.number_input("How many topics do you want to analyze?", min_value=1, max_value=10, step=1, value=1)
 #     for i in range(topic_count):
 #         col1, col2 = st.columns(2)
 #         with col1:
 #             topic = st.text_input(f"Topic {i+1}", key=f"topic_{i}")
 #         with col2:
-#             timespan = st.number_input(f"Timespan (days) for Topic {i+1}", min_value=1, max_value=30, value=7, key=f"days_{i}")
-#         topics_data.append({"topic": topic, "timespan_days": timespan})
-#     submitted = st.form_submit_button("Analyze Topics")
-# # === Run pipeline on submit ===
 # if submitted:
 #     if not openai_api_key or not tavily_api_key or not all([td['topic'] for td in topics_data]):
 #         st.warning("Please fill in all fields.")
 #     else:
-#         # Set environment variables so downstream modules can use them
 #         os.environ["OPENAI_API_KEY"] = openai_api_key
 #         os.environ["TAVILY_API_KEY"] = tavily_api_key
-#         # Save user topics to temp CSV
 #         df = pd.DataFrame(topics_data)
 #         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_csv:
 #             df.to_csv(tmp_csv.name, index=False)
 #             csv_path = tmp_csv.name
-#         with st.spinner("Running analysis..."):
 #             output_path = run_pipeline(csv_path, tavily_api_key)
-#         if os.path.exists(output_path):
 #             st.success("✅ Analysis complete!")
-#             with open(output_path, 'r', encoding='utf-8') as file:
-#                 html_content = file.read()
-#                 st.download_button("📥 Download HTML Report", html_content, file_name="news_report.html", mime="text/html")
-#                 st.components.v1.html(html_content, height=600, scrolling=True)
 #         else:
-#             st.error("❌ Something went wrong during the analysis.")

 import streamlit as st
 import pandas as pd
 from io import StringIO
 # Add 'src' to Python path so we can import main.py
 sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
 with st.form("topics_form"):
     topic_count = st.number_input("How many topics?", min_value=1, max_value=10, value=1, step=1)
     for i in range(topic_count):
         col1, col2 = st.columns(2)
         with col1:
             df.to_csv(tmp_csv.name, index=False)
             csv_path = tmp_csv.name
+        progress_box = st.empty()
+        def show_progress(msg):
+            progress_box.markdown(f"⏳ {msg}")
+        try:
+            output_path = run_pipeline(csv_path, tavily_api_key, progress_callback=show_progress)
+            progress_box.success("✅ Analysis complete!")
+            if output_path and isinstance(output_path, list):
+                for path in output_path:
+                    if os.path.exists(path):
+                        with open(path, 'r', encoding='utf-8') as file:
+                            html_content = file.read()
+                            filename = os.path.basename(path)
+                            st.download_button(
+                                label=f"📥 Download {filename}",
+                                data=html_content,
+                                file_name=filename,
+                                mime="text/html"
+                            )
+                            st.components.v1.html(html_content, height=600, scrolling=True)
+            else:
+                st.error("❌ No reports were generated.")
+        except Exception as e:
+            progress_box.error(f"❌ Error: {e}")
 # import os
 # import tempfile
 # import streamlit as st
 # import pandas as pd
+# from io import StringIO
+# import contextlib
 # # Add 'src' to Python path so we can import main.py
 # sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
 # from main import run_pipeline
+# st.set_page_config(page_title="📰 AI News Analyzer", layout="wide")
+# st.title("🧠 AI-Powered Investing News Analyzer")
 # # === API Key Input ===
 # st.subheader("🔐 API Keys")
 # tavily_api_key = st.text_input("Tavily API Key", type="password").strip()
 # # === Topic Input ===
+# st.subheader("📈 Topics of Interest")
 # topics_data = []
 # with st.form("topics_form"):
+#     topic_count = st.number_input("How many topics?", min_value=1, max_value=10, value=1, step=1)
 #     for i in range(topic_count):
 #         col1, col2 = st.columns(2)
 #         with col1:
 #             topic = st.text_input(f"Topic {i+1}", key=f"topic_{i}")
 #         with col2:
+#             days = st.number_input(f"Timespan (days)", min_value=1, max_value=30, value=7, key=f"days_{i}")
+#         topics_data.append({"topic": topic, "timespan_days": days})
+#     submitted = st.form_submit_button("Run Analysis")
+# # === Submission logic ===
 # if submitted:
 #     if not openai_api_key or not tavily_api_key or not all([td['topic'] for td in topics_data]):
 #         st.warning("Please fill in all fields.")
 #     else:
 #         os.environ["OPENAI_API_KEY"] = openai_api_key
 #         os.environ["TAVILY_API_KEY"] = tavily_api_key
 #         df = pd.DataFrame(topics_data)
 #         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_csv:
 #             df.to_csv(tmp_csv.name, index=False)
 #             csv_path = tmp_csv.name
+#         progress_placeholder = st.empty()
+#         log_output = st.empty()
+#         string_buffer = StringIO()
+#         def write_log(msg):
+#             print(msg)  # Will go to final log
+#             progress_placeholder.markdown(f"🔄 {msg}")
+#         with contextlib.redirect_stdout(string_buffer):
+#             write_log("🚀 Starting analysis...")
 #             output_path = run_pipeline(csv_path, tavily_api_key)
+#             write_log("✅ Finished analysis.")
+#         logs = string_buffer.getvalue()
+#         progress_placeholder.empty()  # Clear ephemeral log
+#         log_output.code(logs)         # Show final full log
+#         if output_path and isinstance(output_path, list):
 #             st.success("✅ Analysis complete!")
+#             for path in output_path:
+#                 if os.path.exists(path):
+#                     with open(path, 'r', encoding='utf-8') as file:
+#                         html_content = file.read()
+#                         filename = os.path.basename(path)
+#                         st.download_button(
+#                             label=f"📥 Download {filename}",
+#                             data=html_content,
+#                             file_name=filename,
+#                             mime="text/html"
+#                         )
+#                         st.components.v1.html(html_content, height=600, scrolling=True)
 #         else:
+#             st.error("❌ No reports were generated.")

src/main.py CHANGED Viewed

@@ -2,15 +2,13 @@ import os
 import sys
 from datetime import datetime
 from dotenv import load_dotenv
 from image_search import search_unsplash_image
 from md_html import convert_single_md_to_html as convert_md_to_html
 from news_analysis import fetch_deep_news, generate_value_investor_report
-import pandas as pd
 from csv_utils import detect_changes
 # Setup paths
 BASE_DIR = os.path.dirname(os.path.dirname(__file__))  # one level up from src/
 DATA_DIR = os.path.join(BASE_DIR, "data")
@@ -32,14 +30,16 @@ def build_metrics_box(topic, num_articles):
 >
 """
-def run_value_investing_analysis(csv_path):
     current_df = pd.read_csv(csv_path)
     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
     if os.path.exists(prev_path):
         previous_df = pd.read_csv(prev_path)
         changed_df = detect_changes(current_df, previous_df)
         if changed_df.empty:
-            print("✅ No changes detected. Skipping processing.")
             return []
     else:
         changed_df = current_df
@@ -49,20 +49,24 @@ def run_value_investing_analysis(csv_path):
     for _, row in changed_df.iterrows():
         topic = row.get("topic")
         timespan = row.get("timespan_days", 7)
-        print(f"\n🔍 Processing: {topic} ({timespan} days)")
         news = fetch_deep_news(topic, timespan)
         if not news:
-            print(f"⚠️ No news found for: {topic}")
             continue
-        report_body = generate_value_investor_report(topic, news)
-        from image_search import search_unsplash_image
-        # Later inside your loop
-        image_url, image_credit = search_unsplash_image(topic)
-        #image_url, image_credit = search_unsplash_image(topic, os.getenv("OPENAI_API_KEY"))
         metrics_md = build_metrics_box(topic, len(news))
         full_md = metrics_md + report_body
@@ -77,76 +81,67 @@ def run_value_investing_analysis(csv_path):
             filepath = os.path.join(DATA_DIR, filename)
             counter += 1
         with open(filepath, "w", encoding="utf-8") as f:
             f.write(full_md)
         new_md_files.append(filepath)
-    print(f"✅ Markdown saved to: {DATA_DIR}")
     current_df.to_csv(prev_path, index=False)
     return new_md_files
-def run_pipeline(csv_path, tavily_api_key):
     os.environ["TAVILY_API_KEY"] = tavily_api_key
-    new_md_files = run_value_investing_analysis(csv_path)
     new_html_paths = []
     for md_path in new_md_files:
         convert_md_to_html(md_path, HTML_DIR)
         html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
         new_html_paths.append(html_path)
     return new_html_paths
 if __name__ == "__main__":
     md_files = run_value_investing_analysis(CSV_PATH)
     for md in md_files:
         convert_md_to_html(md, HTML_DIR)
     print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
 # import os
 # import sys
 # from datetime import datetime
 # from dotenv import load_dotenv
-# #rom news_analysis import load_csv, fetch_deep_news, generate_value_investor_report
 # from image_search import search_unsplash_image
-# from md_html import convert_md_folder_to_html
 # from md_html import convert_single_md_to_html as convert_md_to_html
 # from news_analysis import fetch_deep_news, generate_value_investor_report
 # import pandas as pd
 # from csv_utils import detect_changes
-# # Adds the absolute path of /external to your module path
-# BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-# EXTERNAL_PATH = os.path.join(BASE_DIR, "external")
-# if EXTERNAL_PATH not in sys.path:
-#     sys.path.append(EXTERNAL_PATH)
-# # Load .env
-# load_dotenv()
-# # === Base Folder Setup ===
 # BASE_DIR = os.path.dirname(os.path.dirname(__file__))  # one level up from src/
 # DATA_DIR = os.path.join(BASE_DIR, "data")
 # HTML_DIR = os.path.join(BASE_DIR, "html")
 # CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
-# # Ensure output folders exist
 # os.makedirs(DATA_DIR, exist_ok=True)
 # os.makedirs(HTML_DIR, exist_ok=True)
-# # === Metrics Block ===
 # def build_metrics_box(topic, num_articles):
 #     now = datetime.now().strftime("%Y-%m-%d %H:%M")
 #     return f"""
@@ -156,20 +151,20 @@ if __name__ == "__main__":
 # >
 # """
-# # === Main Logic ===
 # def run_value_investing_analysis(csv_path):
 #     current_df = pd.read_csv(csv_path)
 #     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
 #     if os.path.exists(prev_path):
 #         previous_df = pd.read_csv(prev_path)
 #         changed_df = detect_changes(current_df, previous_df)
 #         if changed_df.empty:
 #             print("✅ No changes detected. Skipping processing.")
-#             return
 #     else:
 #         changed_df = current_df
 #     for _, row in changed_df.iterrows():
 #         topic = row.get("topic")
 #         timespan = row.get("timespan_days", 7)
@@ -181,7 +176,13 @@ if __name__ == "__main__":
 #             continue
 #         report_body = generate_value_investor_report(topic, news)
 #         image_url, image_credit = search_unsplash_image(topic)
 #         metrics_md = build_metrics_box(topic, len(news))
 #         full_md = metrics_md + report_body
@@ -198,44 +199,31 @@ if __name__ == "__main__":
 #         with open(filepath, "w", encoding="utf-8") as f:
 #             f.write(full_md)
 #     print(f"✅ Markdown saved to: {DATA_DIR}")
-#     current_df.to_csv(prev_path, index=False)  # Save current as previous for next run
-# #convert_md_folder_to_html(DATA_DIR, HTML_DIR)
-# #print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
-# # === Streamlit Integration Wrapper ===
 # def run_pipeline(csv_path, tavily_api_key):
-#     """
-#     Runs the full analysis pipeline for Streamlit.
-#     Returns:
-#         str: Path to the generated HTML report.
-#     """
 #     os.environ["TAVILY_API_KEY"] = tavily_api_key
-#     run_value_investing_analysis(csv_path)
-#     # Combine all generated markdown into one file
-#     combined_md_path = os.path.join(DATA_DIR, "combined_report.md")
-#     with open(combined_md_path, "w", encoding="utf-8") as outfile:
-#         for fname in os.listdir(DATA_DIR):
-#             if fname.endswith(".md"):
-#                 with open(os.path.join(DATA_DIR, fname), "r", encoding="utf-8") as f:
-#                     outfile.write(f.read() + "\n\n---\n\n")
-#     # Convert to HTML
-#     # html_output_path = os.path.join(HTML_DIR, "news_report.html")
-#     # convert_md_to_html(combined_md_path, html_output_path)
-#     convert_md_to_html(combined_md_path, HTML_DIR)
-#     html_output_path = os.path.join(HTML_DIR, "combined_report.html")
-#     return html_output_path
-# # === Run ===
 # if __name__ == "__main__":
-#     run_value_investing_analysis(CSV_PATH)
-#     convert_md_folder_to_html(DATA_DIR, HTML_DIR)
 #     print(f"🌐 All reports converted to HTML at: {HTML_DIR}")

 import sys
 from datetime import datetime
 from dotenv import load_dotenv
+import pandas as pd
 from image_search import search_unsplash_image
 from md_html import convert_single_md_to_html as convert_md_to_html
 from news_analysis import fetch_deep_news, generate_value_investor_report
 from csv_utils import detect_changes
 # Setup paths
 BASE_DIR = os.path.dirname(os.path.dirname(__file__))  # one level up from src/
 DATA_DIR = os.path.join(BASE_DIR, "data")
 >
 """
+def run_value_investing_analysis(csv_path, progress_callback=None):
     current_df = pd.read_csv(csv_path)
     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
     if os.path.exists(prev_path):
         previous_df = pd.read_csv(prev_path)
         changed_df = detect_changes(current_df, previous_df)
         if changed_df.empty:
+            if progress_callback:
+                progress_callback("✅ No changes detected. Skipping processing.")
             return []
     else:
         changed_df = current_df
     for _, row in changed_df.iterrows():
         topic = row.get("topic")
         timespan = row.get("timespan_days", 7)
+        if progress_callback:
+            progress_callback(f"🔍 Processing: {topic} ({timespan} days)")
         news = fetch_deep_news(topic, timespan)
         if not news:
+            if progress_callback:
+                progress_callback(f"⚠️ No news found for: {topic}")
             continue
+        if progress_callback:
+            progress_callback(f"🧠 Analyzing news for: {topic}")
+        report_body = generate_value_investor_report(topic, news)
+        # Use placeholder image instead of API call
+        image_url = "https://via.placeholder.com/1281x721?text=No+Image"
+        image_credit = "Image unavailable"
         metrics_md = build_metrics_box(topic, len(news))
         full_md = metrics_md + report_body
             filepath = os.path.join(DATA_DIR, filename)
             counter += 1
+        if progress_callback:
+            progress_callback(f"📝 Saving markdown for: {topic}")
         with open(filepath, "w", encoding="utf-8") as f:
             f.write(full_md)
         new_md_files.append(filepath)
+    if progress_callback:
+        progress_callback(f"✅ Markdown reports saved to: `{DATA_DIR}`")
     current_df.to_csv(prev_path, index=False)
     return new_md_files
+def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
     os.environ["TAVILY_API_KEY"] = tavily_api_key
+    new_md_files = run_value_investing_analysis(csv_path, progress_callback)
     new_html_paths = []
     for md_path in new_md_files:
+        if progress_callback:
+            progress_callback(f"🌐 Converting to HTML: {os.path.basename(md_path)}")
         convert_md_to_html(md_path, HTML_DIR)
         html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
         new_html_paths.append(html_path)
     return new_html_paths
 if __name__ == "__main__":
     md_files = run_value_investing_analysis(CSV_PATH)
     for md in md_files:
         convert_md_to_html(md, HTML_DIR)
     print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
 # import os
 # import sys
 # from datetime import datetime
 # from dotenv import load_dotenv
 # from image_search import search_unsplash_image
 # from md_html import convert_single_md_to_html as convert_md_to_html
 # from news_analysis import fetch_deep_news, generate_value_investor_report
 # import pandas as pd
 # from csv_utils import detect_changes
+# # Setup paths
 # BASE_DIR = os.path.dirname(os.path.dirname(__file__))  # one level up from src/
 # DATA_DIR = os.path.join(BASE_DIR, "data")
 # HTML_DIR = os.path.join(BASE_DIR, "html")
 # CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
 # os.makedirs(DATA_DIR, exist_ok=True)
 # os.makedirs(HTML_DIR, exist_ok=True)
+# # Load .env
+# load_dotenv()
 # def build_metrics_box(topic, num_articles):
 #     now = datetime.now().strftime("%Y-%m-%d %H:%M")
 #     return f"""
 # >
 # """
 # def run_value_investing_analysis(csv_path):
 #     current_df = pd.read_csv(csv_path)
 #     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
 #     if os.path.exists(prev_path):
 #         previous_df = pd.read_csv(prev_path)
 #         changed_df = detect_changes(current_df, previous_df)
 #         if changed_df.empty:
 #             print("✅ No changes detected. Skipping processing.")
+#             return []
 #     else:
 #         changed_df = current_df
+#     new_md_files = []
 #     for _, row in changed_df.iterrows():
 #         topic = row.get("topic")
 #         timespan = row.get("timespan_days", 7)
 #             continue
 #         report_body = generate_value_investor_report(topic, news)
+#         from image_search import search_unsplash_image
+#         # Later inside your loop
 #         image_url, image_credit = search_unsplash_image(topic)
+#         #image_url, image_credit = search_unsplash_image(topic, os.getenv("OPENAI_API_KEY"))
 #         metrics_md = build_metrics_box(topic, len(news))
 #         full_md = metrics_md + report_body
 #         with open(filepath, "w", encoding="utf-8") as f:
 #             f.write(full_md)
+#         new_md_files.append(filepath)
 #     print(f"✅ Markdown saved to: {DATA_DIR}")
+#     current_df.to_csv(prev_path, index=False)
+#     return new_md_files
 # def run_pipeline(csv_path, tavily_api_key):
 #     os.environ["TAVILY_API_KEY"] = tavily_api_key
+#     new_md_files = run_value_investing_analysis(csv_path)
+#     new_html_paths = []
+#     for md_path in new_md_files:
+#         convert_md_to_html(md_path, HTML_DIR)
+#         html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
+#         new_html_paths.append(html_path)
+#     return new_html_paths
 # if __name__ == "__main__":
+#     md_files = run_value_investing_analysis(CSV_PATH)
+#     for md in md_files:
+#         convert_md_to_html(md, HTML_DIR)
 #     print(f"🌐 All reports converted to HTML at: {HTML_DIR}")