Spaces: Running

Sigrid De los Santos committed · a9b1809
Parent(s): 3e4bf85

Remove remaining binary file for Hugging Face

Browse files: src/main.py (+62 -50)

src/main.py CHANGED
@@ -3,14 +3,16 @@ import sys
 from datetime import datetime
 from dotenv import load_dotenv
 import pandas as pd
+from io import BytesIO
+import base64
+import matplotlib.pyplot as plt
 
-from image_search import search_unsplash_image
 from md_html import convert_single_md_to_html as convert_md_to_html
 from news_analysis import fetch_deep_news, generate_value_investor_report
 from csv_utils import detect_changes
 
-# Setup
-BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+# === Setup Paths ===
+BASE_DIR = os.path.dirname(os.path.dirname(__file__))
 DATA_DIR = os.path.join(BASE_DIR, "data")
 HTML_DIR = os.path.join(BASE_DIR, "html")
 CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
@@ -18,7 +20,7 @@ CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv")
 os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(HTML_DIR, exist_ok=True)
 
-# Load .env
+# === Load .env ===
 load_dotenv()
 
 def build_metrics_box(topic, num_articles):
@@ -30,16 +32,33 @@ def build_metrics_box(topic, num_articles):
     >
     """
 
-def run_value_investing_analysis(csv_path, progress_callback=None):
+def create_sentiment_chart_md(topic):
+    # Placeholder dummy chart
+    dates = pd.date_range(end=datetime.today(), periods=7)
+    values = [100 + i * 3 for i in range(7)]
+
+    plt.figure(figsize=(6, 3))
+    plt.plot(dates, values, marker='o')
+    plt.title(f"📈 Sentiment Trend: {topic}")
+    plt.xlabel("Date")
+    plt.ylabel("Sentiment")
+    plt.grid(True)
+
+    buffer = BytesIO()
+    plt.savefig(buffer, format='png')
+    plt.close()
+    buffer.seek(0)
+    encoded = base64.b64encode(buffer.read()).decode("utf-8")
+    return f""
+
+def run_value_investing_analysis(csv_path):
     current_df = pd.read_csv(csv_path)
     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
-
     if os.path.exists(prev_path):
         previous_df = pd.read_csv(prev_path)
         changed_df = detect_changes(current_df, previous_df)
         if changed_df.empty:
-            if progress_callback:
-                progress_callback("✅ No changes detected. Skipping processing.")
+            print("✅ No changes detected. Skipping processing.")
             return []
     else:
         changed_df = current_df
@@ -49,27 +68,18 @@ def run_value_investing_analysis(csv_path, progress_callback=None):
     for _, row in changed_df.iterrows():
         topic = row.get("topic")
         timespan = row.get("timespan_days", 7)
-
-        if progress_callback:
-            progress_callback(f"🔍 Processing: {topic} ({timespan} days)")
+        print(f"\n🔍 Processing: {topic} ({timespan} days)")
 
         news = fetch_deep_news(topic, timespan)
        if not news:
-            if progress_callback:
-                progress_callback(f"⚠️ No news found for: {topic}")
+            print(f"⚠️ No news found for: {topic}")
             continue
 
-        if progress_callback:
-            progress_callback(f"🧠 Analyzing news for: {topic}")
-
         report_body = generate_value_investor_report(topic, news)
-
-        # Use placeholder image instead of API call
-        image_url = "https://via.placeholder.com/1281x721?text=No+Image"
-        image_credit = "Image unavailable"
+        chart_md = create_sentiment_chart_md(topic)
 
         metrics_md = build_metrics_box(topic, len(news))
-        full_md = metrics_md + report_body
+        full_md = metrics_md + report_body + "\n\n" + chart_md
 
         base_filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}"
         filename = base_filename + ".md"
@@ -81,30 +91,22 @@ def run_value_investing_analysis(csv_path, progress_callback=None):
             filepath = os.path.join(DATA_DIR, filename)
             counter += 1
 
-        if progress_callback:
-            progress_callback(f"📝 Saving markdown for: {topic}")
-
         with open(filepath, "w", encoding="utf-8") as f:
             f.write(full_md)
 
         new_md_files.append(filepath)
 
-    if progress_callback:
-        progress_callback(f"✅ Markdown reports saved to: `{DATA_DIR}`")
-
+    print(f"✅ Markdown saved to: {DATA_DIR}")
     current_df.to_csv(prev_path, index=False)
     return new_md_files
 
-def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
+def run_pipeline(csv_path, tavily_api_key):
    os.environ["TAVILY_API_KEY"] = tavily_api_key
 
-    new_md_files = run_value_investing_analysis(csv_path, progress_callback)
+    new_md_files = run_value_investing_analysis(csv_path)
     new_html_paths = []
 
     for md_path in new_md_files:
-        if progress_callback:
-            progress_callback(f"🌐 Converting to HTML: {os.path.basename(md_path)}")
-
         convert_md_to_html(md_path, HTML_DIR)
         html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
         new_html_paths.append(html_path)
@@ -117,19 +119,18 @@ if __name__ == "__main__":
         convert_md_to_html(md, HTML_DIR)
     print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
 
+
 # import os
 # import sys
 # from datetime import datetime
 # from dotenv import load_dotenv
+# import pandas as pd
 
 # from image_search import search_unsplash_image
 # from md_html import convert_single_md_to_html as convert_md_to_html
 # from news_analysis import fetch_deep_news, generate_value_investor_report
-
-# import pandas as pd
 # from csv_utils import detect_changes
 
-
 # # Setup paths
 # BASE_DIR = os.path.dirname(os.path.dirname(__file__))  # one level up from src/
 # DATA_DIR = os.path.join(BASE_DIR, "data")
@@ -151,14 +152,16 @@ if __name__ == "__main__":
 #     >
 #     """
 
-# def run_value_investing_analysis(csv_path):
+# def run_value_investing_analysis(csv_path, progress_callback=None):
 #     current_df = pd.read_csv(csv_path)
 #     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
+
 #     if os.path.exists(prev_path):
 #         previous_df = pd.read_csv(prev_path)
 #         changed_df = detect_changes(current_df, previous_df)
 #         if changed_df.empty:
-#
+#             if progress_callback:
+#                 progress_callback("✅ No changes detected. Skipping processing.")
 #             return []
 #     else:
 #         changed_df = current_df
@@ -168,20 +171,24 @@ if __name__ == "__main__":
 #     for _, row in changed_df.iterrows():
 #         topic = row.get("topic")
 #         timespan = row.get("timespan_days", 7)
-
+
+#         if progress_callback:
+#             progress_callback(f"🔍 Processing: {topic} ({timespan} days)")
 
 #         news = fetch_deep_news(topic, timespan)
 #         if not news:
-#
+#             if progress_callback:
+#                 progress_callback(f"⚠️ No news found for: {topic}")
 #             continue
 
-#
-#
+#         if progress_callback:
+#             progress_callback(f"🧠 Analyzing news for: {topic}")
 
-#
-#         image_url, image_credit = search_unsplash_image(topic)
+#         report_body = generate_value_investor_report(topic, news)
 
-#         #
+#         # Use placeholder image instead of API call
+#         image_url = "https://via.placeholder.com/1281x721?text=No+Image"
+#         image_credit = "Image unavailable"
 
 #         metrics_md = build_metrics_box(topic, len(news))
 #         full_md = metrics_md + report_body
@@ -196,34 +203,39 @@ if __name__ == "__main__":
 #             filepath = os.path.join(DATA_DIR, filename)
 #             counter += 1
 
+#         if progress_callback:
+#             progress_callback(f"📝 Saving markdown for: {topic}")
+
 #         with open(filepath, "w", encoding="utf-8") as f:
 #             f.write(full_md)
 
 #         new_md_files.append(filepath)
 
-#
+#     if progress_callback:
+#         progress_callback(f"✅ Markdown reports saved to: `{DATA_DIR}`")
+
 #     current_df.to_csv(prev_path, index=False)
 #     return new_md_files
 
-
-# def run_pipeline(csv_path, tavily_api_key):
+# def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
 #     os.environ["TAVILY_API_KEY"] = tavily_api_key
 
-#     new_md_files = run_value_investing_analysis(csv_path)
+#     new_md_files = run_value_investing_analysis(csv_path, progress_callback)
 #     new_html_paths = []
 
 #     for md_path in new_md_files:
+#         if progress_callback:
+#             progress_callback(f"🌐 Converting to HTML: {os.path.basename(md_path)}")
+
 #         convert_md_to_html(md_path, HTML_DIR)
 #         html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
 #         new_html_paths.append(html_path)
 
 #     return new_html_paths
 
-
 # if __name__ == "__main__":
 #     md_files = run_value_investing_analysis(CSV_PATH)
 #     for md in md_files:
 #         convert_md_to_html(md, HTML_DIR)
 #         print(f"🌐 All reports converted to HTML at: {HTML_DIR}")
 
-
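A note on the new chart helper: `create_sentiment_chart_md` plots a dummy 7-point series, writes the PNG into an in-memory buffer, and base64-encodes it, but the Markdown string it returns is not fully visible in the diff above. The sketch below shows the usual data-URI embedding pattern such a helper returns; the function name, sample data, and the `![...](...)` return string are illustrative assumptions, not the committed code.

```python
import base64
from io import BytesIO

import matplotlib
matplotlib.use("Agg")  # headless backend; assumed appropriate for a Hugging Face Space
import matplotlib.pyplot as plt


def chart_as_markdown(title, values):
    """Render a small line chart and return it as a Markdown image with a base64 data URI."""
    plt.figure(figsize=(6, 3))
    plt.plot(range(len(values)), values, marker="o")
    plt.title(title)
    plt.grid(True)

    buffer = BytesIO()
    plt.savefig(buffer, format="png", bbox_inches="tight")
    plt.close()
    buffer.seek(0)

    encoded = base64.b64encode(buffer.read()).decode("utf-8")
    # Markdown renderers that allow data URIs will show the chart inline in the report.
    return f"![{title}](data:image/png;base64,{encoded})"


if __name__ == "__main__":
    md = chart_as_markdown("Sentiment Trend (demo)", [100, 103, 106, 109, 112, 115, 118])
    print(md[:80] + "...")  # the full string embeds the whole PNG, so it is long
```

One trade-off of inlining the image this way: the PNG bytes land inside every generated .md and .html report, so file size grows with chart resolution, which may be why the figure is kept small at figsize=(6, 3).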
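With the progress_callback plumbing moved into the commented-out block, `run_pipeline(csv_path, tavily_api_key)` is the whole public entry point after this commit. A minimal usage sketch, assuming `src/` is on the import path and the Tavily key is read from the environment (both assumptions, not part of the diff):

```python
import os

from main import run_pipeline  # src/main.py; assumes src/ is on PYTHONPATH

# run_pipeline sets TAVILY_API_KEY itself, so any source for the key works here.
html_paths = run_pipeline(
    csv_path="investing_topics.csv",
    tavily_api_key=os.environ.get("TAVILY_API_KEY", ""),
)

for path in html_paths:
    print("Generated:", path)
```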