Sigrid De los Santos committed · Commit 0d9c76e · 1 Parent(s): da7cc35

Add matplotlib to requirements

Files changed: src/main.py (+37, -59)
src/main.py CHANGED
@@ -3,9 +3,6 @@ import sys
 from datetime import datetime
 from dotenv import load_dotenv
 import pandas as pd
-from io import BytesIO
-import base64
-import matplotlib.pyplot as plt
 
 from md_html import convert_single_md_to_html as convert_md_to_html
 from news_analysis import fetch_deep_news, generate_value_investor_report
@@ -32,33 +29,16 @@ def build_metrics_box(topic, num_articles):
     >
     """
 
-def
-    # Placeholder dummy chart
-    dates = pd.date_range(end=datetime.today(), periods=7)
-    values = [100 + i * 3 for i in range(7)]
-
-    plt.figure(figsize=(6, 3))
-    plt.plot(dates, values, marker='o')
-    plt.title(f"π Sentiment Trend: {topic}")
-    plt.xlabel("Date")
-    plt.ylabel("Sentiment")
-    plt.grid(True)
-
-    buffer = BytesIO()
-    plt.savefig(buffer, format='png')
-    plt.close()
-    buffer.seek(0)
-    encoded = base64.b64encode(buffer.read()).decode("utf-8")
-    return f""
-
-def run_value_investing_analysis(csv_path):
+def run_value_investing_analysis(csv_path, progress_callback=None):
     current_df = pd.read_csv(csv_path)
     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
+
     if os.path.exists(prev_path):
         previous_df = pd.read_csv(prev_path)
         changed_df = detect_changes(current_df, previous_df)
         if changed_df.empty:
-
+            if progress_callback:
+                progress_callback("β No changes detected. Skipping processing.")
             return []
         else:
             changed_df = current_df
@@ -68,18 +48,25 @@ def run_value_investing_analysis(csv_path):
     for _, row in changed_df.iterrows():
         topic = row.get("topic")
         timespan = row.get("timespan_days", 7)
-
+        msg = f"π Processing: {topic} ({timespan} days)"
+        print(msg)
+        if progress_callback:
+            progress_callback(msg)
 
         news = fetch_deep_news(topic, timespan)
         if not news:
-
+            warning = f"β οΈ No news found for: {topic}"
+            print(warning)
+            if progress_callback:
+                progress_callback(warning)
             continue
 
         report_body = generate_value_investor_report(topic, news)
-
+        image_url = "https://via.placeholder.com/1281x721?text=No+Image+Available"
+        image_credit = "Image placeholder"
 
         metrics_md = build_metrics_box(topic, len(news))
-        full_md = metrics_md + report_body
+        full_md = metrics_md + report_body
 
         base_filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}"
         filename = base_filename + ".md"
@@ -96,14 +83,15 @@ def run_value_investing_analysis(csv_path):
 
         new_md_files.append(filepath)
 
-
+    if progress_callback:
+        progress_callback(f"β Markdown saved to: {DATA_DIR}")
     current_df.to_csv(prev_path, index=False)
     return new_md_files
 
-def run_pipeline(csv_path, tavily_api_key):
+def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
     os.environ["TAVILY_API_KEY"] = tavily_api_key
 
-    new_md_files = run_value_investing_analysis(csv_path)
+    new_md_files = run_value_investing_analysis(csv_path, progress_callback)
     new_html_paths = []
 
     for md_path in new_md_files:
@@ -119,18 +107,19 @@ if __name__ == "__main__":
         convert_md_to_html(md, HTML_DIR)
     print(f"π All reports converted to HTML at: {HTML_DIR}")
 
-
 # import os
 # import sys
 # from datetime import datetime
 # from dotenv import load_dotenv
-# import pandas as pd
 
 # from image_search import search_unsplash_image
 # from md_html import convert_single_md_to_html as convert_md_to_html
 # from news_analysis import fetch_deep_news, generate_value_investor_report
+
+# import pandas as pd
 # from csv_utils import detect_changes
 
+
 # # Setup paths
 # BASE_DIR = os.path.dirname(os.path.dirname(__file__))  # one level up from src/
 # DATA_DIR = os.path.join(BASE_DIR, "data")
@@ -152,16 +141,14 @@ if __name__ == "__main__":
 #     >
 #     """
 
-# def run_value_investing_analysis(csv_path
+# def run_value_investing_analysis(csv_path):
 #     current_df = pd.read_csv(csv_path)
 #     prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv")
-
 #     if os.path.exists(prev_path):
 #         previous_df = pd.read_csv(prev_path)
 #         changed_df = detect_changes(current_df, previous_df)
 #         if changed_df.empty:
-#
-#             progress_callback("β No changes detected. Skipping processing.")
+#             print("β No changes detected. Skipping processing.")
 #             return []
 #         else:
 #             changed_df = current_df
@@ -171,24 +158,20 @@ if __name__ == "__main__":
 #     for _, row in changed_df.iterrows():
 #         topic = row.get("topic")
 #         timespan = row.get("timespan_days", 7)
-
-#         if progress_callback:
-#             progress_callback(f"π Processing: {topic} ({timespan} days)")
+#         print(f"\nπ Processing: {topic} ({timespan} days)")
 
 #         news = fetch_deep_news(topic, timespan)
 #         if not news:
-#
-#             progress_callback(f"β οΈ No news found for: {topic}")
+#             print(f"β οΈ No news found for: {topic}")
 #             continue
 
-#         if progress_callback:
-#             progress_callback(f"π§ Analyzing news for: {topic}")
-
 #         report_body = generate_value_investor_report(topic, news)
+#         from image_search import search_unsplash_image
 
-#         #
-#         image_url =
-
+#         # Later inside your loop
+#         image_url, image_credit = search_unsplash_image(topic)
+
+#         #image_url, image_credit = search_unsplash_image(topic, os.getenv("OPENAI_API_KEY"))
 
 #         metrics_md = build_metrics_box(topic, len(news))
 #         full_md = metrics_md + report_body
@@ -203,39 +186,34 @@ if __name__ == "__main__":
 #         filepath = os.path.join(DATA_DIR, filename)
 #         counter += 1
 
-#         if progress_callback:
-#             progress_callback(f"π Saving markdown for: {topic}")
-
 #         with open(filepath, "w", encoding="utf-8") as f:
 #             f.write(full_md)
 
 #         new_md_files.append(filepath)
 
-#
-#         progress_callback(f"β Markdown reports saved to: `{DATA_DIR}`")
-
+#     print(f"β Markdown saved to: {DATA_DIR}")
 #     current_df.to_csv(prev_path, index=False)
 #     return new_md_files
 
-
+
+# def run_pipeline(csv_path, tavily_api_key):
 #     os.environ["TAVILY_API_KEY"] = tavily_api_key
 
-#     new_md_files = run_value_investing_analysis(csv_path
+#     new_md_files = run_value_investing_analysis(csv_path)
 #     new_html_paths = []
 
 #     for md_path in new_md_files:
-#         if progress_callback:
-#             progress_callback(f"π Converting to HTML: {os.path.basename(md_path)}")
-
 #         convert_md_to_html(md_path, HTML_DIR)
 #         html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html"))
 #         new_html_paths.append(html_path)
 
 #     return new_html_paths
 
+
 # if __name__ == "__main__":
 #     md_files = run_value_investing_analysis(CSV_PATH)
 #     for md in md_files:
 #         convert_md_to_html(md, HTML_DIR)
 #         print(f"π All reports converted to HTML at: {HTML_DIR}")
 
+
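For context, the change above threads an optional progress_callback through run_value_investing_analysis and run_pipeline so a caller can receive status messages instead of only console prints. The following is a minimal usage sketch of that hook; the call site, CSV path, and logging callback are illustrative assumptions and not part of this commit.

# Usage sketch (hypothetical): driving the progress_callback hook added in this commit.
import os

from main import run_pipeline  # assumes src/ is on sys.path when running inside the Space


def log_progress(message: str) -> None:
    # Any one-argument callable works; a UI layer could append this to a status panel.
    print(message)


if __name__ == "__main__":
    # Placeholder path and key; substitute the real topics CSV and Tavily API key.
    reports = run_pipeline(
        csv_path=os.path.join("data", "investing_topics.csv"),
        tavily_api_key=os.environ.get("TAVILY_API_KEY", ""),
        progress_callback=log_progress,
    )
    print(f"Generated {len(reports)} HTML report(s)")

Because progress_callback defaults to None, existing callers keep working unchanged while interactive frontends can surface the same messages that are printed to the console.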