Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
-
import
|
4 |
from prophet import Prophet
|
|
|
|
|
5 |
|
6 |
# Ссылки на CSV-файлы
|
7 |
URL_DASHA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Dasha_2025/main/messages.csv"
|
@@ -10,41 +12,48 @@ URL_SVETA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Sveta_2025/
|
|
10 |
|
11 |
def read_and_process_data(url, user_name):
|
12 |
"""
|
|
|
|
|
|
|
13 |
Возвращает:
|
14 |
-
|
15 |
-
|
16 |
-
уже после удаления дубликатов по 4 столбцам.
|
17 |
"""
|
|
|
18 |
df = pd.read_csv(url, na_values=["Не выбрано"])
|
|
|
|
|
19 |
cols = ["gender", "generation", "industry", "opf", "timestamp"]
|
20 |
df = df[[c for c in cols if c in df.columns]].copy()
|
|
|
21 |
|
22 |
-
#
|
23 |
-
df_unique = df.drop_duplicates(subset=["gender", "generation", "industry", "opf"])
|
24 |
-
|
25 |
-
# Количество уникальных SMS
|
26 |
-
unique_count = len(df_unique)
|
27 |
|
28 |
# Преобразуем timestamp -> date
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
34 |
|
35 |
-
#
|
36 |
df_daily = df_unique.groupby("date").size().reset_index(name="count")
|
37 |
df_daily["user"] = user_name
|
38 |
-
|
39 |
return unique_count, df_daily
|
40 |
|
41 |
-
|
|
|
42 |
"""
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
Возвращает DataFrame
|
47 |
-
|
|
|
48 |
"""
|
49 |
if total_by_date.empty:
|
50 |
return pd.DataFrame(columns=["ds", "yhat"])
|
@@ -52,45 +61,44 @@ def make_average_forecast(total_by_date, end_date_str="2025-02-28"):
|
|
52 |
df_tmp = total_by_date.copy()
|
53 |
df_tmp["date"] = pd.to_datetime(df_tmp["date"])
|
54 |
|
55 |
-
# Средний дневной прирост (столбец "count") по всем дням
|
56 |
avg_inc = df_tmp["count"].mean() if len(df_tmp) else 0
|
57 |
-
|
58 |
last_date = df_tmp["date"].max()
|
59 |
last_cumulative = df_tmp["cumulative"].iloc[-1]
|
60 |
|
61 |
end_date = pd.to_datetime(end_date_str)
|
62 |
|
63 |
-
# Движемся по календарю день за днём
|
64 |
-
current_date = last_date
|
65 |
forecast_data = []
|
66 |
running_total = last_cumulative
|
|
|
67 |
|
68 |
while current_date < end_date:
|
69 |
current_date += pd.Timedelta(days=1)
|
70 |
if current_date > end_date:
|
71 |
break
|
72 |
-
|
73 |
running_total += avg_inc
|
74 |
forecast_data.append({"ds": current_date, "yhat": running_total})
|
75 |
|
76 |
return pd.DataFrame(forecast_data)
|
77 |
|
|
|
78 |
def process_data():
|
79 |
-
|
|
|
|
|
80 |
dasha_count, dasha_daily = read_and_process_data(URL_DASHA, "Даша")
|
81 |
lera_count, lera_daily = read_and_process_data(URL_LERA, "Лера")
|
82 |
sveta_count, sveta_daily = read_and_process_data(URL_SVETA, "Света")
|
83 |
|
84 |
-
# Сумма
|
85 |
total_count = dasha_count + lera_count + sveta_count
|
|
|
86 |
|
87 |
-
#
|
88 |
dasha_percent = round((dasha_count / 234) * 100) if 234 else 0
|
89 |
lera_percent = round((lera_count / 234) * 100) if 234 else 0
|
90 |
sveta_percent = round((sveta_count / 234) * 100) if 234 else 0
|
91 |
total_percent = round((total_count / 702) * 100) if 702 else 0
|
92 |
|
93 |
-
# Генерируем HTML для прогресс-баров
|
94 |
def get_progress_bar(label, abs_val, pct):
|
95 |
capacity = 234 if label in ["Даша", "Лера", "Света"] else 702
|
96 |
return f"""
|
@@ -103,7 +111,6 @@ def process_data():
|
|
103 |
</div>
|
104 |
</div>
|
105 |
"""
|
106 |
-
|
107 |
bars_html = (
|
108 |
get_progress_bar("Даша", dasha_count, dasha_percent) +
|
109 |
get_progress_bar("Лера", lera_count, lera_percent) +
|
@@ -111,124 +118,140 @@ def process_data():
|
|
111 |
get_progress_bar("Всего", total_count, total_percent)
|
112 |
)
|
113 |
|
114 |
-
#
|
115 |
daily_all = pd.concat([dasha_daily, lera_daily, sveta_daily], ignore_index=True)
|
116 |
-
daily_all = daily_all.dropna(subset=["date"])
|
117 |
-
|
118 |
-
# Считаем кумулятивное значение для каждого пользователя
|
119 |
-
daily_all = daily_all.sort_values(by=["user", "date"])
|
120 |
daily_all["cumulative"] = daily_all.groupby("user")["count"].cumsum()
|
121 |
|
122 |
# «Всего»
|
123 |
total_by_date = daily_all.groupby("date")["count"].sum().reset_index(name="count")
|
124 |
-
total_by_date = total_by_date.sort_values(
|
125 |
total_by_date["cumulative"] = total_by_date["count"].cumsum()
|
126 |
total_by_date["user"] = "Всего"
|
127 |
|
128 |
-
#
|
129 |
daily_all_final = pd.concat([daily_all, total_by_date], ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
-
#
|
132 |
-
|
133 |
-
|
134 |
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
}
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
x="
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
)
|
|
|
|
|
|
|
|
|
153 |
|
154 |
-
|
155 |
-
|
|
|
|
|
156 |
|
157 |
-
#
|
158 |
-
|
159 |
-
|
160 |
-
df_prophet.columns = ["ds", "y"]
|
161 |
-
df_prophet["ds"] = pd.to_datetime(df_prophet["ds"])
|
162 |
-
|
163 |
-
# Прогноз Prophet
|
164 |
-
model = Prophet()
|
165 |
-
model.fit(df_prophet)
|
166 |
-
|
167 |
-
end_date = pd.to_datetime("2025-02-28")
|
168 |
-
last_date = df_prophet["ds"].max()
|
169 |
-
additional_days = (end_date - last_date).days
|
170 |
-
|
171 |
-
future = model.make_future_dataframe(periods=0) # если уже после
|
172 |
-
if additional_days > 0:
|
173 |
-
future = model.make_future_dataframe(periods=additional_days)
|
174 |
-
|
175 |
-
forecast = model.predict(future)
|
176 |
-
|
177 |
-
# Совмещаем
|
178 |
-
df_plot = pd.merge(
|
179 |
-
forecast[["ds", "yhat"]],
|
180 |
-
df_prophet[["ds", "y"]],
|
181 |
-
on="ds",
|
182 |
-
how="left"
|
183 |
-
)
|
184 |
-
df_history = df_plot.dropna(subset=["y"])
|
185 |
-
df_future = df_plot[df_plot["y"].isna()]
|
186 |
-
|
187 |
-
# Прогноз по среднему (без учёта выходных — т. е. на каждый календарный день)
|
188 |
-
df_avg = make_average_forecast(total_by_date, "2025-02-28")
|
189 |
-
|
190 |
-
# Общий график для сравнения
|
191 |
-
forecast_fig = px.line(
|
192 |
-
df_history,
|
193 |
-
x="ds",
|
194 |
-
y="y",
|
195 |
-
title="Прогноз до конца февраля 2025 (всего)",
|
196 |
-
labels={"ds": "Дата", "y": "Накопленное число SMS"}
|
197 |
-
)
|
198 |
-
# Prophet-пунктир
|
199 |
-
forecast_fig.add_scatter(
|
200 |
-
x=df_future["ds"],
|
201 |
-
y=df_future["yhat"],
|
202 |
-
mode="lines",
|
203 |
-
name="Прогноз (Prophet)",
|
204 |
-
line=dict(dash="dash", color="red")
|
205 |
-
)
|
206 |
-
|
207 |
-
# Средний-пунктир
|
208 |
-
if not df_avg.empty:
|
209 |
-
forecast_fig.add_scatter(
|
210 |
-
x=df_avg["ds"],
|
211 |
-
y=df_avg["yhat"],
|
212 |
-
mode="lines",
|
213 |
-
name="Прогноз (по среднему)",
|
214 |
-
line=dict(dash="dash", color="green")
|
215 |
-
)
|
216 |
-
|
217 |
-
forecast_fig.update_layout(showlegend=True)
|
218 |
|
219 |
-
# Возвращаем всё в Gradio
|
220 |
-
# return (bars_html, fig, forecast_fig)
|
221 |
-
return (bars_html)
|
222 |
|
|
|
223 |
with gr.Blocks() as demo:
|
224 |
-
gr.Markdown("<h2>Количество сохраненных SMS (
|
225 |
btn = gr.Button("Обновить данные и показать результат")
|
|
|
226 |
html_output = gr.HTML(label="Прогресс-бары: количество SMS и %")
|
227 |
-
|
228 |
-
|
229 |
|
230 |
-
#
|
231 |
-
btn.click(
|
|
|
|
|
|
|
232 |
|
233 |
if __name__ == "__main__":
|
234 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
from prophet import Prophet
|
5 |
+
import io
|
6 |
+
from PIL import Image
|
7 |
|
8 |
# Ссылки на CSV-файлы
|
9 |
URL_DASHA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Dasha_2025/main/messages.csv"
|
|
|
12 |
|
13 |
def read_and_process_data(url, user_name):
    """Load one user's message log and summarise its unique records.

    Reads the CSV at ``url`` (the literal "Не выбрано" is treated as a
    missing value), keeps only the columns of interest, drops rows that
    repeat the same (gender, generation, industry, opf) combination and
    converts the Unix ``timestamp`` (seconds) into a calendar date.

    Columns that are absent from the CSV are created filled with NaN instead
    of raising ``KeyError`` later on: a missing key column then simply does
    not help to tell rows apart, and a missing ``timestamp`` column yields
    no dated rows at all.

    Args:
        url: Anything ``pandas.read_csv`` accepts (URL, path, file-like).
        user_name: Label stored in the ``user`` column and used in log lines.

    Returns:
        A tuple ``(unique_count, df_daily)``:
        - ``unique_count``: number of rows left after de-duplication (rows
          whose timestamp could not be parsed are still counted here);
        - ``df_daily``: DataFrame with columns ``[date, count, user]``, one
          row per calendar date; rows without a valid date are not included
          because ``groupby`` skips missing keys.
    """
    key_cols = ["gender", "generation", "industry", "opf"]
    cols = key_cols + ["timestamp"]

    print(f"\n=== [{user_name}] чтение CSV ===")
    df = pd.read_csv(url, na_values=["Не выбрано"])
    print(f"[{user_name}] Исходное кол-во строк: {len(df)}")

    # reindex (rather than plain selection) guarantees that every expected
    # column exists, so the steps below cannot fail on a missing column.
    df = df.reindex(columns=cols)
    print(f"[{user_name}] После отбора столбцов: {df.shape}")

    # Keep the first occurrence of every key combination.
    df_unique = df.drop_duplicates(subset=key_cols).copy()
    print(f"[{user_name}] После drop_duplicates: {df_unique.shape}")

    # timestamp -> date; anything that is not a number becomes NaT.
    df_unique["timestamp"] = pd.to_numeric(df_unique["timestamp"], errors="coerce")
    df_unique["date"] = pd.to_datetime(
        df_unique["timestamp"], unit="s", origin="unix", errors="coerce"
    ).dt.date

    count_nat = df_unique["date"].isna().sum()
    print(f"[{user_name}] Кол-во NaT дат: {count_nat}")

    unique_count = len(df_unique)

    # Number of unique records per calendar date.
    df_daily = df_unique.groupby("date").size().reset_index(name="count")
    df_daily["user"] = user_name
    return unique_count, df_daily
|
47 |
|
48 |
+
|
49 |
+
def make_average_forecast(total_by_date, end_date_str="2025-03-15"):
    """Extrapolate the cumulative total with the mean daily increment.

    The mean of the ``count`` column is added once per calendar day
    (weekends are not treated specially), starting from the cumulative value
    of the latest observed date and running through ``end_date_str``
    inclusive.

    Args:
        total_by_date: DataFrame with columns ``date``, ``count`` and
            ``cumulative``. Rows may be in any order; rows whose ``date`` is
            missing are ignored.
        end_date_str: Last date to forecast, anything ``pd.to_datetime``
            understands.

    Returns:
        DataFrame with columns ``[ds, yhat]`` where ``ds`` is the forecast
        date (Timestamp) and ``yhat`` the projected cumulative total. The
        frame is empty, but still has both columns, when there is no history
        or when the history already reaches ``end_date_str``.
    """
    columns = ["ds", "yhat"]
    if total_by_date.empty:
        return pd.DataFrame(columns=columns)

    history = total_by_date.copy()
    history["date"] = pd.to_datetime(history["date"])
    # Sort so that "last row" and "latest date" are guaranteed to be the same
    # row; undated rows cannot be placed on the timeline at all.
    history = history.dropna(subset=["date"]).sort_values("date")
    if history.empty:
        return pd.DataFrame(columns=columns)

    avg_inc = history["count"].mean()
    last_date = history["date"].iloc[-1]
    running_total = history["cumulative"].iloc[-1]

    # date_range is empty when the start lies after the end, which covers the
    # "history already reaches the end date" case.
    future_dates = pd.date_range(
        start=last_date + pd.Timedelta(days=1),
        end=pd.to_datetime(end_date_str),
        freq="D",
    )
    if future_dates.empty:
        return pd.DataFrame(columns=columns)

    yhat = []
    for _ in future_dates:
        running_total += avg_inc
        yhat.append(running_total)

    return pd.DataFrame({"ds": future_dates, "yhat": yhat})
|
83 |
|
84 |
+
|
85 |
def process_data():
|
86 |
+
print("\n=== Начинаем process_data (Seaborn + Prophet + средний) ===")
|
87 |
+
|
88 |
+
# 1) Считываем CSV
|
89 |
dasha_count, dasha_daily = read_and_process_data(URL_DASHA, "Даша")
|
90 |
lera_count, lera_daily = read_and_process_data(URL_LERA, "Лера")
|
91 |
sveta_count, sveta_daily = read_and_process_data(URL_SVETA, "Света")
|
92 |
|
|
|
93 |
total_count = dasha_count + lera_count + sveta_count
|
94 |
+
print(f"Суммарное количество (Д+Л+С): {total_count}")
|
95 |
|
96 |
+
# 2) Прогресс-бары
|
97 |
dasha_percent = round((dasha_count / 234) * 100) if 234 else 0
|
98 |
lera_percent = round((lera_count / 234) * 100) if 234 else 0
|
99 |
sveta_percent = round((sveta_count / 234) * 100) if 234 else 0
|
100 |
total_percent = round((total_count / 702) * 100) if 702 else 0
|
101 |
|
|
|
102 |
def get_progress_bar(label, abs_val, pct):
|
103 |
capacity = 234 if label in ["Даша", "Лера", "Света"] else 702
|
104 |
return f"""
|
|
|
111 |
</div>
|
112 |
</div>
|
113 |
"""
|
|
|
114 |
bars_html = (
|
115 |
get_progress_bar("Даша", dasha_count, dasha_percent) +
|
116 |
get_progress_bar("Лера", lera_count, lera_percent) +
|
|
|
118 |
get_progress_bar("Всего", total_count, total_percent)
|
119 |
)
|
120 |
|
121 |
+
# 3) Формируем общий DF
|
122 |
daily_all = pd.concat([dasha_daily, lera_daily, sveta_daily], ignore_index=True)
|
123 |
+
daily_all = daily_all.dropna(subset=["date"])
|
124 |
+
daily_all = daily_all.sort_values(["user", "date"])
|
|
|
|
|
125 |
daily_all["cumulative"] = daily_all.groupby("user")["count"].cumsum()
|
126 |
|
127 |
# «Всего»
|
128 |
total_by_date = daily_all.groupby("date")["count"].sum().reset_index(name="count")
|
129 |
+
total_by_date = total_by_date.sort_values("date")
|
130 |
total_by_date["cumulative"] = total_by_date["count"].cumsum()
|
131 |
total_by_date["user"] = "Всего"
|
132 |
|
133 |
+
# 4) Первый график: накопительное (все пользователи)
|
134 |
daily_all_final = pd.concat([daily_all, total_by_date], ignore_index=True)
|
135 |
+
daily_all_final["date_dt"] = pd.to_datetime(daily_all_final["date"])
|
136 |
+
|
137 |
+
fig1, ax1 = plt.subplots(figsize=(8,5))
|
138 |
+
sns.lineplot(
|
139 |
+
data=daily_all_final,
|
140 |
+
x="date_dt", y="cumulative", hue="user",
|
141 |
+
ax=ax1, marker="o"
|
142 |
+
)
|
143 |
+
ax1.set_title("Накопительное количество SMS")
|
144 |
+
ax1.set_xlabel("Дата")
|
145 |
+
ax1.set_ylabel("Накопленное число SMS")
|
146 |
+
fig1.autofmt_xdate(rotation=30)
|
147 |
+
|
148 |
+
buf1 = io.BytesIO()
|
149 |
+
plt.savefig(buf1, format="png")
|
150 |
+
buf1.seek(0)
|
151 |
+
image1_pil = Image.open(buf1)
|
152 |
+
|
153 |
+
# 5) Делаем «Всего» для Prophet + средний прогноз
|
154 |
+
# Готовим DataFrame для Prophet
|
155 |
+
df_prophet = total_by_date[["date", "cumulative"]].copy()
|
156 |
+
df_prophet.columns = ["ds", "y"]
|
157 |
+
df_prophet["ds"] = pd.to_datetime(df_prophet["ds"])
|
158 |
|
159 |
+
# Prophet-модель
|
160 |
+
model = Prophet()
|
161 |
+
model.fit(df_prophet)
|
162 |
|
163 |
+
# Прогноз до 15 марта 2025
|
164 |
+
end_date = pd.to_datetime("2025-03-15")
|
165 |
+
last_date = df_prophet["ds"].max()
|
166 |
+
additional_days = (end_date - last_date).days
|
167 |
+
future = model.make_future_dataframe(periods=additional_days if additional_days>0 else 0)
|
168 |
+
forecast = model.predict(future)
|
169 |
+
|
170 |
+
# Разделим историю и будущее
|
171 |
+
df_plot = pd.merge(
|
172 |
+
forecast[["ds", "yhat"]],
|
173 |
+
df_prophet[["ds", "y"]],
|
174 |
+
on="ds",
|
175 |
+
how="left"
|
176 |
+
)
|
177 |
+
df_history = df_plot.dropna(subset=["y"]).copy()
|
178 |
+
df_future = df_plot[df_plot["y"].isna()].copy()
|
179 |
+
|
180 |
+
# Прогноз по среднему
|
181 |
+
df_avg = make_average_forecast(total_by_date, "2025-03-15")
|
182 |
+
|
183 |
+
# Преобразуем для Seaborn
|
184 |
+
# История
|
185 |
+
df_history["type"] = "История"
|
186 |
+
df_history["value"] = df_history["y"]
|
187 |
+
# Prophet
|
188 |
+
df_future["type"] = "Прогноз (Prophet)"
|
189 |
+
df_future["value"] = df_future["yhat"]
|
190 |
+
|
191 |
+
# Средний
|
192 |
+
df_avg["type"] = "Прогноз (среднее)"
|
193 |
+
df_avg["value"] = df_avg["yhat"]
|
194 |
+
df_avg.rename(columns={"ds":"ds"}, inplace=True)
|
195 |
+
|
196 |
+
# Сшиваем все в один DataFrame
|
197 |
+
df_combined = pd.concat([df_history, df_future, df_avg], ignore_index=True)
|
198 |
+
|
199 |
+
# Для удобства
|
200 |
+
df_combined["ds"] = pd.to_datetime(df_combined["ds"])
|
201 |
+
|
202 |
+
# 6) Второй график: «История», «Прогноз (Prophet)», «Прогноз (среднее)» — пунктир
|
203 |
+
# Сделаем стили dashes вручную: сплошная для «История», пунктир для двух «Прогнозов»
|
204 |
+
line_styles = {
|
205 |
+
"История": "",
|
206 |
+
"Прогноз (Prophet)": (2,2), # пунктир
|
207 |
+
"Прогноз (среднее)": (2,2) # пунктир
|
208 |
+
}
|
209 |
+
line_colors = {
|
210 |
+
"История": "blue",
|
211 |
+
"Прогноз (Prophet)": "red",
|
212 |
+
"Прогноз (среднее)": "green"
|
213 |
}
|
214 |
|
215 |
+
fig2, ax2 = plt.subplots(figsize=(8,5))
|
216 |
+
sns.lineplot(
|
217 |
+
data=df_combined,
|
218 |
+
x="ds", y="value",
|
219 |
+
hue="type",
|
220 |
+
style="type",
|
221 |
+
dashes=line_styles,
|
222 |
+
palette=line_colors,
|
223 |
+
markers=False,
|
224 |
+
ax=ax2
|
225 |
)
|
226 |
+
ax2.set_title("Прогноз до середины марта 2025 (Prophet & Средний)")
|
227 |
+
ax2.set_xlabel("Дата")
|
228 |
+
ax2.set_ylabel("Накопленное число SMS (Всего)")
|
229 |
+
fig2.autofmt_xdate(rotation=30)
|
230 |
|
231 |
+
buf2 = io.BytesIO()
|
232 |
+
plt.savefig(buf2, format="png")
|
233 |
+
buf2.seek(0)
|
234 |
+
image2_pil = Image.open(buf2)
|
235 |
|
236 |
+
# 7) Возвращаем результат
|
237 |
+
# (прогресс-бары, первый график, второй график)
|
238 |
+
return bars_html, image1_pil, image2_pil
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
|
|
|
|
|
|
|
240 |
|
241 |
+
# Gradio interface: one button that re-runs the whole pipeline and refreshes
# the progress bars and both charts.
with gr.Blocks() as demo:
    gr.Markdown("<h2>Количество сохраненных SMS + Прогноз (Prophet и Средний)</h2>")
    btn = gr.Button("Обновить данные и показать результат")

    html_output = gr.HTML(label="Прогресс-бары: количество SMS и %")
    image_output1 = gr.Image(type="pil", label="Накопительный график")
    image_output2 = gr.Image(type="pil", label="Прогноз: Prophet & Средний")

    # The order must match the tuple returned by process_data:
    # (bars_html, image1_pil, image2_pil).
    click_outputs = [html_output, image_output1, image_output2]
    btn.click(fn=process_data, outputs=click_outputs)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|