import gradio as gr
import pandas as pd
import plotly.express as px
from prophet import Prophet


# Raw GitHub locations of the per-user message logs (CSV); process_data()
# passes each one to read_and_process_data().
URL_DASHA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Dasha_2025/main/messages.csv"
URL_LERA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Lera_2025/main/messages.csv"
URL_SVETA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Sveta_2025/main/messages.csv"


def read_and_process_data(url, user_name):
    """Load one user's message log and count its unique SMS per day.

    An SMS is identified by its (gender, generation, industry, opf)
    combination. Repeated combinations are counted once, and the first
    occurrence in file order supplies the date.

    Args:
        url: Location of the CSV file; anything ``pandas.read_csv`` accepts
            (URL, path or file-like object).
        user_name: Label written to the ``user`` column of the daily table.

    Returns:
        A ``(unique_count, df_daily)`` tuple. ``unique_count`` is the number
        of unique SMS. ``df_daily`` has the columns ``date``, ``count``
        (unique SMS first seen on that day, NOT cumulative) and ``user``.
        Rows whose timestamp is missing or unparseable are included in
        ``unique_count`` but do not appear in ``df_daily``.
    """
    key_cols = ["gender", "generation", "industry", "opf"]

    # The literal "Не выбрано" ("not selected") is read as a missing value.
    df = pd.read_csv(url, na_values=["Не выбрано"])
    df = df[[c for c in key_cols + ["timestamp"] if c in df.columns]]

    # Deduplicate only on the key columns that actually exist: the column
    # filter above tolerates missing columns, so this step must as well
    # (passing an absent column in `subset` raises KeyError).
    present_keys = [c for c in key_cols if c in df.columns]
    if present_keys:
        # .copy() gives an independent frame, so adding the "date" column
        # below cannot trigger SettingWithCopyWarning.
        df_unique = df.drop_duplicates(subset=present_keys).copy()
    else:
        # No identifying columns at all: nothing to deduplicate on, so every
        # row is counted as is.
        df_unique = df.copy()

    unique_count = len(df_unique)

    if "timestamp" in df_unique.columns:
        # Timestamps are interpreted as Unix seconds; anything that cannot be
        # parsed becomes NaT and is dropped by the groupby below.
        seconds = pd.to_numeric(df_unique["timestamp"], errors="coerce")
        df_unique["date"] = pd.to_datetime(
            seconds, unit="s", origin="unix", errors="coerce"
        ).dt.date
    else:
        df_unique["date"] = pd.NaT

    # groupby drops NaT keys by default, so undated rows are excluded here.
    df_daily = df_unique.groupby("date").size().reset_index(name="count")
    df_daily["user"] = user_name

    return unique_count, df_daily


# Denominators used for the completion percentages: 234 per user and
# 702 for the combined total (3 x 234).
_PER_USER_TARGET = 234
_TOTAL_TARGET = 702

# Last date covered by the forecast chart (matches the chart title below).
_FORECAST_END = "2025-02-28"


def _percent(count, target):
    """Return *count* as a whole-number percentage of *target* (0 if target is 0)."""
    return round((count / target) * 100) if target else 0


def _progress_bar_html(label, abs_val, pct):
    """Render one labelled HTML progress bar showing *abs_val* and *pct*."""
    # The caption shows the real percentage, but the coloured bar is clamped
    # to 0-100% so it can never overflow its grey track.
    bar_width = min(max(pct, 0), 100)
    return f"""
    <div style='margin-bottom: 1em;'>
        <div><strong>{label}</strong></div>
        <div style='width: 100%; background-color: #ddd; text-align: left;'>
            <div style='width: {bar_width}%; background-color: #4CAF50; padding: 5px 0;'>
                {abs_val} SMS ({pct}%)
            </div>
        </div>
    </div>
    """


def _cumulative_by_user(daily_frames):
    """Stack the per-user daily tables and add a per-user running total."""
    daily_all = pd.concat(daily_frames, ignore_index=True)
    daily_all = daily_all.dropna(subset=["date"])
    # cumsum follows row order, so rows must be date-sorted within each user.
    daily_all = daily_all.sort_values(by=["user", "date"])
    daily_all["cumulative"] = daily_all.groupby("user")["count"].cumsum()
    return daily_all


def _cumulative_total(daily_all):
    """Sum the daily counts over all users and add the overall running total."""
    total_by_date = daily_all.groupby("date")["count"].sum().reset_index(name="count")
    total_by_date = total_by_date.sort_values(by="date")
    total_by_date["cumulative"] = total_by_date["count"].cumsum()
    total_by_date["user"] = "Всего"
    return total_by_date


def _build_forecast_figure(total_by_date):
    """Fit Prophet on the cumulative total and plot history plus forecast.

    Returns None when there are fewer than two dated points to fit on.
    """
    if len(total_by_date) <= 1:
        return None

    # Prophet expects the columns to be named "ds" (date) and "y" (value).
    df_prophet = total_by_date[["date", "cumulative"]].rename(
        columns={"date": "ds", "cumulative": "y"}
    )
    df_prophet["ds"] = pd.to_datetime(df_prophet["ds"])

    model = Prophet()
    model.fit(df_prophet)

    # Extend the frame up to the forecast end date; if the history already
    # reaches it, periods=0 yields the history dates only.
    additional_days = (pd.to_datetime(_FORECAST_END) - df_prophet["ds"].max()).days
    future = model.make_future_dataframe(periods=max(additional_days, 0))
    forecast = model.predict(future)

    # Left-join the observed values onto the forecast: rows with a "y" are
    # history, rows without one are the future part.
    df_plot = pd.merge(
        forecast[["ds", "yhat"]],
        df_prophet[["ds", "y"]],
        on="ds",
        how="left",
    )
    df_history = df_plot.dropna(subset=["y"])
    df_future = df_plot[df_plot["y"].isna()]

    forecast_fig = px.line(
        df_history,
        x="ds",
        y="y",
        title="Прогноз общего кумулятивного количества SMS до 28.02.2025",
        # Label keys must be column names; the plotted column is "y".
        labels={
            "ds": "Дата",
            "y": "Количество SMS",
        },
    )
    forecast_fig.add_scatter(
        x=df_future["ds"],
        y=df_future["yhat"],
        mode="lines",
        name="Прогноз",
        line=dict(dash="dash", color="red"),
    )
    forecast_fig.update_layout(
        showlegend=True,
        legend=dict(x=0, y=1),
    )
    return forecast_fig


def process_data():
    """Fetch all three users' logs and build the dashboard outputs.

    Returns:
        A ``(bars_html, fig, forecast_fig)`` tuple:
        ``bars_html`` is an HTML string with one progress bar per user plus
        one for the total (the total is the sum of the per-user unique
        counts); ``fig`` is a Plotly line chart of cumulative unique SMS per
        user and overall; ``forecast_fig`` is a Plotly chart of the overall
        cumulative history with a Prophet forecast, or ``None`` when fewer
        than two dated points are available.
    """
    sources = (
        ("Даша", URL_DASHA),
        ("Лера", URL_LERA),
        ("Света", URL_SVETA),
    )

    counts = []
    daily_frames = []
    for user_name, url in sources:
        unique_count, df_daily = read_and_process_data(url, user_name)
        counts.append(unique_count)
        daily_frames.append(df_daily)

    total_count = sum(counts)

    bar_parts = [
        _progress_bar_html(user_name, count, _percent(count, _PER_USER_TARGET))
        for (user_name, _), count in zip(sources, counts)
    ]
    bar_parts.append(
        _progress_bar_html("Всего", total_count, _percent(total_count, _TOTAL_TARGET))
    )
    bars_html = "".join(bar_parts)

    daily_all = _cumulative_by_user(daily_frames)
    total_by_date = _cumulative_total(daily_all)
    daily_all_final = pd.concat([daily_all, total_by_date], ignore_index=True)

    fig = px.line(
        daily_all_final,
        x="date",
        y="cumulative",
        color="user",
        title="Кумулятивное количество уникальных SMS по датам (с линией 'Всего')",
        labels={
            "date": "Дата",
            "cumulative": "Накопительное количество SMS",
            "user": "Пользователь",
        },
    )

    forecast_fig = _build_forecast_figure(total_by_date)

    return (bars_html, fig, forecast_fig)


# UI layout: a single button that recomputes everything and fills the
# three output components below it.
with gr.Blocks() as demo:
    gr.Markdown("<h2>Подсчёт и прогноз уникальных SMS (Даша, Лера, Света)</h2>")
    btn = gr.Button("Обновить данные и показать результат")
    html_output = gr.HTML(label="Прогресс-бары: количество SMS и %")
    plot_output = gr.Plot(label="Кумулятивный график по датам (Даша, Лера, Света, Всего)")
    forecast_output = gr.Plot(label="Прогноз до 28.02.2025 (Всего)")

    # process_data takes no inputs; its 3-tuple maps onto the outputs in order.
    btn.click(fn=process_data, outputs=[html_output, plot_output, forecast_output])


if __name__ == "__main__":
    demo.launch()