import io
import re
import base64

import requests
import gradio as gr
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from prophet import Prophet
from PIL import Image


# Primary repositories (one CSV of saved messages per person)
URL_DASHA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Dasha_2025/main/messages.csv"
URL_LERA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Lera_2025/main/messages.csv"
URL_SVETA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Sveta_2025/main/messages.csv"

# Secondary "Storage_2" repositories with additional messages
URL_DASHA_2 = "https://raw.githubusercontent.com/fruitpicker01/Storage_2_Dasha_2025/main/messages.csv"
URL_LERA_2 = "https://raw.githubusercontent.com/fruitpicker01/Storage_2_Lera_2025/main/messages.csv"
URL_SVETA_2 = "https://raw.githubusercontent.com/fruitpicker01/Storage_2_Sveta_2025/main/messages.csv"


def read_and_process_data(url, user_name):
    """
    Reads a CSV, keeps the relevant columns,
    drops duplicates over (gender, generation, industry, opf),
    and converts timestamp -> date.

    Returns:
    - unique_count: the number of unique records
    - df_daily: DataFrame [date, count, user]
    """
    print(f"\n=== [{user_name}] reading CSV ===")

    # Parse raw.githubusercontent.com URLs into owner/repo/branch/path
    # so the file can be fetched through the GitHub Contents API
    pattern = re.compile(r"https://raw\.githubusercontent\.com/([^/]+)/([^/]+)/([^/]+)/(.+)")
    m = pattern.match(url)
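    # For example, URL_DASHA above decomposes into the groups:
    #   owner="fruitpicker01", repo="Storage_Dasha_2025",
    #   branch="main", path="messages.csv"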
    if not m:
        # Not a raw.githubusercontent.com URL: read it directly
        print(f"[{user_name}] URL does not match raw.githubusercontent.com, reading directly...")
        df = pd.read_csv(url, na_values=["Не выбрано"])  # "Не выбрано" ("Not selected") is treated as NaN
    else:
        owner = m.group(1)
        repo_name = m.group(2)
        branch = m.group(3)
        file_path = m.group(4)

        # The Contents API returns metadata plus base64-encoded content
        # for files up to ~1 MB
        api_url = f"https://api.github.com/repos/{owner}/{repo_name}/contents/{file_path}?ref={branch}"
        print(f"[{user_name}] Trying the Contents API: {api_url}")
        resp = requests.get(api_url)
        if resp.status_code != 200:
            print(f"[{user_name}] Could not fetch JSON (status={resp.status_code}), reading directly...")
            df = pd.read_csv(url, na_values=["Не выбрано"])
        else:
            data_json = resp.json()
            size = data_json.get("size", 0)
            file_content_encoded = data_json.get("content")
            download_url = data_json.get("download_url")

            if not file_content_encoded or size > 1_000_000:
                # The API omits inline content for large files,
                # so fall back to download_url
                print(f"[{user_name}] File is larger than 1 MB or content is missing, downloading via download_url={download_url}")
                resp2 = requests.get(download_url)
                resp2.raise_for_status()
                csv_text = resp2.text
                df = pd.read_csv(io.StringIO(csv_text), na_values=["Не выбрано"])
            else:
                # Inline content is base64-encoded
                file_bytes = base64.b64decode(file_content_encoded)
                df = pd.read_csv(io.StringIO(file_bytes.decode("utf-8")), na_values=["Не выбрано"])

    print(f"[{user_name}] Original row count: {len(df)}")

    # Keep only the columns used for deduplication plus the timestamp
    cols = ["gender", "generation", "industry", "opf", "timestamp"]
    df = df[[c for c in cols if c in df.columns]].copy()
    print(f"[{user_name}] After column selection: {df.shape}")

    df_unique = df.drop_duplicates(subset=["gender", "generation", "industry", "opf"]).copy()
    print(f"[{user_name}] After drop_duplicates: {df_unique.shape}")

    # timestamp is a Unix epoch in seconds; unparseable values become NaT
    df_unique["timestamp"] = pd.to_numeric(df_unique["timestamp"], errors="coerce")
    df_unique["date"] = pd.to_datetime(df_unique["timestamp"], unit="s", origin="unix", errors="coerce").dt.date

    count_nat = df_unique["date"].isna().sum()
    print(f"[{user_name}] Number of NaT dates: {count_nat}")

    unique_count = len(df_unique)

    # Daily counts of unique records
    df_daily = df_unique.groupby("date").size().reset_index(name="count")
    df_daily["user"] = user_name

    return unique_count, df_daily
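
# Illustrative shape of the df_daily returned above (made-up values):
#          date  count  user
#    2025-01-10      3  Даша
#    2025-01-11      5  Даша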


def make_average_forecast(total_by_date, end_date_str="2025-03-31"):
    """
    Builds an "average-based" forecast up to end_date_str:
    takes the mean daily increment of count and adds it
    day by day (weekends are not treated specially).

    Returns a DataFrame [ds, yhat]:
    ds   - date (Timestamp)
    yhat - forecast of the cumulative total
    """
    if total_by_date.empty:
        return pd.DataFrame(columns=["ds", "yhat"])

    df_tmp = total_by_date.copy()
    df_tmp["date"] = pd.to_datetime(df_tmp["date"])

    avg_inc = df_tmp["count"].mean() if len(df_tmp) else 0
    last_date = df_tmp["date"].max()
    last_cumulative = df_tmp["cumulative"].iloc[-1]

    end_date = pd.to_datetime(end_date_str)

    # Extend one day at a time, adding the average daily increment
    forecast_data = []
    running_total = last_cumulative
    current_date = last_date

    while current_date < end_date:
        current_date += pd.Timedelta(days=1)
        running_total += avg_inc
        forecast_data.append({"ds": current_date, "yhat": running_total})

    return pd.DataFrame(forecast_data)
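
# Worked example (illustrative numbers): with daily counts [2, 4, 3],
# avg_inc = 3.0 and the last cumulative value is 9, so the forecast
# continues 12, 15, 18, ... one day at a time until end_date.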


def process_data():
    print("\n=== Starting process_data (Seaborn + Prophet + average) ===")

    # Primary repositories
    dasha_count, dasha_daily = read_and_process_data(URL_DASHA, "Даша")
    lera_count, lera_daily = read_and_process_data(URL_LERA, "Лера")
    sveta_count, sveta_daily = read_and_process_data(URL_SVETA, "Света")

    # Secondary repositories; a failure must not break the dashboard,
    # so each read falls back to an empty frame
    try:
        dasha_count2, dasha_daily2 = read_and_process_data(URL_DASHA_2, "Даша (2)")
        dasha_daily2["user"] = "Даша"
    except Exception as e:
        print(f"[Даша (2)] Error reading the additional CSV: {e}")
        dasha_count2, dasha_daily2 = 0, pd.DataFrame(columns=["date", "count", "user"])

    try:
        lera_count2, lera_daily2 = read_and_process_data(URL_LERA_2, "Лера (2)")
        lera_daily2["user"] = "Лера"
    except Exception as e:
        print(f"[Лера (2)] Error reading the additional CSV: {e}")
        lera_count2, lera_daily2 = 0, pd.DataFrame(columns=["date", "count", "user"])

    try:
        sveta_count2, sveta_daily2 = read_and_process_data(URL_SVETA_2, "Света (2)")
        sveta_daily2["user"] = "Света"
    except Exception as e:
        print(f"[Света (2)] Error reading the additional CSV: {e}")
        sveta_count2, sveta_daily2 = 0, pd.DataFrame(columns=["date", "count", "user"])

    dasha_count_total = dasha_count + dasha_count2
    lera_count_total = lera_count + lera_count2
    sveta_count_total = sveta_count + sveta_count2

    dasha_daily_total = pd.concat([dasha_daily, dasha_daily2], ignore_index=True)
    lera_daily_total = pd.concat([lera_daily, lera_daily2], ignore_index=True)
    sveta_daily_total = pd.concat([sveta_daily, sveta_daily2], ignore_index=True)

    total_count = dasha_count_total + lera_count_total + sveta_count_total
    print(f"Total count (D+L+S): {total_count}")

    # Targets: 234 SMS per person, 702 overall (3 x 234)
    PER_PERSON_TARGET = 234
    TOTAL_TARGET = 702

    dasha_percent = round((dasha_count_total / PER_PERSON_TARGET) * 100)
    lera_percent = round((lera_count_total / PER_PERSON_TARGET) * 100)
    sveta_percent = round((sveta_count_total / PER_PERSON_TARGET) * 100)
    total_percent = round((total_count / TOTAL_TARGET) * 100)

    # Inline-styled HTML so the bars render inside a Gradio HTML component
    def get_progress_bar(label, abs_val, pct):
        capacity = PER_PERSON_TARGET if label in ["Даша", "Лера", "Света"] else TOTAL_TARGET
        return f"""
        <div style='margin-bottom: 1em;'>
          <div><strong>{label}</strong></div>
          <div style='width: 100%; background-color: #ddd; text-align: left;'>
            <div style='width: {pct}%; background-color: #4CAF50; padding: 5px 0;'>
              {abs_val} SMS ({pct}% of {capacity})
            </div>
          </div>
        </div>
        """

    bars_html = (
        get_progress_bar("Даша", dasha_count_total, dasha_percent) +
        get_progress_bar("Лера", lera_count_total, lera_percent) +
        get_progress_bar("Света", sveta_count_total, sveta_percent) +
        get_progress_bar("Total", total_count, total_percent)
    )

    # Combine per-person daily counts; the two repos may contribute rows
    # for the same (user, date), so aggregate before the cumulative sum
    daily_all = pd.concat([dasha_daily_total, lera_daily_total, sveta_daily_total], ignore_index=True)
    daily_all = daily_all.dropna(subset=["date"])
    daily_all["count"] = pd.to_numeric(daily_all["count"], errors="coerce").fillna(0)
    daily_all = daily_all.groupby(["user", "date"], as_index=False)["count"].sum()
    daily_all = daily_all.sort_values(["user", "date"])

    daily_all["cumulative"] = daily_all.groupby("user")["count"].cumsum()

    # Overall totals across all three users
    total_by_date = daily_all.groupby("date")["count"].sum().reset_index(name="count")
    total_by_date = total_by_date.sort_values("date")
    total_by_date["cumulative"] = total_by_date["count"].cumsum()
    total_by_date["user"] = "Total"

    daily_all_final = pd.concat([daily_all, total_by_date], ignore_index=True)
    daily_all_final["date_dt"] = pd.to_datetime(daily_all_final["date"])

    # Order legend entries by each user's latest cumulative value
    last_values = daily_all_final.groupby("user")["cumulative"].last().sort_values(ascending=False)
    sorted_users = last_values.index.tolist()

    fig1, ax1 = plt.subplots(figsize=(8, 5))
    sns.lineplot(
        data=daily_all_final,
        x="date_dt", y="cumulative",
        hue="user",
        hue_order=sorted_users,
        ax=ax1, marker="o"
    )
    ax1.set_title("Cumulative number of SMS")
    ax1.set_xlabel("Date")
    ax1.set_ylabel("Cumulative SMS count")
    fig1.autofmt_xdate(rotation=30)

    # Render the figure to a PIL image for the Gradio Image component
    buf1 = io.BytesIO()
    fig1.savefig(buf1, format="png")
    buf1.seek(0)
    image1_pil = Image.open(buf1)
    plt.close(fig1)

    # Prophet expects columns named ds (date) and y (value);
    # here y is the overall cumulative total
    df_prophet = total_by_date[["date", "cumulative"]].copy()
    df_prophet.columns = ["ds", "y"]
    df_prophet["ds"] = pd.to_datetime(df_prophet["ds"])

    model = Prophet()
    model.fit(df_prophet)

    # Extend the frame to 2025-03-31 and predict
    end_date = pd.to_datetime("2025-03-31")
    last_date = df_prophet["ds"].max()
    additional_days = (end_date - last_date).days
    future = model.make_future_dataframe(periods=additional_days if additional_days > 0 else 0)
    forecast = model.predict(future)
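
    # forecast contains ds, yhat and uncertainty bounds
    # (yhat_lower / yhat_upper); only ds and yhat are used below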

    # Split the Prophet output into history (dates with actuals)
    # and future (dates without actuals)
    df_plot = pd.merge(
        forecast[["ds", "yhat"]],
        df_prophet[["ds", "y"]],
        on="ds",
        how="left"
    )
    df_history = df_plot.dropna(subset=["y"]).copy()
    df_future = df_plot[df_plot["y"].isna()].copy()

    # Average-based forecast over the same horizon
    df_avg = make_average_forecast(total_by_date, "2025-03-31")

    # Long format for a single seaborn call: one "type" per line
    df_history["type"] = "History"
    df_history["value"] = df_history["y"]

    df_future["type"] = "Forecast (Prophet)"
    df_future["value"] = df_future["yhat"]

    df_avg["type"] = "Forecast (average)"
    df_avg["value"] = df_avg["yhat"]

    df_combined = pd.concat([df_history, df_future, df_avg], ignore_index=True)

    df_combined["ds"] = pd.to_datetime(df_combined["ds"])

    # Solid line for history, dashed for both forecasts
    line_styles = {
        "History": "",
        "Forecast (Prophet)": (2, 2),
        "Forecast (average)": (2, 2)
    }
    line_colors = {
        "History": "blue",
        "Forecast (Prophet)": "red",
        "Forecast (average)": "green"
    }

    fig2, ax2 = plt.subplots(figsize=(8, 5))
    sns.lineplot(
        data=df_combined,
        x="ds", y="value",
        hue="type",
        style="type",
        dashes=line_styles,
        palette=line_colors,
        markers=False,
        ax=ax2
    )
    ax2.set_title("Forecast through the end of March 2025 (Prophet & average)")
    ax2.set_xlabel("Date")
    ax2.set_ylabel("Cumulative SMS count (Total)")
    fig2.autofmt_xdate(rotation=30)

    buf2 = io.BytesIO()
    fig2.savefig(buf2, format="png")
    buf2.seek(0)
    image2_pil = Image.open(buf2)
    plt.close(fig2)

    return bars_html, image1_pil, image2_pil


# Gradio UI: one button refreshes the progress bars and both charts
with gr.Blocks() as demo:
    gr.Markdown("<h2>Number of saved SMS (Даша, Лера, Света, Total) + forecast</h2>")

    btn = gr.Button("Refresh the data and show results")

    html_output = gr.HTML(label="Progress bars: SMS counts and %")
    image_output1 = gr.Image(type="pil", label="Cumulative chart")
    image_output2 = gr.Image(type="pil", label="Forecast: Prophet & average")

    btn.click(fn=process_data, outputs=[html_output, image_output1, image_output2])

if __name__ == "__main__":
    demo.launch()