import gradio as gr import pandas as pd import seaborn as sns import matplotlib.pyplot as plt from prophet import Prophet import io from PIL import Image # ===================== # Первый набор CSV-файлов # ===================== URL_DASHA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Dasha_2025/main/messages.csv" URL_LERA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Lera_2025/main/messages.csv" URL_SVETA = "https://raw.githubusercontent.com/fruitpicker01/Storage_Sveta_2025/main/messages.csv" # ===================== # Второй набор CSV-файлов # ===================== URL_DASHA_2 = "https://raw.githubusercontent.com/fruitpicker01/Storage_2_Dasha_2025/main/messages.csv" URL_LERA_2 = "https://raw.githubusercontent.com/fruitpicker01/Storage_2_Lera_2025/main/messages.csv" URL_SVETA_2 = "https://raw.githubusercontent.com/fruitpicker01/Storage_2_Sveta_2025/main/messages.csv" # ===================== # Третий набор CSV-файлов (messages_2.csv) # ===================== URL_DASHA_3 = "https://raw.githubusercontent.com/fruitpicker01/Storage_2_Dasha_2025/main/messages_2.csv" URL_LERA_3 = "https://raw.githubusercontent.com/fruitpicker01/Storage_2_Lera_2025/main/messages_2.csv" URL_SVETA_3 = "https://raw.githubusercontent.com/fruitpicker01/Storage_2_Sveta_2025/main/messages_2.csv" def read_and_process_data(url, user_name): """ Считывает CSV, отбирает нужные столбцы, удаляет дубликаты (gender, generation, industry, opf), приводит timestamp -> date. Возвращает: - unique_count (кол-во уникальных записей) - df_daily: [date, count, user] """ import requests, base64, io print(f"\n=== [{user_name}] чтение CSV ===") import re pattern = re.compile(r"https://raw\.githubusercontent\.com/([^/]+)/([^/]+)/([^/]+)/(.+)") m = pattern.match(url) if not m: # Если URL не совпадает с raw.githubusercontent.com, пробуем напрямую print(f"[{user_name}] URL не совпадает с raw.githubusercontent.com, читаем напрямую...") try: df = pd.read_csv(url, na_values=["Не выбрано"]) except Exception as e: print(f"[{user_name}] Ошибка при pd.read_csv напрямую: {e}") return 0, pd.DataFrame(columns=["date", "count", "user"]) else: owner = m.group(1) repo_name = m.group(2) branch = m.group(3) file_path = m.group(4) api_url = f"https://api.github.com/repos/{owner}/{repo_name}/contents/{file_path}?ref={branch}" print(f"[{user_name}] Пытаемся Contents API: {api_url}") resp = requests.get(api_url) if resp.status_code != 200: print(f"[{user_name}] Не удалось получить JSON (статус={resp.status_code}), читаем напрямую...") try: df = pd.read_csv(url, na_values=["Не выбрано"]) except Exception as e: print(f"[{user_name}] Ошибка при pd.read_csv напрямую: {e}") return 0, pd.DataFrame(columns=["date", "count", "user"]) else: data_json = resp.json() size = data_json.get("size", 0) file_content_encoded = data_json.get("content") download_url = data_json.get("download_url") if not file_content_encoded or size > 1_000_000: # Большой файл или отсутствует content => используем download_url print(f"[{user_name}] Файл крупнее 1 МБ или content отсутствует, скачиваем по download_url={download_url}") try: resp2 = requests.get(download_url) resp2.raise_for_status() csv_text = resp2.text df = pd.read_csv(io.StringIO(csv_text), na_values=["Не выбрано"]) except Exception as e: print(f"[{user_name}] Ошибка при чтении по download_url: {e}") return 0, pd.DataFrame(columns=["date", "count", "user"]) else: # Получаем Base64 и декодируем try: file_bytes = base64.b64decode(file_content_encoded) df = pd.read_csv(io.StringIO(file_bytes.decode("utf-8")), na_values=["Не выбрано"]) except Exception as e: print(f"[{user_name}] Ошибка декодирования Base64: {e}") return 0, pd.DataFrame(columns=["date", "count", "user"]) print(f"[{user_name}] Исходное кол-во строк: {len(df)}") cols = ["gender", "generation", "industry", "opf", "timestamp"] df = df[[c for c in cols if c in df.columns]].copy() print(f"[{user_name}] После отбора столбцов: {df.shape}") df_unique = df.drop_duplicates(subset=["gender", "generation", "industry", "opf"]).copy() print(f"[{user_name}] После drop_duplicates: {df_unique.shape}") df_unique["timestamp"] = pd.to_numeric(df_unique["timestamp"], errors='coerce') df_unique["date"] = pd.to_datetime(df_unique["timestamp"], unit="s", origin="unix", errors='coerce').dt.date count_nat = df_unique["date"].isna().sum() print(f"[{user_name}] Кол-во NaT дат: {count_nat}") unique_count = len(df_unique) # Группировка по датам df_daily = df_unique.groupby("date").size().reset_index(name="count") df_daily["user"] = user_name return unique_count, df_daily def make_average_forecast(total_by_date, end_date_str="2025-03-31"): """ Делает «прогноз по среднему» до указанной даты (end_date_str). Берём средний дневной прирост count и добавляем его день за днём (не учитывая выходные). Возвращает DataFrame: [ds, yhat] ds - дата (Timestamp) yhat - прогноз накопленной суммы """ if total_by_date.empty: return pd.DataFrame(columns=["ds", "yhat"]) df_tmp = total_by_date.copy() df_tmp["date"] = pd.to_datetime(df_tmp["date"]) avg_inc = df_tmp["count"].mean() if len(df_tmp) else 0 last_date = df_tmp["date"].max() last_cumulative = df_tmp["cumulative"].iloc[-1] end_date = pd.to_datetime(end_date_str) forecast_data = [] running_total = last_cumulative current_date = last_date while current_date < end_date: current_date += pd.Timedelta(days=1) if current_date > end_date: break running_total += avg_inc forecast_data.append({"ds": current_date, "yhat": running_total}) return pd.DataFrame(forecast_data) def process_data(): print("\n=== Начинаем process_data (Seaborn + Prophet + средний) ===") # ====== Чтение данных (первый набор) ====== dasha_count, dasha_daily = read_and_process_data(URL_DASHA, "Даша") lera_count, lera_daily = read_and_process_data(URL_LERA, "Лера") sveta_count, sveta_daily = read_and_process_data(URL_SVETA, "Света") # ====== Чтение (второй набор) ====== try: dasha_count2, dasha_daily2 = read_and_process_data(URL_DASHA_2, "Даша (2)") dasha_daily2["user"] = "Даша" except Exception as e: print(f"[Даша (2)] Ошибка при чтении дополнительного CSV: {e}") dasha_count2, dasha_daily2 = 0, pd.DataFrame(columns=["date", "count", "user"]) try: lera_count2, lera_daily2 = read_and_process_data(URL_LERA_2, "Лера (2)") lera_daily2["user"] = "Лера" except Exception as e: print(f"[Лера (2)] Ошибка при чтении дополнительного CSV: {e}") lera_count2, lera_daily2 = 0, pd.DataFrame(columns=["date", "count", "user"]) try: sveta_count2, sveta_daily2 = read_and_process_data(URL_SVETA_2, "Света (2)") sveta_daily2["user"] = "Света" except Exception as e: print(f"[Света (2)] Ошибка при чтении дополнительного CSV: {e}") sveta_count2, sveta_daily2 = 0, pd.DataFrame(columns=["date", "count", "user"]) # ====== Чтение (третий набор: messages_2.csv) ====== try: dasha_count3, dasha_daily3 = read_and_process_data(URL_DASHA_3, "Даша (3)") # Объединяем с "Дашей" dasha_daily3["user"] = "Даша" except Exception as e: print(f"[Даша (3)] Ошибка при чтении messages_2.csv: {e}") dasha_count3, dasha_daily3 = 0, pd.DataFrame(columns=["date", "count", "user"]) try: lera_count3, lera_daily3 = read_and_process_data(URL_LERA_3, "Лера (3)") lera_daily3["user"] = "Лера" except Exception as e: print(f"[Лера (3)] Ошибка при чтении messages_2.csv: {e}") lera_count3, lera_daily3 = 0, pd.DataFrame(columns=["date", "count", "user"]) try: sveta_count3, sveta_daily3 = read_and_process_data(URL_SVETA_3, "Света (3)") sveta_daily3["user"] = "Света" except Exception as e: print(f"[Света (3)] Ошибка при чтении messages_2.csv: {e}") sveta_count3, sveta_daily3 = 0, pd.DataFrame(columns=["date", "count", "user"]) # ====== Итоговые суммы ====== dasha_count_total = dasha_count + dasha_count2 + dasha_count3 lera_count_total = lera_count + lera_count2 + lera_count3 sveta_count_total = sveta_count + sveta_count2 + sveta_count3 dasha_daily_total = pd.concat([dasha_daily, dasha_daily2, dasha_daily3], ignore_index=True) lera_daily_total = pd.concat([lera_daily, lera_daily2, lera_daily3 ], ignore_index=True) sveta_daily_total = pd.concat([sveta_daily, sveta_daily2, sveta_daily3], ignore_index=True) total_count = dasha_count_total + lera_count_total + sveta_count_total print(f"Суммарное количество (Д+Л+С): {total_count}") # ====== Проценты ====== dasha_percent = round((dasha_count_total / 234) * 100) if 234 else 0 lera_percent = round((lera_count_total / 234) * 100) if 234 else 0 sveta_percent = round((sveta_count_total / 234) * 100) if 234 else 0 total_percent = round((total_count / 702) * 100) if 702 else 0 def get_progress_bar(label, abs_val, pct): capacity = 234 if label in ["Даша", "Лера", "Света"] else 702 return f"""