Spaces:
Sleeping
Sleeping
import gradio as gr | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from scipy import stats | |
from rapidfuzz import fuzz | |
####################################### | |
# Debug Logging Function | |
####################################### | |
def debug_print(message):
    """Emit a debug message on standard output.

    Args:
        message: Object to display; it is rendered exactly as ``print`` renders it.
    """
    print(message)
####################################### | |
# Data Generation Functions | |
####################################### | |
def generate_case_data(num_records=5000):
    """Simulate a table of support cases whose KPIs shift after a per-LOB release date.

    Each case gets a random line of business (LOB), issue type, advocate and a
    case date drawn uniformly from 2021-01-01 through 2023-12-31 inclusive.
    Cases dated before their LOB's release date take their KPI values from the
    "pre" normal distributions; all other cases take them from the "post" ones.
    Randomness comes from NumPy's global random state.

    Args:
        num_records: Number of cases to generate.

    Returns:
        A DataFrame with the columns serial_number (1..num_records), advocate,
        LOB, issue_type, case_date, CSAT, days_to_close,
        first_contact_resolution and CPI. Initiative utilization flags are
        deliberately not part of this table.
    """
    debug_print("Generating case data...")
    lob_list = np.random.choice(["Modern Life", "Xbox", "CAPS", "Devices", "Modern Work"], num_records)
    issue_types = np.random.choice(["Billing", "Technical", "Hacking", "Service", "Access"], num_records)
    advocates = np.random.choice(["Alice", "Bob", "Charlie", "Diana", "Eve"], num_records)

    start_date = pd.Timestamp("2021-01-01")
    end_date = pd.Timestamp("2023-12-31")
    # randint's upper bound is exclusive; the +1 makes end_date itself reachable.
    day_offsets = np.random.randint(0, (end_date - start_date).days + 1, num_records)
    case_dates = start_date + pd.to_timedelta(day_offsets, unit="D")

    # Simulated release dates per LOB (set in early 2022).
    lob_release_dates = {
        "Modern Life": pd.Timestamp("2022-01-01"),
        "Xbox": pd.Timestamp("2022-02-01"),
        "CAPS": pd.Timestamp("2022-03-01"),
        "Devices": pd.Timestamp("2022-04-01"),
        "Modern Work": pd.Timestamp("2022-05-01"),
    }
    # Map through a Series so the comparison below is datetime-vs-datetime
    # rather than datetime-vs-object.
    release_dates = pd.Series(lob_list).map(lob_release_dates).to_numpy()
    pre_release = np.asarray(case_dates < release_dates)

    # (mean, std) of each KPI before and after the release.
    kpi_params = {
        "CSAT": ((80, 5), (85, 5)),
        "days_to_close": ((5, 1), (4, 1)),
        "first_contact_resolution": ((70, 8), (75, 8)),
        "CPI": ((50, 5), (45, 5)),
    }
    kpi_columns = {}
    for kpi_name, (pre_params, post_params) in kpi_params.items():
        pre_draw = np.random.normal(*pre_params, num_records)
        post_draw = np.random.normal(*post_params, num_records)
        kpi_columns[kpi_name] = np.where(pre_release, pre_draw, post_draw)

    debug_print("Case data generated.")
    return pd.DataFrame({
        "serial_number": np.arange(1, num_records + 1),
        "advocate": advocates,
        "LOB": lob_list,
        "issue_type": issue_types,
        "case_date": case_dates,
        **kpi_columns,
    })
def generate_advocate_adoption_data():
    """Build the default per-advocate adoption dates for the three initiatives.

    Returns:
        A DataFrame with one row per advocate and the columns advocate,
        Symbiosis_adoption_date, Voice Translation_adoption_date and
        NoteHero_adoption_date. The date columns hold ``datetime.date`` objects.
    """
    debug_print("Generating advocate adoption data...")
    # Adoption dates are hard-coded to early June 2022, in advocate order.
    date_strings_by_column = {
        "Symbiosis_adoption_date": ["2022-06-05", "2022-06-10", "2022-06-08", "2022-06-12", "2022-06-07"],
        "Voice Translation_adoption_date": ["2022-06-03", "2022-06-07", "2022-06-05", "2022-06-09", "2022-06-04"],
        "NoteHero_adoption_date": ["2022-06-02", "2022-06-06", "2022-06-04", "2022-06-08", "2022-06-03"],
    }
    adoption = pd.DataFrame({"advocate": ["Alice", "Bob", "Charlie", "Diana", "Eve"]})
    for column_name, date_strings in date_strings_by_column.items():
        # Parse the strings, then keep plain date objects rather than timestamps.
        adoption[column_name] = pd.Series(pd.to_datetime(date_strings)).dt.date
    debug_print("Advocate adoption data generated.")
    return adoption
def generate_utilization_data():
    """Simulate per-case initiative utilization flags.

    Reads the serial numbers from the module-level ``global_case_data`` and
    attaches one 0/1 column per initiative, each value drawn independently
    with a 50% chance of 1.

    Returns:
        A DataFrame with the columns serial_number, Voice Translation_utilized,
        Symbiosis_utilized and NoteHero_utilized.
    """
    debug_print("Generating initiative utilization data...")
    usage = global_case_data[["serial_number"]].copy()
    row_count = len(usage)
    flag_columns = ("Voice Translation_utilized", "Symbiosis_utilized", "NoteHero_utilized")
    for flag_column in flag_columns:
        usage[flag_column] = np.random.choice([0, 1], size=row_count, p=[0.5, 0.5])
    debug_print("Initiative utilization data generated.")
    return usage
####################################### | |
# Global Data Setup | |
####################################### | |
# Module-level datasets shared by every analysis callback.
global_case_data = generate_case_data(num_records=5000)
global_advocate_adoption = generate_advocate_adoption_data()
global_initiative_utilization = generate_utilization_data()

# Keep only utilization rows whose serial number exists in the case dataset.
valid_serials = set(global_case_data["serial_number"])
_has_known_serial = global_initiative_utilization["serial_number"].isin(valid_serials)
global_initiative_utilization = global_initiative_utilization[_has_known_serial]
debug_print("Global datasets generated.")
####################################### | |
# Helper Calculation Functions | |
####################################### | |
def calculate_throughput(df, start_date, end_date):
    """Return the average number of cases per day inside a date window.

    Args:
        df: DataFrame with a ``case_date`` column comparable to the bounds.
        start_date: Inclusive lower bound of the window.
        end_date: Inclusive upper bound of the window.

    Returns:
        Cases in the window divided by ``(end_date - start_date).days``; a
        zero-day span is treated as one day. Returns 0 when the window holds
        no cases.
    """
    case_dates = df["case_date"]
    inside_window = (case_dates >= start_date) & (case_dates <= end_date)
    case_count = int(inside_window.sum())

    span_days = (end_date - start_date).days
    if not span_days:
        # Same-day window: count it as a single day to avoid dividing by zero.
        span_days = 1

    if case_count > 0:
        return case_count / span_days
    return 0
def calculate_throughput_per_advocate(df, start_date, end_date):
    """Return cases per day for every (LOB, advocate) pair inside a date window.

    Args:
        df: DataFrame with ``case_date``, ``LOB`` and ``advocate`` columns.
        start_date: Inclusive lower bound of the window.
        end_date: Inclusive upper bound of the window.

    Returns:
        A Series indexed by (LOB, advocate) holding the case count divided by
        ``(end_date - start_date).days``, or ``None`` when no case falls in the
        window. A zero-day span is treated as one day, matching
        ``calculate_throughput``, so a same-day window no longer yields ``inf``.
    """
    df_filtered = df.loc[(df["case_date"] >= start_date) & (df["case_date"] <= end_date)]
    if df_filtered.empty:
        return None
    num_days = (end_date - start_date).days or 1
    return df_filtered.groupby(["LOB", "advocate"]).size() / num_days
####################################### | |
# Analysis Functions | |
####################################### | |
def _daily_case_counts(case_dates):
    """Return the case count for every calendar day spanned by *case_dates*.

    Days inside the span without any case are included as zeros, so the result
    is a per-day sample that can be fed to a t-test.
    """
    days = pd.to_datetime(pd.Series(list(case_dates)))
    counts = days.value_counts()
    full_span = pd.date_range(days.min(), days.max(), freq="D")
    return counts.reindex(full_span, fill_value=0).to_numpy(dtype=float)


def analyze_overall_impact(release_date_str, lob_filter, issue_filter, kpi, one_tailed):
    """Compare a KPI before and after a global release date with Welch's t-test.

    Args:
        release_date_str: Release date as text; parsed with ``pd.to_datetime``.
        lob_filter: LOB to keep, or "All" for no LOB filtering.
        issue_filter: Issue type to keep, or "All" for no issue filtering.
        kpi: KPI column name, or "throughput" (case-insensitive). For
            throughput the t-test compares daily case counts of the two
            periods, while the chart shows the aggregated cases/day values.
        one_tailed: When true, test the one-sided hypothesis that the
            pre-release mean is greater than the post-release mean (a positive
            t-statistic); otherwise run a two-sided test.

    Returns:
        ``(text, figure)``: a text summary and a matplotlib figure. On any
        input problem the text is an explanatory message and the figure is
        ``None``.
    """
    debug_print("Running Overall Impact Analysis...")
    try:
        parsed_release = pd.to_datetime(release_date_str)
        if pd.isnull(parsed_release):
            raise ValueError("release date is empty or not a valid date")
        # Parse release date as a date object (no .dt on a scalar).
        release_date = parsed_release.date()
    except Exception as e:
        return f"Error parsing release date: {str(e)}", None

    df = global_case_data.copy()
    df["case_date"] = pd.to_datetime(df["case_date"]).dt.date
    if lob_filter != "All":
        df = df[df["LOB"] == lob_filter]
    if issue_filter != "All":
        df = df[df["issue_type"] == issue_filter]
    if df.empty:
        return "No data available for the selected filters.", None

    is_throughput = kpi.lower() == "throughput"
    if not is_throughput and kpi not in df.columns:
        return f"Unknown KPI: {kpi}", None

    pre_data = df[df["case_date"] < release_date]
    post_data = df[df["case_date"] >= release_date]
    if pre_data.empty or post_data.empty:
        return "No data available for the selected date range.", None

    if is_throughput:
        throughput_pre = calculate_throughput(pre_data, pre_data["case_date"].min(), pre_data["case_date"].max())
        throughput_post = calculate_throughput(post_data, post_data["case_date"].min(), post_data["case_date"].max())
        # A t-test on two single aggregated numbers is always NaN, so the
        # per-day case counts of each period are used as the samples instead.
        pre_vals = _daily_case_counts(pre_data["case_date"])
        post_vals = _daily_case_counts(post_data["case_date"])
    else:
        pre_vals, post_vals = pre_data[kpi].values, post_data[kpi].values

    if len(pre_vals) > 1 and len(post_vals) > 1:
        t_stat, p_value = stats.ttest_ind(pre_vals, post_vals, equal_var=False)
    else:
        # Welch's test needs at least two observations per group.
        t_stat, p_value = np.nan, np.nan

    if one_tailed:
        # One-sided p-value for "pre > post": half the two-sided value when t
        # points that way, otherwise its complement.
        p_value = p_value / 2 if t_stat > 0 else 1 - p_value / 2
    significance = "Significant" if p_value < 0.05 else "Not Significant"

    analysis_text = (
        f"Overall Impact Analysis for KPI: {kpi}\n"
        f"Filters - LOB: {lob_filter}, Issue Type: {issue_filter}\n"
        f"Global Release Date: {release_date}\n"
        f"T-Test: T-Statistic = {t_stat:.3f}, P-Value = {p_value:.3f} ({significance})\n"
    )

    fig, ax = plt.subplots(figsize=(6, 4))
    if is_throughput:
        # For throughput, show a simple bar graph with the aggregated throughput.
        ax.bar(["Pre", "Post"], [throughput_pre, throughput_post], color=["blue", "green"])
        ax.set_ylabel("Throughput (cases/day)")
    else:
        ax.boxplot([pre_data[kpi].values, post_data[kpi].values])
        # Label the ticks afterwards: boxplot's ``labels`` keyword is
        # deprecated in newer matplotlib releases.
        ax.set_xticklabels(["Pre", "Post"])
        ax.set_ylabel(kpi)
    ax.set_title("Overall Impact Analysis")
    plt.tight_layout()
    # Drop the figure from pyplot's registry; the Figure object is still returned.
    plt.close(fig)
    return analysis_text, fig
def _read_uploaded_table(uploaded_file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    Accepts either a path string or an object exposing the path as ``.name``.
    Excel extensions go straight to ``read_excel``; anything else is tried as
    CSV first and as Excel second. Errors from the last reader propagate.
    """
    path = getattr(uploaded_file, "name", uploaded_file)
    if str(path).lower().endswith((".xls", ".xlsx")):
        return pd.read_excel(path)
    try:
        return pd.read_csv(path)
    except Exception:
        # Not parseable as CSV; give the Excel reader a chance before failing.
        return pd.read_excel(path)


def _build_adoption_mapping(uploaded_df, name_col, date_col, advocate_names):
    """Map known advocate names to the earliest adoption date found in an upload.

    Uploaded names are matched case-insensitively against *advocate_names*
    with ``fuzz.ratio`` and accepted at a score of 95 or more. Rows whose date
    cannot be parsed are skipped.
    """
    mapping = {}
    for raw_name, raw_date in zip(uploaded_df[name_col], uploaded_df[date_col]):
        name_uploaded = str(raw_name)
        adoption_date = pd.to_datetime(raw_date, utc=True, errors="coerce")
        if pd.isnull(adoption_date):
            debug_print(f"β Skipping invalid adoption date for {name_uploaded}")
            continue
        adoption_date = adoption_date.date()
        for adv in advocate_names:
            if fuzz.ratio(name_uploaded.lower(), adv.lower()) >= 95:
                mapping[adv] = min(mapping.get(adv, adoption_date), adoption_date)
    return mapping


def analyze_all_advocates_impact(method, initiative, lob_filter, issue_filter, kpi, one_tailed,
                                 adoption_file, adoption_name_col, adoption_date_col, utilization_file):
    """Compare a KPI before and after initiative adoption, advocate by advocate.

    Args:
        method: "Adoption Date" splits each advocate's cases at their adoption
            date; "Initiative Utilization" splits them by the per-case
            ``<initiative>_utilized`` flag (0 = pre, 1 = post).
        initiative: Initiative name used to build the adoption/utilization
            column names.
        lob_filter: LOB to keep, or "All".
        issue_filter: Issue type to keep, or "All".
        kpi: KPI column name, or "throughput" (case-insensitive). For
            throughput each advocate contributes one pre and one post
            cases/day value to the aggregated statistics.
        one_tailed: When true, test the one-sided hypothesis that the pre mean
            is greater than the post mean (positive t-statistic).
        adoption_file: Optional upload with advocate names and adoption dates.
        adoption_name_col: Name column in the adoption upload.
        adoption_date_col: Date column in the adoption upload.
        utilization_file: Optional upload with ``serial_number`` and
            ``<initiative>_utilized`` columns. When a serial number occurs
            more than once, only its first row is used.

    Returns:
        ``(summary_text, figure, results_df)``. On failure the text holds an
        explanatory message and the other two items are ``None``.
    """
    try:
        debug_print("π Running Advocate Impact Analysis...")
        df = global_case_data.copy()
        if lob_filter != "All":
            df = df[df["LOB"] == lob_filter]
        if issue_filter != "All":
            df = df[df["issue_type"] == issue_filter]
        if df.empty:
            debug_print("β No cases available for the selected filters.")
            return "No data available for the selected filters.", None, None
        df["case_date"] = pd.to_datetime(df["case_date"], utc=True, errors="coerce").dt.normalize().dt.date
        debug_print(f"β Data filtered. {len(df)} cases remain.")
        debug_print(f"π Min case date: {df['case_date'].min()}, Max case date: {df['case_date'].max()}")

        # For Initiative Utilization, use the standalone DF unless a file was uploaded.
        utilization_df = global_initiative_utilization.copy()
        if method == "Initiative Utilization" and utilization_file is not None:
            try:
                util_df = _read_uploaded_table(utilization_file)
            except Exception as e:
                debug_print(f"β Error reading utilization file: {str(e)}")
                return f"Error reading utilization file: {str(e)}", None, None
            if "serial_number" not in util_df.columns:
                debug_print("β The uploaded utilization file must have a 'serial_number' column.")
                return "The uploaded utilization file must have a 'serial_number' column.", None, None
            utilization_df = util_df.copy()
            debug_print(f"β Uploaded initiative utilization file processed: {utilization_df.shape[0]} rows.")
        else:
            debug_print("π No initiative utilization file uploaded; using default global initiative utilization data.")

        util_col = initiative + "_utilized"
        if method == "Initiative Utilization":
            if util_col not in utilization_df.columns:
                # Report the real cause instead of failing once per advocate.
                debug_print(f"β The utilization data must have a '{util_col}' column.")
                return f"The utilization data must have a '{util_col}' column.", None, None
            # Merge once for all advocates; de-duplicate serial numbers so the
            # left join cannot multiply case rows.
            flags = utilization_df[["serial_number", util_col]].drop_duplicates(subset="serial_number")
            df = df.merge(flags, on="serial_number", how="left")
            df[util_col] = df[util_col].fillna(0)

        # Build adoption mapping for the Adoption Date method.
        adoption_mapping = {}
        if method == "Adoption Date" and adoption_file is not None:
            try:
                uploaded_df = _read_uploaded_table(adoption_file)
            except Exception as e:
                debug_print(f"β Error reading adoption file: {str(e)}")
                return f"Error reading adoption file: {str(e)}", None, None
            if adoption_name_col not in uploaded_df.columns or adoption_date_col not in uploaded_df.columns:
                debug_print("β Specified columns not found in the uploaded adoption file.")
                return "Specified columns not found in the uploaded adoption file.", None, None
            debug_print("π Processing uploaded adoption file...")
            adoption_mapping = _build_adoption_mapping(
                uploaded_df, adoption_name_col, adoption_date_col, df["advocate"].unique()
            )
            debug_print(f"β Uploaded adoption file processed. Mapped {len(adoption_mapping)} advocates.")
        else:
            debug_print("π No adoption file uploaded; using default global adoption data.")

        # Normalize the default adoption dates on a private copy so the
        # module-level table is never mutated by a request.
        default_adoption = global_advocate_adoption.copy()
        for col in ["Symbiosis_adoption_date", "Voice Translation_adoption_date", "NoteHero_adoption_date"]:
            converted = pd.to_datetime(default_adoption[col], utc=True, errors="coerce")
            default_adoption[col] = converted.apply(lambda x: x.date() if pd.notnull(x) else None)

        is_throughput = kpi.lower() == "throughput"
        all_pre_vals, all_post_vals = [], []
        results = []
        debug_print("π Processing advocates...")
        for adv in df["advocate"].unique():
            # Best effort per advocate: a failure is logged and the advocate skipped.
            try:
                df_adv = df[df["advocate"] == adv]
                if method == "Adoption Date":
                    if adv in adoption_mapping:
                        adoption_date = adoption_mapping[adv]
                    else:
                        col_name = initiative + "_adoption_date"
                        adoption_series = default_adoption.loc[default_adoption["advocate"] == adv, col_name]
                        if adoption_series.empty or pd.isnull(adoption_series.values[0]):
                            debug_print(f"β Skipping {adv}: No valid adoption date found.")
                            continue
                        adoption_date = adoption_series.values[0]
                    if pd.isnull(adoption_date):
                        debug_print(f"β Skipping {adv}: Adoption date is NULL after conversion.")
                        continue
                    debug_print(f"π Processing {adv}: Adoption Date = {adoption_date}")
                    pre_data = df_adv[df_adv["case_date"] < adoption_date]
                    post_data = df_adv[df_adv["case_date"] >= adoption_date]
                    debug_print(f" {adv}: Pre-data count = {len(pre_data)}, Post-data count = {len(post_data)}")
                    if pre_data.empty:
                        debug_print(f"β Skipping {adv}: No pre-adoption cases.")
                        continue
                    if post_data.empty:
                        debug_print(f"β Skipping {adv}: No post-adoption cases.")
                        continue
                    slice_info = f"Adoption Date: {adoption_date}"
                elif method == "Initiative Utilization":
                    pre_data = df_adv[df_adv[util_col] == 0]
                    post_data = df_adv[df_adv[util_col] == 1]
                    slice_info = "Initiative Utilization"
                else:
                    continue
                if pre_data.empty or post_data.empty:
                    debug_print(f"β Advocate {adv}: Not enough data; skipping.")
                    continue

                if is_throughput:
                    pre_val = calculate_throughput(pre_data, pre_data["case_date"].min(), pre_data["case_date"].max())
                    post_val = calculate_throughput(post_data, post_data["case_date"].min(), post_data["case_date"].max())
                    # There is no "throughput" column to pool; each advocate
                    # contributes one sample per period instead.
                    pre_samples, post_samples = [pre_val], [post_val]
                else:
                    pre_samples = pre_data[kpi].values
                    post_samples = post_data[kpi].values
                    pre_val = np.mean(pre_samples)
                    post_val = np.mean(post_samples)

                pct_change = ((post_val - pre_val) / pre_val) * 100 if pre_val else np.nan
                results.append({
                    "advocate": adv,
                    "Pre_Mean": pre_val,
                    "Post_Mean": post_val,
                    "Percent_Change": pct_change,
                    "Slice_Info": slice_info
                })
                all_pre_vals.extend(pre_samples)
                all_post_vals.extend(post_samples)
                debug_print(f"β Processed {adv}: {pct_change:.2f}% change.")
            except Exception as e:
                debug_print(f"β Error processing {adv}: {str(e)}")

        if not results:
            debug_print("β No valid advocates found for analysis.")
            return "No valid advocates found for analysis. Check the case date ranges.", None, None
        results_df = pd.DataFrame(results).sort_values(by="Percent_Change", ascending=False)

        # Perform aggregated T-Test.
        try:
            if len(all_pre_vals) > 1 and len(all_post_vals) > 1:
                t_stat, p_value = stats.ttest_ind(all_pre_vals, all_post_vals, equal_var=False)
                if one_tailed:
                    # One-sided p-value for "pre > post": half the two-sided
                    # value when t points that way, otherwise its complement.
                    p_value = p_value / 2 if t_stat > 0 else 1 - p_value / 2
                significance = "Statistically Significant" if p_value < 0.05 else "Not Statistically Significant"
            else:
                t_stat, p_value = np.nan, np.nan
                significance = "Insufficient Data for Statistical Test"
        except Exception as e:
            debug_print(f"β Error performing T-Test: {str(e)}")
            return f"Error performing T-Test: {str(e)}", None, None

        pre_mean = np.mean(all_pre_vals) if len(all_pre_vals) > 0 else np.nan
        post_mean = np.mean(all_post_vals) if len(all_post_vals) > 0 else np.nan
        overall_pct_change = ((post_mean - pre_mean) / pre_mean) * 100 if pre_mean else np.nan
        overall_summary = (
            f"π Aggregated Advocate Impact Analysis using method '{method}' for initiative '{initiative}' on KPI '{kpi}'.\n"
            f"Number of advocates analyzed: {len(results_df)}\n"
            f"Aggregated Pre vs Post Analysis:\n"
            f"- Pre-Adoption Mean: {pre_mean:.2f}\n"
            f"- Post-Adoption Mean: {post_mean:.2f}\n"
            f"- Percent Change: {overall_pct_change:.2f}%\n"
            f"T-Test Results:\n"
            f"- T-Statistic: {t_stat:.3f}\n"
            f"- P-Value: {p_value:.3f}\n"
            f"- Result: {significance}\n"
        )

        fig, ax = plt.subplots(figsize=(6, 4))
        ax.bar(["Pre-Adoption", "Post-Adoption"], [pre_mean, post_mean], color=["blue", "green"])
        ax.set_title(f"Aggregated Impact of {initiative} on {kpi}")
        ax.set_ylabel(kpi)
        plt.tight_layout()
        # Drop the figure from pyplot's registry; the Figure object is still returned.
        plt.close(fig)
        debug_print("π― Advocate Impact Analysis completed.")
        return overall_summary, fig, results_df
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of crashing.
        debug_print(f"β Fatal Error in Function: {str(e)}")
        return f"Fatal Error: {str(e)}", None, None
# Dropdown options shared by both analysis tabs.
_LOB_CHOICES = ("All", "Modern Life", "Xbox", "CAPS", "Devices", "Modern Work")
_ISSUE_CHOICES = ("All", "Billing", "Technical", "Hacking", "Service", "Access")
_KPI_CHOICES = ("CSAT", "days_to_close", "first_contact_resolution", "CPI", "throughput")


def _debug_log_hint():
    """Return the static hint shown by the Debug Logs tab."""
    return "Check console output for debug logs."


with gr.Blocks() as demo:
    gr.Markdown("# Impact Analysis Dashboard")
    with gr.Tabs():
        # Tab 1: overall pre/post comparison around one global release date.
        with gr.TabItem("Overall Impact Analysis"):
            gr.Markdown("### Overall Impact Analysis (Global Release Date)")
            overall_release_date = gr.Textbox(
                label="Global Release Date (YYYY-MM-DD)",
                placeholder="e.g., 2022-01-15",
            )
            overall_lob = gr.Dropdown(choices=list(_LOB_CHOICES), label="Filter by LOB", value="All")
            overall_issue = gr.Dropdown(choices=list(_ISSUE_CHOICES), label="Filter by Issue Type", value="All")
            overall_kpi = gr.Dropdown(choices=list(_KPI_CHOICES), label="Select KPI", value="CSAT")
            one_tailed_overall = gr.Checkbox(label="Use One-Tailed T-Test")
            overall_btn = gr.Button("Analyze Overall Impact")
            overall_output = gr.Textbox(label="Overall Impact Analysis Results")
            overall_plot = gr.Plot(label="Overall Impact Graph")
            overall_inputs = [overall_release_date, overall_lob, overall_issue, overall_kpi, one_tailed_overall]
            overall_btn.click(
                analyze_overall_impact,
                inputs=overall_inputs,
                outputs=[overall_output, overall_plot],
            )

        # Tab 2: per-advocate pre/post comparison, aggregated.
        with gr.TabItem("Advocate Impact Analysis"):
            gr.Markdown("### Advocate Impact Analysis (Aggregated Pre vs Post)")
            adoption_method = gr.Radio(
                choices=["Adoption Date", "Initiative Utilization"],
                label="Method",
                value="Adoption Date",
            )
            initiative_select = gr.Dropdown(
                choices=["Symbiosis", "Voice Translation", "NoteHero"],
                label="Select Initiative",
                value="Symbiosis",
            )
            adv_lob = gr.Dropdown(choices=list(_LOB_CHOICES), label="Filter by LOB", value="All")
            adv_issue = gr.Dropdown(choices=list(_ISSUE_CHOICES), label="Filter by Issue Type", value="All")
            adv_kpi = gr.Dropdown(choices=list(_KPI_CHOICES), label="Select KPI", value="CSAT")
            one_tailed_adv = gr.Checkbox(label="Use One-Tailed T-Test")
            with gr.Accordion("Optional File Uploads (Click to expand)", open=False):
                gr.Markdown("Upload is optional. For Adoption Date method, upload a CSV/Excel with two columns (Advocate Name and Adoption Date). For Initiative Utilization, upload a CSV/Excel with a 'serial_number' column.")
                adoption_file = gr.File(label="Upload Adoption Date File (optional)")
                adoption_name_col = gr.Textbox(label="Adoption File: Advocate Name Column", placeholder="e.g., Name")
                adoption_date_col = gr.Textbox(label="Adoption File: Adoption Date Column", placeholder="e.g., AdoptionDate")
                utilization_file = gr.File(label="Upload Initiative Utilization File (optional)")
            adv_btn = gr.Button("Analyze Advocate Impact")
            adv_overall_output = gr.Textbox(label="Aggregated Advocate Impact Summary")
            adv_plot = gr.Plot(label="Aggregated Advocate Impact Graph")
            adv_table = gr.Dataframe(label="Advocate Impact Details")
            advocate_inputs = [
                adoption_method, initiative_select, adv_lob, adv_issue, adv_kpi, one_tailed_adv,
                adoption_file, adoption_name_col, adoption_date_col, utilization_file,
            ]
            adv_btn.click(
                analyze_all_advocates_impact,
                inputs=advocate_inputs,
                outputs=[adv_overall_output, adv_plot, adv_table],
            )

        # Optional tab that only points the user at the console output.
        with gr.TabItem("Debug Logs"):
            gr.Markdown("### Debug Logs")
            debug_btn = gr.Button("Refresh Debug Logs")
            debug_output = gr.Textbox(label="Debug Logs", lines=15)
            debug_btn.click(_debug_log_hint, inputs=[], outputs=[debug_output])

demo.launch()