|
import gradio as gr |
|
import numpy as np |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from scipy import stats |
|
from rapidfuzz import fuzz |
|
|
|
|
|
|
|
|
|
def debug_print(message):
    """Write a diagnostic message to stdout (console-only debug channel)."""
    print(message)
|
|
|
|
|
|
|
|
|
def generate_case_data(num_records=5000, seed=None):
    """Generate a synthetic support-case dataset for the dashboard.

    Each record receives a random LOB, issue type, advocate, and a case date
    uniformly distributed over [2021-01-01, 2023-12-31]. KPI values are drawn
    from slightly different normal distributions depending on whether the case
    predates its LOB's release date, so the synthetic data exhibits a
    measurable "release impact".

    Args:
        num_records: Number of case rows to generate.
        seed: Optional seed for NumPy's global RNG so the synthetic data is
            reproducible. Defaults to None (non-deterministic), which
            preserves the previous behavior for existing callers.

    Returns:
        pd.DataFrame with columns: serial_number, advocate, LOB, issue_type,
        case_date, CSAT, days_to_close, first_contact_resolution, CPI.
    """
    debug_print("Generating case data...")
    if seed is not None:
        np.random.seed(seed)

    lob_list = np.random.choice(["Modern Life", "Xbox", "CAPS", "Devices", "Modern Work"], num_records)
    issue_types = np.random.choice(["Billing", "Technical", "Hacking", "Service", "Access"], num_records)
    advocates = np.random.choice(["Alice", "Bob", "Charlie", "Diana", "Eve"], num_records)

    start_date = pd.Timestamp("2021-01-01")
    end_date = pd.Timestamp("2023-12-31")
    days_range = (end_date - start_date).days

    # Uniformly spread case dates across the whole window.
    case_dates = start_date + pd.to_timedelta(np.random.randint(0, days_range, num_records), unit='D')

    # Per-LOB feature release dates: cases before a LOB's release date draw
    # from the "pre" KPI distributions below.
    lob_release_dates = {
        "Modern Life": pd.Timestamp("2022-01-01"),
        "Xbox": pd.Timestamp("2022-02-01"),
        "CAPS": pd.Timestamp("2022-03-01"),
        "Devices": pd.Timestamp("2022-04-01"),
        "Modern Work": pd.Timestamp("2022-05-01")
    }
    release_dates = np.array([lob_release_dates[lob] for lob in lob_list])
    pre_release = case_dates < release_dates

    # Post-release KPIs are deliberately shifted in the "better" direction:
    # higher CSAT / first-contact resolution, lower days_to_close / CPI.
    CSAT = np.where(pre_release, np.random.normal(80, 5, num_records), np.random.normal(85, 5, num_records))
    days_to_close = np.where(pre_release, np.random.normal(5, 1, num_records), np.random.normal(4, 1, num_records))
    first_contact_resolution = np.where(pre_release, np.random.normal(70, 8, num_records), np.random.normal(75, 8, num_records))
    CPI = np.where(pre_release, np.random.normal(50, 5, num_records), np.random.normal(45, 5, num_records))

    debug_print("Case data generated.")
    return pd.DataFrame({
        "serial_number": np.arange(1, num_records + 1),
        "advocate": advocates,
        "LOB": lob_list,
        "issue_type": issue_types,
        "case_date": case_dates,
        "CSAT": CSAT,
        "days_to_close": days_to_close,
        "first_contact_resolution": first_contact_resolution,
        "CPI": CPI
    })
|
|
|
def generate_advocate_adoption_data():
    """Build the default per-advocate initiative adoption-date table.

    Returns:
        pd.DataFrame with one row per advocate and one column per initiative
        ("Symbiosis_adoption_date", "Voice Translation_adoption_date",
        "NoteHero_adoption_date"), each holding datetime.date values.
    """
    debug_print("Generating advocate adoption data...")
    names = ["Alice", "Bob", "Charlie", "Diana", "Eve"]

    raw_dates = {
        "Symbiosis_adoption_date": ["2022-06-05", "2022-06-10", "2022-06-08", "2022-06-12", "2022-06-07"],
        "Voice Translation_adoption_date": ["2022-06-03", "2022-06-07", "2022-06-05", "2022-06-09", "2022-06-04"],
        "NoteHero_adoption_date": ["2022-06-02", "2022-06-06", "2022-06-04", "2022-06-08", "2022-06-03"]
    }

    # Parse every column to datetimes, then keep only the calendar-date part
    # so the table stores plain datetime.date objects.
    columns = {"advocate": names}
    for column_name, values in raw_dates.items():
        columns[column_name] = pd.to_datetime(values).date

    table = pd.DataFrame(columns)
    debug_print("Advocate adoption data generated.")
    return table
|
|
|
def generate_utilization_data():
    """Generate random initiative-utilization flags for every case.

    Returns:
        pd.DataFrame keyed by serial_number (taken from the module-level
        global_case_data) with one 0/1 column per initiative; each flag is
        drawn independently with probability 0.5.
    """
    debug_print("Generating initiative utilization data...")

    # Select just the key column. A single .copy() on the selection is
    # enough; the original copied the whole frame and then copied the
    # one-column selection a second time.
    df = global_case_data[["serial_number"]].copy()

    for initiative in ["Voice Translation_utilized", "Symbiosis_utilized", "NoteHero_utilized"]:
        df[initiative] = np.random.choice([0, 1], size=len(df), p=[0.5, 0.5])
    debug_print("Initiative utilization data generated.")
    return df
|
|
|
|
|
|
|
|
|
# --- Module-level dataset initialization (runs once at import) -------------
# These globals back both analysis tabs; the utilization table is generated
# from global_case_data, so order matters here.
global_case_data = generate_case_data(num_records=5000)
global_advocate_adoption = generate_advocate_adoption_data()
global_initiative_utilization = generate_utilization_data()

# Drop any utilization rows whose serial_number is not present in the case
# data (defensive filter; by construction the keys already match).
valid_serials = set(global_case_data["serial_number"])
global_initiative_utilization = global_initiative_utilization[global_initiative_utilization["serial_number"].isin(valid_serials)]
debug_print("Global datasets generated.")
|
|
|
|
|
|
|
|
|
def calculate_throughput(df, start_date, end_date):
    """Return the average number of cases per day in [start_date, end_date].

    Both endpoints are inclusive. Returns 0 when no cases fall inside the
    window; a zero-length window is counted as one day so the division is
    always well defined.
    """
    in_window = (df["case_date"] >= start_date) & (df["case_date"] <= end_date)
    window_cases = df.loc[in_window]
    if window_cases.empty:
        return 0
    span_days = (end_date - start_date).days or 1
    return len(window_cases) / span_days
|
|
|
def calculate_throughput_per_advocate(df, start_date, end_date):
    """Return cases/day within [start_date, end_date], grouped by (LOB, advocate).

    Both endpoints are inclusive.

    Returns:
        pd.Series indexed by (LOB, advocate), or None when no cases fall
        inside the window.

    Fix: a zero-length window (start_date == end_date) now counts as one
    day — matching calculate_throughput's `or 1` guard — instead of
    dividing the group sizes by zero.
    """
    df_filtered = df.loc[(df["case_date"] >= start_date) & (df["case_date"] <= end_date)]
    if df_filtered.empty:
        return None
    num_days = (end_date - start_date).days or 1  # guard zero-day windows
    throughput = df_filtered.groupby(["LOB", "advocate"]).size() / num_days
    return throughput
|
|
|
|
|
|
|
|
|
def analyze_overall_impact(release_date_str, lob_filter, issue_filter, kpi, one_tailed):
    """Pre/post comparison of a KPI around a single global release date.

    Args:
        release_date_str: Release date as a string (parsed via pd.to_datetime).
        lob_filter: LOB name, or "All" for no LOB filtering.
        issue_filter: Issue type, or "All" for no issue filtering.
        kpi: KPI column name, or "throughput" (computed, not a raw column).
        one_tailed: If True, the two-sided p-value is halved and significance
            additionally requires t_stat > 0.

    Returns:
        (analysis_text, matplotlib Figure) on success, or
        (error message string, None) on failure.
    """
    debug_print("Running Overall Impact Analysis...")
    try:
        release_date = pd.to_datetime(release_date_str).date()
    except Exception as e:
        return f"Error parsing release date: {str(e)}", None

    # Work on a copy of the global dataset; compare plain dates throughout.
    df = global_case_data.copy()
    df["case_date"] = pd.to_datetime(df["case_date"]).dt.date

    if lob_filter != "All":
        df = df[df["LOB"] == lob_filter]
    if issue_filter != "All":
        df = df[df["issue_type"] == issue_filter]
    if df.empty:
        return "No data available for the selected filters.", None

    # Split strictly-before vs. on-or-after the release date.
    pre_data = df[df["case_date"] < release_date]
    post_data = df[df["case_date"] >= release_date]
    if pre_data.empty or post_data.empty:
        return "No data available for the selected date range.", None

    if kpi.lower() == "throughput":
        throughput_pre = calculate_throughput(pre_data, pre_data["case_date"].min(), pre_data["case_date"].max())
        throughput_post = calculate_throughput(post_data, post_data["case_date"].min(), post_data["case_date"].max())
        # NOTE(review): each sample is a single scalar here, so ttest_ind
        # degenerates (NaN t/p) and always reports "Not Significant" —
        # confirm whether a per-day throughput series was intended.
        t_stat, p_value = stats.ttest_ind(np.array([throughput_pre]), np.array([throughput_post]), equal_var=False)
    else:
        # Welch's t-test (unequal variances) on the raw KPI values.
        pre_vals, post_vals = pre_data[kpi].values, post_data[kpi].values
        t_stat, p_value = stats.ttest_ind(pre_vals, post_vals, equal_var=False)
    if one_tailed:
        p_value = p_value / 2  # halve the two-sided p for a one-tailed test
        significance = "Significant" if p_value < 0.05 and t_stat > 0 else "Not Significant"
    else:
        significance = "Significant" if p_value < 0.05 else "Not Significant"

    analysis_text = f"""Overall Impact Analysis for KPI: {kpi}
Filters - LOB: {lob_filter}, Issue Type: {issue_filter}
Global Release Date: {release_date}

T-Test: T-Statistic = {t_stat:.3f}, P-Value = {p_value:.3f} ({significance})
"""

    # Bar chart for throughput (one scalar per period), boxplot otherwise.
    fig, ax = plt.subplots(figsize=(6, 4))
    if kpi.lower() == "throughput":
        ax.bar(["Pre", "Post"], [throughput_pre, throughput_post], color=["blue", "green"])
        ax.set_ylabel("Throughput (cases/day)")
    else:
        ax.boxplot([pre_data[kpi].values, post_data[kpi].values], labels=["Pre", "Post"])
        ax.set_ylabel(kpi)
    ax.set_title("Overall Impact Analysis")
    plt.tight_layout()
    # Close the figure so pyplot does not keep it open; the Figure object
    # itself is returned to Gradio for rendering.
    plt.close(fig)
    return analysis_text, fig
|
|
|
def analyze_all_advocates_impact(method, initiative, lob_filter, issue_filter, kpi, one_tailed,
                                 adoption_file, adoption_name_col, adoption_date_col, utilization_file):
    """Aggregated per-advocate pre/post impact analysis.

    `method` selects how each advocate's cases are split:
      - "Adoption Date": split around the advocate's adoption date for the
        chosen initiative (an uploaded mapping file overrides the global
        default table).
      - "Initiative Utilization": split by the 0/1 utilization flag joined
        on serial_number (uploaded file overrides the global default).

    Returns:
        (summary text, matplotlib Figure, per-advocate results DataFrame)
        on success; (error message, None, None) on any failure.

    Side effect: normalizes the adoption-date columns of the module-level
    global_advocate_adoption frame in place.

    NOTE(review): several debug strings contain mangled characters ("π",
    "β") and a few f-strings appear split across two lines — presumably
    emoji lost in an encoding round-trip; verify against the original file.
    """
    try:
        debug_print("π Running Advocate Impact Analysis...")
        df = global_case_data.copy()

        # Optional LOB / issue-type filtering.
        if lob_filter != "All":
            df = df[df["LOB"] == lob_filter]
        if issue_filter != "All":
            df = df[df["issue_type"] == issue_filter]
        if df.empty:
            debug_print("β No cases available for the selected filters.")
            return "No data available for the selected filters.", None, None

        # Normalize case dates to plain datetime.date (unparseable -> NaT).
        df["case_date"] = pd.to_datetime(df["case_date"], utc=True, errors="coerce").dt.normalize().dt.date
        debug_print(f"β
Data filtered. {len(df)} cases remain.")
        debug_print(f"π Min case date: {df['case_date'].min()}, Max case date: {df['case_date'].max()}")

        # --- Utilization data: uploaded file (CSV first, then Excel) or the
        # global default table.
        utilization_df = global_initiative_utilization.copy()
        if method == "Initiative Utilization" and utilization_file is not None:
            try:
                util_df = pd.read_csv(utilization_file.name)
            except Exception:
                try:
                    util_df = pd.read_excel(utilization_file.name)
                except Exception as e:
                    debug_print(f"β Error reading utilization file: {str(e)}")
                    return f"Error reading utilization file: {str(e)}", None, None
            if "serial_number" not in util_df.columns:
                debug_print("β The uploaded utilization file must have a 'serial_number' column.")
                return "The uploaded utilization file must have a 'serial_number' column.", None, None
            utilization_df = util_df.copy()
            debug_print(f"β
Uploaded initiative utilization file processed: {utilization_df.shape[0]} rows.")
        else:
            debug_print("π No initiative utilization file uploaded; using default global initiative utilization data.")

        # --- Adoption data: map uploaded names onto known advocates via
        # near-exact fuzzy matching.
        adoption_mapping = {}
        if method == "Adoption Date" and adoption_file is not None:
            try:
                uploaded_df = pd.read_csv(adoption_file.name)
            except Exception:
                try:
                    uploaded_df = pd.read_excel(adoption_file.name)
                except Exception as e:
                    debug_print(f"β Error reading adoption file: {str(e)}")
                    return f"Error reading adoption file: {str(e)}", None, None
            if adoption_name_col not in uploaded_df.columns or adoption_date_col not in uploaded_df.columns:
                debug_print("β Specified columns not found in the uploaded adoption file.")
                return "Specified columns not found in the uploaded adoption file.", None, None
            debug_print("π Processing uploaded adoption file...")
            for idx, row in uploaded_df.iterrows():
                name_uploaded = str(row[adoption_name_col])
                adoption_date = pd.to_datetime(row[adoption_date_col], utc=True, errors="coerce")
                if pd.isnull(adoption_date):
                    debug_print(f"β Skipping invalid adoption date for {name_uploaded}")
                    continue
                adoption_date = adoption_date.date()

                # Score >= 95 is effectively an exact match modulo case and
                # minor typos; keep the earliest date seen per advocate.
                for adv in df["advocate"].unique():
                    score = fuzz.ratio(name_uploaded.lower(), adv.lower())
                    if score >= 95:
                        adoption_mapping[adv] = min(adoption_mapping.get(adv, adoption_date), adoption_date)
            debug_print(f"β
Uploaded adoption file processed. Mapped {len(adoption_mapping)} advocates.")
        else:
            debug_print("π No adoption file uploaded; using default global adoption data.")

        # Normalize the global adoption table's date columns IN PLACE
        # (datetime -> date, NaT -> None).
        for col in ["Symbiosis_adoption_date", "Voice Translation_adoption_date", "NoteHero_adoption_date"]:
            global_advocate_adoption[col] = pd.to_datetime(global_advocate_adoption[col], utc=True, errors="coerce")
            global_advocate_adoption[col] = global_advocate_adoption[col].apply(lambda x: x.date() if pd.notnull(x) else None)

        all_pre_vals, all_post_vals = [], []
        results = []

        debug_print("π Processing advocates...")

        # --- Per-advocate pre/post split and KPI aggregation. Errors for one
        # advocate are logged and skipped rather than aborting the run.
        for adv in df["advocate"].unique():
            try:
                df_adv = df[df["advocate"] == adv]

                if method == "Adoption Date":
                    # Uploaded mapping takes precedence over the global table.
                    if adv in adoption_mapping:
                        adoption_date = adoption_mapping[adv]
                    else:
                        col_name = initiative + "_adoption_date"
                        adoption_series = global_advocate_adoption.loc[global_advocate_adoption["advocate"] == adv, col_name]
                        if adoption_series.empty or pd.isnull(adoption_series.values[0]):
                            debug_print(f"β Skipping {adv}: No valid adoption date found.")
                            continue
                        adoption_date = adoption_series.values[0]

                    if pd.isnull(adoption_date):
                        debug_print(f"β Skipping {adv}: Adoption date is NULL after conversion.")
                        continue

                    debug_print(f"π Processing {adv}: Adoption Date = {adoption_date}")

                    pre_data = df_adv[df_adv["case_date"] < adoption_date]
                    post_data = df_adv[df_adv["case_date"] >= adoption_date]

                    debug_print(f" {adv}: Pre-data count = {len(pre_data)}, Post-data count = {len(post_data)}")

                    if pre_data.empty:
                        debug_print(f"β Skipping {adv}: No pre-adoption cases.")
                        continue
                    if post_data.empty:
                        debug_print(f"β Skipping {adv}: No post-adoption cases.")
                        continue

                    slice_info = f"Adoption Date: {adoption_date}"

                elif method == "Initiative Utilization":
                    # Join the 0/1 flag on serial_number; a missing flag
                    # counts as "not utilized" (0).
                    col_name = initiative + "_utilized"
                    df_adv = df_adv.copy()
                    df_adv = df_adv.merge(utilization_df[["serial_number", col_name]], on="serial_number", how="left")
                    df_adv[col_name] = df_adv[col_name].fillna(0)
                    pre_data = df_adv[df_adv[col_name] == 0]
                    post_data = df_adv[df_adv[col_name] == 1]
                    slice_info = "Initiative Utilization"
                else:
                    # Unknown method: nothing to compute for this advocate.
                    continue

                if pre_data.empty or post_data.empty:
                    debug_print(f"β Advocate {adv}: Not enough data; skipping.")
                    continue

                # Throughput is computed per period; other KPIs use the mean
                # of the raw column values.
                if kpi.lower() == "throughput":
                    pre_val = calculate_throughput(pre_data, pre_data["case_date"].min(), pre_data["case_date"].max())
                    post_val = calculate_throughput(post_data, post_data["case_date"].min(), post_data["case_date"].max())
                else:
                    pre_val = np.mean(pre_data[kpi].values)
                    post_val = np.mean(post_data[kpi].values)

                pct_change = ((post_val - pre_val) / pre_val) * 100 if pre_val else np.nan
                results.append({
                    "advocate": adv,
                    "Pre_Mean": pre_val,
                    "Post_Mean": post_val,
                    "Percent_Change": pct_change,
                    "Slice_Info": slice_info
                })

                # Pool raw KPI column values for the aggregated t-test.
                # NOTE(review): raw column values are pooled even when
                # kpi == "throughput" — confirm that is intended.
                all_pre_vals.extend(pre_data[kpi].values)
                all_post_vals.extend(post_data[kpi].values)

                debug_print(f"β
Processed {adv}: {pct_change:.2f}% change.")
            except Exception as e:
                debug_print(f"β Error processing {adv}: {str(e)}")

        if not results:
            debug_print("β No valid advocates found for analysis.")
            return "No valid advocates found for analysis. Check the case date ranges.", None, None

        results_df = pd.DataFrame(results).sort_values(by="Percent_Change", ascending=False)

        # --- Aggregated Welch t-test over the pooled pre/post values.
        try:
            if len(all_pre_vals) > 1 and len(all_post_vals) > 1:
                t_stat, p_value = stats.ttest_ind(all_pre_vals, all_post_vals, equal_var=False)
                if one_tailed:
                    p_value = p_value / 2
                    significance = "Statistically Significant" if p_value < 0.05 and t_stat > 0 else "Not Statistically Significant"
                else:
                    significance = "Statistically Significant" if p_value < 0.05 else "Not Statistically Significant"
            else:
                t_stat, p_value = np.nan, np.nan
                significance = "Insufficient Data for Statistical Test"
        except Exception as e:
            debug_print(f"β Error performing T-Test: {str(e)}")
            return f"Error performing T-Test: {str(e)}", None, None

        pre_mean = np.mean(all_pre_vals) if len(all_pre_vals) > 0 else np.nan
        post_mean = np.mean(all_post_vals) if len(all_post_vals) > 0 else np.nan
        overall_pct_change = ((post_mean - pre_mean) / pre_mean) * 100 if pre_mean else np.nan

        overall_summary = f"""π Aggregated Advocate Impact Analysis using method '{method}' for initiative '{initiative}' on KPI '{kpi}'.
Number of advocates analyzed: {len(results_df)}

Aggregated Pre vs Post Analysis:
- Pre-Adoption Mean: {pre_mean:.2f}
- Post-Adoption Mean: {post_mean:.2f}
- Percent Change: {overall_pct_change:.2f}%

T-Test Results:
- T-Statistic: {t_stat:.3f}
- P-Value: {p_value:.3f}
- Result: {significance}
"""
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.bar(["Pre-Adoption", "Post-Adoption"], [pre_mean, post_mean], color=["blue", "green"])
        ax.set_title(f"Aggregated Impact of {initiative} on {kpi}")
        ax.set_ylabel(kpi)
        plt.tight_layout()
        # Close the pyplot handle; the Figure object is returned to Gradio.
        plt.close(fig)

        debug_print("π― Advocate Impact Analysis completed.")
        return overall_summary, fig, results_df

    except Exception as e:
        # Top-level boundary: report the error to the UI instead of raising.
        debug_print(f"β Fatal Error in Function: {str(e)}")
        return f"Fatal Error: {str(e)}", None, None
|
|
|
|
|
# --- Gradio UI definition and app entry point ------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Impact Analysis Dashboard")

    with gr.Tabs():

        # Tab 1: pre/post comparison around a single global release date.
        with gr.TabItem("Overall Impact Analysis"):
            gr.Markdown("### Overall Impact Analysis (Global Release Date)")
            overall_release_date = gr.Textbox(label="Global Release Date (YYYY-MM-DD)", placeholder="e.g., 2022-01-15")
            overall_lob = gr.Dropdown(choices=["All", "Modern Life", "Xbox", "CAPS", "Devices", "Modern Work"],
                                      label="Filter by LOB", value="All")
            overall_issue = gr.Dropdown(choices=["All", "Billing", "Technical", "Hacking", "Service", "Access"],
                                        label="Filter by Issue Type", value="All")
            overall_kpi = gr.Dropdown(choices=["CSAT", "days_to_close", "first_contact_resolution", "CPI", "throughput"],
                                      label="Select KPI", value="CSAT")
            one_tailed_overall = gr.Checkbox(label="Use One-Tailed T-Test")
            overall_btn = gr.Button("Analyze Overall Impact")
            overall_output = gr.Textbox(label="Overall Impact Analysis Results")
            overall_plot = gr.Plot(label="Overall Impact Graph")

            # Wire the button to analyze_overall_impact; inputs/outputs are
            # positional and must match that function's signature and returns.
            overall_btn.click(analyze_overall_impact,
                              inputs=[overall_release_date, overall_lob, overall_issue, overall_kpi, one_tailed_overall],
                              outputs=[overall_output, overall_plot])

        # Tab 2: per-advocate analysis by adoption date or utilization flag,
        # with optional CSV/Excel uploads overriding the global tables.
        with gr.TabItem("Advocate Impact Analysis"):
            gr.Markdown("### Advocate Impact Analysis (Aggregated Pre vs Post)")
            adoption_method = gr.Radio(choices=["Adoption Date", "Initiative Utilization"],
                                       label="Method", value="Adoption Date")
            initiative_select = gr.Dropdown(choices=["Symbiosis", "Voice Translation", "NoteHero"],
                                            label="Select Initiative", value="Symbiosis")
            adv_lob = gr.Dropdown(choices=["All", "Modern Life", "Xbox", "CAPS", "Devices", "Modern Work"],
                                  label="Filter by LOB", value="All")
            adv_issue = gr.Dropdown(choices=["All", "Billing", "Technical", "Hacking", "Service", "Access"],
                                    label="Filter by Issue Type", value="All")
            adv_kpi = gr.Dropdown(choices=["CSAT", "days_to_close", "first_contact_resolution", "CPI", "throughput"],
                                  label="Select KPI", value="CSAT")
            one_tailed_adv = gr.Checkbox(label="Use One-Tailed T-Test")
            with gr.Accordion("Optional File Uploads (Click to expand)", open=False):
                gr.Markdown("Upload is optional. For Adoption Date method, upload a CSV/Excel with two columns (Advocate Name and Adoption Date). For Initiative Utilization, upload a CSV/Excel with a 'serial_number' column.")
                adoption_file = gr.File(label="Upload Adoption Date File (optional)")
                adoption_name_col = gr.Textbox(label="Adoption File: Advocate Name Column", placeholder="e.g., Name")
                adoption_date_col = gr.Textbox(label="Adoption File: Adoption Date Column", placeholder="e.g., AdoptionDate")
                utilization_file = gr.File(label="Upload Initiative Utilization File (optional)")
            adv_btn = gr.Button("Analyze Advocate Impact")
            adv_overall_output = gr.Textbox(label="Aggregated Advocate Impact Summary")
            adv_plot = gr.Plot(label="Aggregated Advocate Impact Graph")
            adv_table = gr.Dataframe(label="Advocate Impact Details")

            adv_btn.click(analyze_all_advocates_impact,
                          inputs=[adoption_method, initiative_select, adv_lob, adv_issue, adv_kpi, one_tailed_adv,
                                  adoption_file, adoption_name_col, adoption_date_col, utilization_file],
                          outputs=[adv_overall_output, adv_plot, adv_table])

        # Tab 3: placeholder log viewer — debug_print writes to stdout only,
        # so this tab just points the user at the console.
        with gr.TabItem("Debug Logs"):
            gr.Markdown("### Debug Logs")
            debug_btn = gr.Button("Refresh Debug Logs")
            debug_output = gr.Textbox(label="Debug Logs", lines=15)
            debug_btn.click(lambda: "Check console output for debug logs.", inputs=[], outputs=[debug_output])

# Launch the Gradio server (blocking call; runs at import/execution time).
demo.launch()
|
|