Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
from scipy import stats
|
6 |
+
from rapidfuzz import fuzz
|
7 |
+
|
8 |
+
#######################################
# Debug Logging Function
#######################################
def debug_print(message):
    """Print *message* to stdout immediately.

    flush=True ensures log lines appear promptly in hosted consoles
    (e.g. HF Spaces), where stdout is otherwise block-buffered; the
    "Debug Logs" tab directs users to this console output.
    """
    print(message, flush=True)
|
13 |
+
|
14 |
+
#######################################
# Data Generation Functions
#######################################
def generate_case_data(num_records=5000):
    """Build a synthetic support-case dataset spanning 2021-2023.

    Cases dated after their LOB's simulated release date are drawn from
    slightly "better" KPI distributions than earlier cases, so the impact
    analyses downstream have a detectable effect.
    """
    debug_print("Generating case data...")

    lobs = np.random.choice(["Modern Life", "Xbox", "CAPS", "Devices", "Modern Work"], num_records)
    issues = np.random.choice(["Billing", "Technical", "Hacking", "Service", "Access"], num_records)
    advs = np.random.choice(["Alice", "Bob", "Charlie", "Diana", "Eve"], num_records)

    window_start = pd.Timestamp("2021-01-01")
    window_end = pd.Timestamp("2023-12-31")
    span_days = (window_end - window_start).days

    # Case dates spread uniformly across the three-year window.
    dates = window_start + pd.to_timedelta(np.random.randint(0, span_days, num_records), unit='D')

    # Simulated release dates per LOB (all in early 2022).
    releases = {
        "Modern Life": pd.Timestamp("2022-01-01"),
        "Xbox": pd.Timestamp("2022-02-01"),
        "CAPS": pd.Timestamp("2022-03-01"),
        "Devices": pd.Timestamp("2022-04-01"),
        "Modern Work": pd.Timestamp("2022-05-01"),
    }
    is_pre = dates < np.array([releases[lob] for lob in lobs])

    def blend(pre_mu, pre_sd, post_mu, post_sd):
        # Draw both regimes in full, then pick per-row by pre/post status
        # (same call order as drawing each metric's pre then post sample).
        return np.where(is_pre,
                        np.random.normal(pre_mu, pre_sd, num_records),
                        np.random.normal(post_mu, post_sd, num_records))

    csat = blend(80, 5, 85, 5)
    close_days = blend(5, 1, 4, 1)
    fcr = blend(70, 8, 75, 8)
    cpi = blend(50, 5, 45, 5)

    # Initiative utilization columns are deliberately NOT part of this frame;
    # they live in the standalone utilization dataset.
    debug_print("Case data generated.")
    return pd.DataFrame({
        "serial_number": np.arange(1, num_records + 1),
        "advocate": advs,
        "LOB": lobs,
        "issue_type": issues,
        "case_date": dates,
        "CSAT": csat,
        "days_to_close": close_days,
        "first_contact_resolution": fcr,
        "CPI": cpi,
    })
|
59 |
+
|
60 |
+
def generate_advocate_adoption_data():
    """Return the default per-advocate initiative adoption dates.

    One row per advocate; each "<Initiative>_adoption_date" column holds
    plain datetime.date objects clustered in mid-2022.
    """
    debug_print("Generating advocate adoption data...")
    # Adoption dates fall roughly in mid-June 2022 for every initiative.
    raw_dates = {
        "Symbiosis_adoption_date": ["2022-06-05", "2022-06-10", "2022-06-08", "2022-06-12", "2022-06-07"],
        "Voice Translation_adoption_date": ["2022-06-03", "2022-06-07", "2022-06-05", "2022-06-09", "2022-06-04"],
        "NoteHero_adoption_date": ["2022-06-02", "2022-06-06", "2022-06-04", "2022-06-08", "2022-06-03"],
    }
    frame = pd.DataFrame({"advocate": ["Alice", "Bob", "Charlie", "Diana", "Eve"]})
    # Parse each column to datetimes, then reduce to plain date objects.
    for column, values in raw_dates.items():
        frame[column] = pd.to_datetime(values).date
    debug_print("Advocate adoption data generated.")
    return frame
|
80 |
+
|
81 |
+
def generate_utilization_data(case_df=None):
    """Simulate per-case initiative utilization flags.

    Parameters
    ----------
    case_df : pandas.DataFrame, optional
        Frame providing the "serial_number" column. Defaults to the
        module-level ``global_case_data`` so the original zero-argument
        call still works.

    Returns
    -------
    pandas.DataFrame
        "serial_number" plus one 0/1 column per initiative, each flag
        drawn independently with ~50% utilization.
    """
    debug_print("Generating initiative utilization data...")
    if case_df is None:
        case_df = global_case_data
    # One copy of just the serial numbers (the original chained a redundant
    # second .copy()).
    df = case_df[["serial_number"]].copy()
    # For demonstration, simulate 50% usage for each initiative.
    for initiative in ["Voice Translation_utilized", "Symbiosis_utilized", "NoteHero_utilized"]:
        df[initiative] = np.random.choice([0, 1], size=len(df), p=[0.5, 0.5])
    debug_print("Initiative utilization data generated.")
    return df
|
90 |
+
|
91 |
+
#######################################
# Global Data Setup
#######################################
# Module-level synthetic datasets, built once at import time. The analysis
# functions below read these globals rather than taking data parameters.
global_case_data = generate_case_data(num_records=5000)
global_advocate_adoption = generate_advocate_adoption_data()
global_initiative_utilization = generate_utilization_data()
# Ensure that the utilization dataset only contains serial numbers that are in the main dataset.
valid_serials = set(global_case_data["serial_number"])
global_initiative_utilization = global_initiative_utilization[global_initiative_utilization["serial_number"].isin(valid_serials)]
debug_print("Global datasets generated.")
|
101 |
+
|
102 |
+
#######################################
# Helper Calculation Functions
#######################################
def calculate_throughput(df, start_date, end_date):
    """Mean cases per day over the inclusive window [start_date, end_date].

    Returns 0 when no case falls in the window; a same-day window is
    treated as spanning one day.
    """
    in_window = df.loc[df["case_date"].between(start_date, end_date)]
    case_count = len(in_window)
    # `or 1` guards against a zero-length (same-day) window.
    span_days = (end_date - start_date).days or 1
    if not case_count:
        return 0
    return case_count / span_days
|
110 |
+
|
111 |
+
def calculate_throughput_per_advocate(df, start_date, end_date):
    """Per-(LOB, advocate) throughput (cases/day) within the inclusive window.

    Returns
    -------
    pandas.Series indexed by (LOB, advocate), or None when no case falls
    inside [start_date, end_date].
    """
    df_filtered = df.loc[(df["case_date"] >= start_date) & (df["case_date"] <= end_date)]
    if df_filtered.empty:
        return None
    # Guard against a zero-length (same-day) window, consistent with
    # calculate_throughput; previously this divided by 0 and produced inf.
    num_days = (end_date - start_date).days or 1
    throughput = df_filtered.groupby(["LOB", "advocate"]).size() / num_days
    return throughput
|
117 |
+
|
118 |
+
#######################################
# Analysis Functions
#######################################
def analyze_overall_impact(release_date_str, lob_filter, issue_filter, kpi, one_tailed):
    """Compare a KPI before vs after a single global release date.

    Reads the module-level global_case_data, filters by LOB/issue type,
    splits at release_date, runs a Welch t-test, and returns
    (summary_text, matplotlib_figure). On any input problem it returns an
    error string and None instead of raising.
    """
    debug_print("Running Overall Impact Analysis...")
    try:
        # Parse release date as a date object (no .dt on a scalar)
        release_date = pd.to_datetime(release_date_str).date()
    except Exception as e:
        return f"Error parsing release date: {str(e)}", None

    df = global_case_data.copy()
    # Reduce timestamps to plain dates so they compare against release_date.
    df["case_date"] = pd.to_datetime(df["case_date"]).dt.date

    if lob_filter != "All":
        df = df[df["LOB"] == lob_filter]
    if issue_filter != "All":
        df = df[df["issue_type"] == issue_filter]
    if df.empty:
        return "No data available for the selected filters.", None

    # Pre = strictly before release; post = on or after release.
    pre_data = df[df["case_date"] < release_date]
    post_data = df[df["case_date"] >= release_date]
    if pre_data.empty or post_data.empty:
        return "No data available for the selected date range.", None

    if kpi.lower() == "throughput":
        throughput_pre = calculate_throughput(pre_data, pre_data["case_date"].min(), pre_data["case_date"].max())
        throughput_post = calculate_throughput(post_data, post_data["case_date"].min(), post_data["case_date"].max())
        # NOTE(review): this t-test runs on two single-element arrays, so
        # t_stat/p_value come out nan and the result reads "Not
        # Significant" — confirm whether a per-period sample was intended.
        t_stat, p_value = stats.ttest_ind(np.array([throughput_pre]), np.array([throughput_post]), equal_var=False)
    else:
        pre_vals, post_vals = pre_data[kpi].values, post_data[kpi].values
        t_stat, p_value = stats.ttest_ind(pre_vals, post_vals, equal_var=False)
    if one_tailed:
        # Halve the two-sided p-value; direction is taken from t_stat's sign.
        p_value = p_value / 2
        significance = "Significant" if p_value < 0.05 and t_stat > 0 else "Not Significant"
    else:
        significance = "Significant" if p_value < 0.05 else "Not Significant"

    analysis_text = f"""Overall Impact Analysis for KPI: {kpi}
Filters - LOB: {lob_filter}, Issue Type: {issue_filter}
Global Release Date: {release_date}

T-Test: T-Statistic = {t_stat:.3f}, P-Value = {p_value:.3f} ({significance})
"""
    # Here you could also add additional aggregated results if needed.
    fig, ax = plt.subplots(figsize=(6, 4))
    if kpi.lower() == "throughput":
        # For throughput, show a simple bar graph with aggregated throughput (for demonstration)
        ax.bar(["Pre", "Post"], [throughput_pre, throughput_post], color=["blue", "green"])
        ax.set_ylabel("Throughput (cases/day)")
    else:
        ax.boxplot([pre_data[kpi].values, post_data[kpi].values], labels=["Pre", "Post"])
        ax.set_ylabel(kpi)
    ax.set_title("Overall Impact Analysis")
    plt.tight_layout()
    # Close so the figure is not shown twice; the returned object is still
    # rendered by gr.Plot.
    plt.close(fig)
    return analysis_text, fig
|
176 |
+
|
177 |
+
def analyze_all_advocates_impact(method, initiative, lob_filter, issue_filter, kpi, one_tailed,
                                 adoption_file, adoption_name_col, adoption_date_col, utilization_file):
    """Aggregate pre- vs post-initiative KPI impact across every advocate.

    method is either "Adoption Date" (split each advocate's cases at their
    adoption date, from an optional uploaded file or the global defaults)
    or "Initiative Utilization" (split by a per-case 0/1 utilization flag).
    Returns (summary_text, matplotlib_figure, per-advocate DataFrame); on
    error returns (error_string, None, None).
    """
    try:
        debug_print("π Running Advocate Impact Analysis...")
        df = global_case_data.copy()

        if lob_filter != "All":
            df = df[df["LOB"] == lob_filter]
        if issue_filter != "All":
            df = df[df["issue_type"] == issue_filter]
        if df.empty:
            debug_print("β No cases available for the selected filters.")
            return "No data available for the selected filters.", None, None

        # Normalize case timestamps to plain date objects for the
        # comparisons against adoption dates below.
        df["case_date"] = pd.to_datetime(df["case_date"], utc=True, errors="coerce").dt.normalize().dt.date
        debug_print(f"β Data filtered. {len(df)} cases remain.")
        debug_print(f"π Min case date: {df['case_date'].min()}, Max case date: {df['case_date'].max()}")

        # For Initiative Utilization, use standalone DF
        utilization_df = global_initiative_utilization.copy()
        if method == "Initiative Utilization" and utilization_file is not None:
            # Try CSV first, then fall back to Excel.
            try:
                util_df = pd.read_csv(utilization_file.name)
            except Exception:
                try:
                    util_df = pd.read_excel(utilization_file.name)
                except Exception as e:
                    debug_print(f"β Error reading utilization file: {str(e)}")
                    return f"Error reading utilization file: {str(e)}", None, None
            if "serial_number" not in util_df.columns:
                debug_print("β The uploaded utilization file must have a 'serial_number' column.")
                return "The uploaded utilization file must have a 'serial_number' column.", None, None
            utilization_df = util_df.copy()
            debug_print(f"β Uploaded initiative utilization file processed: {utilization_df.shape[0]} rows.")
        else:
            debug_print("π No initiative utilization file uploaded; using default global initiative utilization data.")

        # Build adoption mapping for Adoption Date method
        adoption_mapping = {}
        if method == "Adoption Date" and adoption_file is not None:
            # Try CSV first, then fall back to Excel.
            try:
                uploaded_df = pd.read_csv(adoption_file.name)
            except Exception:
                try:
                    uploaded_df = pd.read_excel(adoption_file.name)
                except Exception as e:
                    debug_print(f"β Error reading adoption file: {str(e)}")
                    return f"Error reading adoption file: {str(e)}", None, None
            if adoption_name_col not in uploaded_df.columns or adoption_date_col not in uploaded_df.columns:
                debug_print("β Specified columns not found in the uploaded adoption file.")
                return "Specified columns not found in the uploaded adoption file.", None, None
            debug_print("π Processing uploaded adoption file...")
            for idx, row in uploaded_df.iterrows():
                name_uploaded = str(row[adoption_name_col])
                adoption_date = pd.to_datetime(row[adoption_date_col], utc=True, errors="coerce")
                if pd.isnull(adoption_date):
                    debug_print(f"β Skipping invalid adoption date for {name_uploaded}")
                    continue
                adoption_date = adoption_date.date()
                # Map using fuzzy matching on the default global adoption names
                for adv in df["advocate"].unique():
                    score = fuzz.ratio(name_uploaded.lower(), adv.lower())
                    if score >= 95:
                        # Keep the EARLIEST adoption date seen for this advocate.
                        adoption_mapping[adv] = min(adoption_mapping.get(adv, adoption_date), adoption_date)
            debug_print(f"β Uploaded adoption file processed. Mapped {len(adoption_mapping)} advocates.")
        else:
            debug_print("π No adoption file uploaded; using default global adoption data.")

        # Normalize global adoption dates
        # NOTE(review): this mutates the module-level global_advocate_adoption
        # frame in place on every call — confirm that is intended.
        for col in ["Symbiosis_adoption_date", "Voice Translation_adoption_date", "NoteHero_adoption_date"]:
            global_advocate_adoption[col] = pd.to_datetime(global_advocate_adoption[col], utc=True, errors="coerce")
            global_advocate_adoption[col] = global_advocate_adoption[col].apply(lambda x: x.date() if pd.notnull(x) else None)

        all_pre_vals, all_post_vals = [], []
        results = []

        debug_print("π Processing advocates...")

        for adv in df["advocate"].unique():
            # Per-advocate failures are logged and skipped, not fatal.
            try:
                df_adv = df[df["advocate"] == adv]

                if method == "Adoption Date":
                    # Uploaded mapping takes precedence over the global defaults.
                    if adv in adoption_mapping:
                        adoption_date = adoption_mapping[adv]
                    else:
                        col_name = initiative + "_adoption_date"
                        adoption_series = global_advocate_adoption.loc[global_advocate_adoption["advocate"] == adv, col_name]
                        if adoption_series.empty or pd.isnull(adoption_series.values[0]):
                            debug_print(f"β Skipping {adv}: No valid adoption date found.")
                            continue
                        adoption_date = adoption_series.values[0]

                    if pd.isnull(adoption_date):
                        debug_print(f"β Skipping {adv}: Adoption date is NULL after conversion.")
                        continue

                    debug_print(f"π Processing {adv}: Adoption Date = {adoption_date}")

                    # Pre = strictly before adoption; post = on or after.
                    pre_data = df_adv[df_adv["case_date"] < adoption_date]
                    post_data = df_adv[df_adv["case_date"] >= adoption_date]

                    debug_print(f" {adv}: Pre-data count = {len(pre_data)}, Post-data count = {len(post_data)}")

                    if pre_data.empty:
                        debug_print(f"β Skipping {adv}: No pre-adoption cases.")
                        continue
                    if post_data.empty:
                        debug_print(f"β Skipping {adv}: No post-adoption cases.")
                        continue

                    slice_info = f"Adoption Date: {adoption_date}"

                elif method == "Initiative Utilization":
                    # "Pre" = cases where the initiative was not used; "post" = used.
                    col_name = initiative + "_utilized"
                    df_adv = df_adv.copy()
                    df_adv = df_adv.merge(utilization_df[["serial_number", col_name]], on="serial_number", how="left")
                    df_adv[col_name] = df_adv[col_name].fillna(0)
                    pre_data = df_adv[df_adv[col_name] == 0]
                    post_data = df_adv[df_adv[col_name] == 1]
                    slice_info = "Initiative Utilization"
                else:
                    continue

                if pre_data.empty or post_data.empty:
                    debug_print(f"β Advocate {adv}: Not enough data; skipping.")
                    continue

                if kpi.lower() == "throughput":
                    pre_val = calculate_throughput(pre_data, pre_data["case_date"].min(), pre_data["case_date"].max())
                    post_val = calculate_throughput(post_data, post_data["case_date"].min(), post_data["case_date"].max())
                else:
                    pre_val = np.mean(pre_data[kpi].values)
                    post_val = np.mean(post_data[kpi].values)

                pct_change = ((post_val - pre_val) / pre_val) * 100 if pre_val else np.nan
                results.append({
                    "advocate": adv,
                    "Pre_Mean": pre_val,
                    "Post_Mean": post_val,
                    "Percent_Change": pct_change,
                    "Slice_Info": slice_info
                })

                # NOTE(review): when kpi == "throughput" there is no
                # "throughput" column, so these lookups raise KeyError and
                # every advocate is silently dropped by the except below,
                # ending in "No valid advocates found" — confirm and fix.
                all_pre_vals.extend(pre_data[kpi].values)
                all_post_vals.extend(post_data[kpi].values)

                debug_print(f"β Processed {adv}: {pct_change:.2f}% change.")
            except Exception as e:
                debug_print(f"β Error processing {adv}: {str(e)}")

        if not results:
            debug_print("β No valid advocates found for analysis.")
            return "No valid advocates found for analysis. Check the case date ranges.", None, None

        results_df = pd.DataFrame(results).sort_values(by="Percent_Change", ascending=False)

        # Perform aggregated T-Test
        try:
            if len(all_pre_vals) > 1 and len(all_post_vals) > 1:
                t_stat, p_value = stats.ttest_ind(all_pre_vals, all_post_vals, equal_var=False)
                if one_tailed:
                    # Halve the two-sided p-value; direction from t_stat's sign.
                    p_value = p_value / 2
                    significance = "Statistically Significant" if p_value < 0.05 and t_stat > 0 else "Not Statistically Significant"
                else:
                    significance = "Statistically Significant" if p_value < 0.05 else "Not Statistically Significant"
            else:
                t_stat, p_value = np.nan, np.nan
                significance = "Insufficient Data for Statistical Test"
        except Exception as e:
            debug_print(f"β Error performing T-Test: {str(e)}")
            return f"Error performing T-Test: {str(e)}", None, None

        pre_mean = np.mean(all_pre_vals) if len(all_pre_vals) > 0 else np.nan
        post_mean = np.mean(all_post_vals) if len(all_post_vals) > 0 else np.nan
        overall_pct_change = ((post_mean - pre_mean) / pre_mean) * 100 if pre_mean else np.nan

        overall_summary = f"""π Aggregated Advocate Impact Analysis using method '{method}' for initiative '{initiative}' on KPI '{kpi}'.
Number of advocates analyzed: {len(results_df)}

Aggregated Pre vs Post Analysis:
- Pre-Adoption Mean: {pre_mean:.2f}
- Post-Adoption Mean: {post_mean:.2f}
- Percent Change: {overall_pct_change:.2f}%

T-Test Results:
- T-Statistic: {t_stat:.3f}
- P-Value: {p_value:.3f}
- Result: {significance}
"""
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.bar(["Pre-Adoption", "Post-Adoption"], [pre_mean, post_mean], color=["blue", "green"])
        ax.set_title(f"Aggregated Impact of {initiative} on {kpi}")
        ax.set_ylabel(kpi)
        plt.tight_layout()
        # Close so it is not double-rendered; gr.Plot still shows the figure.
        plt.close(fig)

        debug_print("π― Advocate Impact Analysis completed.")
        return overall_summary, fig, results_df

    except Exception as e:
        debug_print(f"β Fatal Error in Function: {str(e)}")
        return f"Fatal Error: {str(e)}", None, None
|
380 |
+
|
381 |
+
|
382 |
+
# -------------------------------------------------------------------------
# Gradio UI: three tabs (overall impact, per-advocate impact, debug logs).
# Built and launched at import time; demo.launch() starts the web server.
# -------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Impact Analysis Dashboard")

    with gr.Tabs():
        # Tab 1: Overall Impact Analysis
        with gr.TabItem("Overall Impact Analysis"):
            gr.Markdown("### Overall Impact Analysis (Global Release Date)")
            # Free-text date; analyze_overall_impact parses it and returns a
            # parse-error message in the results box if invalid.
            overall_release_date = gr.Textbox(label="Global Release Date (YYYY-MM-DD)", placeholder="e.g., 2022-01-15")
            overall_lob = gr.Dropdown(choices=["All", "Modern Life", "Xbox", "CAPS", "Devices", "Modern Work"],
                                      label="Filter by LOB", value="All")
            overall_issue = gr.Dropdown(choices=["All", "Billing", "Technical", "Hacking", "Service", "Access"],
                                        label="Filter by Issue Type", value="All")
            overall_kpi = gr.Dropdown(choices=["CSAT", "days_to_close", "first_contact_resolution", "CPI", "throughput"],
                                      label="Select KPI", value="CSAT")
            one_tailed_overall = gr.Checkbox(label="Use One-Tailed T-Test")
            overall_btn = gr.Button("Analyze Overall Impact")
            overall_output = gr.Textbox(label="Overall Impact Analysis Results")
            overall_plot = gr.Plot(label="Overall Impact Graph")

            overall_btn.click(analyze_overall_impact,
                              inputs=[overall_release_date, overall_lob, overall_issue, overall_kpi, one_tailed_overall],
                              outputs=[overall_output, overall_plot])

        # Tab 2: Advocate Impact Analysis
        with gr.TabItem("Advocate Impact Analysis"):
            gr.Markdown("### Advocate Impact Analysis (Aggregated Pre vs Post)")
            adoption_method = gr.Radio(choices=["Adoption Date", "Initiative Utilization"],
                                       label="Method", value="Adoption Date")
            initiative_select = gr.Dropdown(choices=["Symbiosis", "Voice Translation", "NoteHero"],
                                            label="Select Initiative", value="Symbiosis")
            adv_lob = gr.Dropdown(choices=["All", "Modern Life", "Xbox", "CAPS", "Devices", "Modern Work"],
                                  label="Filter by LOB", value="All")
            adv_issue = gr.Dropdown(choices=["All", "Billing", "Technical", "Hacking", "Service", "Access"],
                                    label="Filter by Issue Type", value="All")
            adv_kpi = gr.Dropdown(choices=["CSAT", "days_to_close", "first_contact_resolution", "CPI", "throughput"],
                                  label="Select KPI", value="CSAT")
            one_tailed_adv = gr.Checkbox(label="Use One-Tailed T-Test")
            # Optional overrides for the synthetic adoption/utilization data.
            with gr.Accordion("Optional File Uploads (Click to expand)", open=False):
                gr.Markdown("Upload is optional. For Adoption Date method, upload a CSV/Excel with two columns (Advocate Name and Adoption Date). For Initiative Utilization, upload a CSV/Excel with a 'serial_number' column.")
                adoption_file = gr.File(label="Upload Adoption Date File (optional)")
                adoption_name_col = gr.Textbox(label="Adoption File: Advocate Name Column", placeholder="e.g., Name")
                adoption_date_col = gr.Textbox(label="Adoption File: Adoption Date Column", placeholder="e.g., AdoptionDate")
                utilization_file = gr.File(label="Upload Initiative Utilization File (optional)")
            adv_btn = gr.Button("Analyze Advocate Impact")
            adv_overall_output = gr.Textbox(label="Aggregated Advocate Impact Summary")
            adv_plot = gr.Plot(label="Aggregated Advocate Impact Graph")
            adv_table = gr.Dataframe(label="Advocate Impact Details")

            adv_btn.click(analyze_all_advocates_impact,
                          inputs=[adoption_method, initiative_select, adv_lob, adv_issue, adv_kpi, one_tailed_adv,
                                  adoption_file, adoption_name_col, adoption_date_col, utilization_file],
                          outputs=[adv_overall_output, adv_plot, adv_table])

        # Optional Debug Logs Tab
        with gr.TabItem("Debug Logs"):
            gr.Markdown("### Debug Logs")
            # debug_print writes to stdout only; this tab just points the
            # user at the console output rather than surfacing logs in-app.
            debug_btn = gr.Button("Refresh Debug Logs")
            debug_output = gr.Textbox(label="Debug Logs", lines=15)
            debug_btn.click(lambda: "Check console output for debug logs.", inputs=[], outputs=[debug_output])

demo.launch()
|