Spaces:

AMKhakbaz
/

AMKAPP

Running

App Files Files Community

AMKhakbaz committed on Jan 1

Commit

74cbf4e

verified ·

1 Parent(s): dc9de67

Create app.py

Browse files

Files changed (1) hide show

app.py +236 -0

app.py ADDED Viewed

	@@ -0,0 +1,236 @@

import math

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from scipy.stats import norm
# Define your helper functions
def is_matching_pattern(column, prefix):
    """Tell whether *column* is named ``<prefix>_<1 to 3 ASCII digits>``.

    Parameters:
    - column: Column label to test. Non-string labels (e.g. numeric Excel
      headers) never match instead of raising.
    - prefix: Question prefix that must precede the underscore.
    Returns:
    - True when the label is the prefix, an underscore, and one to three
      digits 0-9; False otherwise.
    """
    if not isinstance(column, str):
        return False
    head = f"{prefix}_"
    if not column.startswith(head):
        return False
    suffix = column[len(head):]
    # str.isdigit() alone also accepts characters such as "²"; restrict the
    # suffix to plain ASCII digits.
    return 1 <= len(suffix) <= 3 and suffix.isascii() and suffix.isdigit()
def multi_answer(df):
    """Build a frequency table for a multiple-answer question.

    Each column of *df* is treated as one answer option: the column's most
    frequent non-null value is used as the option label and its count as the
    option frequency.

    Parameters:
    - df: DataFrame holding only the columns of one multiple-answer question.
    Returns:
    - DataFrame with columns "Value", "Frequency" and "Percentile"
      (frequency divided by the number of rows that have at least one
      non-null answer), sorted by "Value", followed by a final
      'Sample_size' row.
    """
    # Base = respondents who answered at least one option.
    respondents = len(df.dropna(how='all'))

    labels = []
    counts = []
    for column in df.columns:
        tally = df[column].value_counts()  # NaN is excluded by default
        if tally.empty:
            # Nobody picked this option: report it under the column name
            # with a zero count instead of failing on an empty value set.
            labels.append(str(column))
            counts.append(0)
        else:
            labels.append(str(tally.index[0]))
            counts.append(int(tally.iloc[0]))

    if respondents:
        shares = np.array(counts, dtype=float) / respondents
    else:
        # No respondents at all: the proportion is undefined.
        shares = np.full(len(counts), np.nan)

    frequency_dataframe = pd.DataFrame({
        "Value": labels,
        "Frequency": counts,
        "Percentile": shares
    }).sort_values(by='Value')
    frequency_dataframe.loc[len(frequency_dataframe)] = ['Sample_size', respondents, 1]
    return frequency_dataframe
def single_answer(df):
    """Build a frequency table for a single-answer column.

    Parameters:
    - df: Series holding the answers (missing values are ignored).
    Returns:
    - DataFrame with columns 'Value', 'Frequency' and 'Percentage'
      (0-100 scale), sorted by 'Value', followed by a final 'Sample_size'
      row carrying the number of non-null answers.
    """
    counter = df.value_counts()
    total = int(counter.sum())
    frequency_dataframe = pd.DataFrame({
        'Value': counter.index,
        'Frequency': counter.values,
        'Percentage': (counter.values / total) * 100
    }).sort_values(by='Value')
    # The summary row uses the same 0-100 scale as the rows above it; with
    # no answers at all the share is undefined.
    total_share = 100.0 if total else np.nan
    frequency_dataframe.loc[len(frequency_dataframe)] = ['Sample_size', total, total_share]
    return frequency_dataframe
def two_variable_ss(df, var1, var2):
    """Cross-tabulate two single-answer columns.

    Parameters:
    - df: DataFrame containing both columns.
    - var1: Column whose values become the table rows.
    - var2: Column whose values become the table columns.
    Returns:
    - percentile_dataframe: each cell divided by its column total.
    - frequency_dataframe: raw counts with an extra 'Sample_size' row
      (column totals) and 'Sample_size' column (row totals, grand total in
      the corner).
    """
    # groupby drops rows with a missing var1 and value_counts drops a missing
    # var2, so the table only covers rows where both answers are present.
    counter = df.groupby(var1)[var2].value_counts()
    frequency_dataframe = counter.unstack(fill_value=0)
    column_sums = frequency_dataframe.sum(axis=0)
    percentile_dataframe = frequency_dataframe.div(column_sums, axis=1)
    # Take the margins from the table itself so they always line up with its
    # rows/columns and match the base used for the proportions above.
    frequency_dataframe.loc['Sample_size'] = column_sums
    frequency_dataframe['Sample_size'] = frequency_dataframe.sum(axis=1)
    return percentile_dataframe, frequency_dataframe
# Functions related to Z-Test
def read_excel_sheets(file):
    """Reads an Excel file with multiple sheets and returns a dictionary of DataFrames.

    Parameters:
    - file: Path or file-like object accepted by pandas.ExcelFile.
    Returns:
    - Dictionary mapping each sheet name to its DataFrame, or None when the
      workbook cannot be read (the error is shown through st.error).
    """
    try:
        # The context manager closes the workbook even if a sheet fails to parse.
        with pd.ExcelFile(file) as xls:
            return {sheet: xls.parse(sheet) for sheet in xls.sheet_names}
    except Exception as e:
        st.error(f"❌ Error reading Excel file: {e}")
        return None
def z_testes(n1, n2, p1, p2):
    """Performs a two-sided pooled Z-test for two proportions and returns the p-value.

    Parameters:
    - n1, n2: Sample sizes of the two groups.
    - p1, p2: Observed proportions of the two groups.
    Returns:
    - The two-sided p-value as a float, or NaN when the test is undefined
      (non-finite input, non-positive sample size, or zero/negative pooled
      variance, e.g. both proportions equal to 0 or 1).
    """
    if not all(math.isfinite(v) for v in (n1, n2, p1, p2)):
        return np.nan
    if n1 <= 0 or n2 <= 0:
        return np.nan
    pooled_p = (n1 * p1 + n2 * p2) / (n1 + n2)
    variance = pooled_p * (1 - pooled_p) * (1 / n1 + 1 / n2)
    # NumPy scalars do not raise ZeroDivisionError, so the degenerate case is
    # checked explicitly rather than left to an exception handler.
    if variance <= 0:
        return np.nan
    z = (p1 - p2) / math.sqrt(variance)
    # The survival function keeps precision in the far tail, where
    # 1 - cdf(|z|) would round to exactly zero.
    return float(2 * norm.sf(abs(z)))
def Z_test_dataframes(sheets_data):
    """Processes each sheet's DataFrame and computes new DataFrames with Z-test results.

    After the first column of a sheet is moved into the index, the first
    remaining column is the reference group, the last row is read as the
    sample sizes, and every other row is read as proportions. Each remaining
    column is tested against the reference column, row by row.

    Parameters:
    - sheets_data: Dictionary mapping sheet names to DataFrames.
    Returns:
    - Dictionary mapping sheet names to float DataFrames of p-values (one
      row per proportion row, one column per non-reference column). Empty
      sheets and sheets with fewer than two data columns are skipped with a
      warning.
    """
    result_dataframes = {}
    for sheet_name, df in sheets_data.items():
        if df.empty:
            st.warning(f"⚠️ Sheet '{sheet_name}' is empty and has been skipped.")
            continue
        df = df.set_index(df.columns[0])  # Use the first column as index
        if df.shape[1] < 2:
            st.warning(f"⚠️ Sheet '{sheet_name}' does not have enough columns for analysis and has been skipped.")
            continue
        # A float frame (rather than the default object dtype) keeps the
        # p-values numeric for later formatting and styling.
        new_df = pd.DataFrame(index=df.index[:-1], columns=df.columns[1:], dtype=float)
        n1 = df.iloc[-1, 0]  # reference sample size, constant for the sheet
        for i, row_name in enumerate(df.index[:-1]):
            p1 = df.iloc[i, 0]  # reference proportion, constant for the row
            for j, col_name in enumerate(df.columns[1:]):
                try:
                    n2 = df.iloc[-1, j + 1]
                    p2 = df.iloc[i, j + 1]
                    new_df.iloc[i, j] = z_testes(n1, n2, p1, p2)
                except Exception as e:
                    st.error(f"❌ Error processing sheet '{sheet_name}', row '{row_name}', column '{col_name}': {e}")
                    new_df.iloc[i, j] = np.nan
        result_dataframes[sheet_name] = new_df
    return result_dataframes
def _color_p_value(val):
    """Return the CSS background for one p-value cell.

    Green for p < 0.05, coral otherwise, gray for missing or non-comparable
    values.
    """
    try:
        if pd.isna(val):
            return 'background-color: lightgray'
        if val < 0.05:
            return 'background-color: lightgreen'
        return 'background-color: lightcoral'
    except (TypeError, ValueError):
        # Non-numeric cell content cannot be compared with the threshold.
        return 'background-color: lightgray'


def analyze_z_test(file):
    """
    Performs Z-Test analysis on the uploaded Excel file.
    Parameters:
    - file: Uploaded Excel file
    Returns:
    - result_dataframes: Dictionary of DataFrames with p-values, or None when
      the file cannot be read or no sheet is usable
    """
    sheets_data = read_excel_sheets(file)
    if sheets_data is None:
        return None
    result_dataframes = Z_test_dataframes(sheets_data)
    if not result_dataframes:
        st.error("❌ No valid sheets found for Z-Test analysis.")
        return None
    st.write("### 📈 Processed Tables with Z-Test Results")
    for sheet_name, df in result_dataframes.items():
        st.write(f"#### Sheet: {sheet_name}")
        # Apply color coding based on p-value. Styler.map replaces the
        # deprecated Styler.applymap in newer pandas; fall back when absent.
        styler = df.style
        apply_cellwise = styler.map if hasattr(styler, "map") else styler.applymap
        styled_df = apply_cellwise(_color_p_value)
        # Display the styled DataFrame
        st.dataframe(styled_df, use_container_width=True)
    return result_dataframes
# Streamlit User Interface
st.title("Data Analysis Application")

# Main options: only "Tabulation" and the Z test under "Hypothesis test" are
# implemented; every other choice shows an "under development" notice.
main_option = st.selectbox("Please select an option:", ["Tabulation", "Hypothesis test", "Machine Learning", "Coding"])

if main_option == "Tabulation":
    st.header("Tabulation Analysis")
    uploaded_file = st.file_uploader("Please upload your Excel file", type=["xlsx", "xls"])
    if uploaded_file:
        # Read the workbook on its own so that only genuine read failures are
        # reported as read errors.
        df = None
        try:
            df = pd.read_excel(uploaded_file)
        except Exception as e:
            st.error(f"❌ Error reading the Excel file: {e}")

        if df is not None:
            try:
                st.subheader("Displaying the first few rows of the DataFrame")
                st.dataframe(df.head())
                tabulation_option = st.selectbox("Please select the type of analysis:", ["All", "Univariate", "Multivariate"])
                if tabulation_option == "All":
                    st.info("This section of the program is under development.")
                elif tabulation_option == "Univariate":
                    uni_option = st.selectbox("Select the type of univariate analysis:", ["Multiple answer", "Single answer"])
                    if uni_option == "Single answer":
                        var = st.text_input("Please enter the name of the desired column:")
                        if var:
                            if var in df.columns:
                                result_df = single_answer(df[var])
                                st.subheader("Univariate Analysis Results")
                                st.dataframe(result_df)
                                fig = px.bar(result_df, x='Value', y='Percentage', title='Percentage Histogram')
                                st.plotly_chart(fig, use_container_width=True)
                            else:
                                st.error("The entered column was not found.")
                    elif uni_option == "Multiple answer":
                        var = st.text_input("Please enter the name of the desired column:")
                        if var:
                            # A multiple-answer question is spread over
                            # columns named "<var>_<number>".
                            matching_cols = [col for col in df.columns if is_matching_pattern(col, var)]
                            if matching_cols:
                                subset_df = df[matching_cols]
                                result_df = multi_answer(subset_df)
                                st.subheader("Multiple Answer Analysis Results")
                                st.dataframe(result_df)
                                fig = px.bar(result_df, x='Value', y='Percentile', title='Percentile Histogram')
                                st.plotly_chart(fig, use_container_width=True)
                            else:
                                st.error("No columns matching the entered pattern were found.")
                elif tabulation_option == "Multivariate":
                    st.subheader("Multivariate Analysis")
                    var1 = st.text_input("Please enter the name of the first column:")
                    var2 = st.text_input("Please enter the name of the second column:")
                    if var1 and var2:
                        if var1 not in df.columns or var2 not in df.columns:
                            st.error("One or both of the entered columns were not found.")
                        elif var1 == var2:
                            # Selecting the same column twice would produce a
                            # frame with duplicate column labels.
                            st.error("Please enter two different columns.")
                        else:
                            type1 = st.selectbox("Select the type of analysis for the first column:", ["Multiple answer", "Single answer"], key='type1')
                            type2 = st.selectbox("Select the type of analysis for the second column:", ["Multiple answer", "Single answer"], key='type2')
                            if type1 == "Single answer" and type2 == "Single answer":
                                percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
                                st.subheader("Percentile Table")
                                st.dataframe(percentile_df)
                                st.subheader("Frequency Table")
                                st.dataframe(frequency_df)
                                fig = px.imshow(percentile_df, text_auto=True, title='Percentile Heatmap')
                                st.plotly_chart(fig, use_container_width=True)
                            else:
                                st.info("This section of the program is under development.")
            except Exception as e:
                # Failures past this point come from the analysis or the
                # rendering, not from reading the file.
                st.error(f"❌ Error during analysis: {e}")
elif main_option == "Hypothesis test":
    st.header("Hypothesis Testing")
    hypothesis_option = st.selectbox("Please select the type of hypothesis test:", ["Z test", "T test", "Chi-Square test", "ANOVA test"])
    if hypothesis_option != "Z test":
        st.info("This section of the program is under development.")
    else:
        uploaded_file = st.file_uploader("Please upload your Excel file for Z-Test", type=["xlsx", "xls"])
        if uploaded_file:
            result = analyze_z_test(uploaded_file)
            if result:
                st.success("Z-Test analysis completed successfully.")
elif main_option in ["Machine Learning", "Coding"]:
    st.info("This section of the program is under development.")