Spaces:

AMKhakbaz
/

AMKAPP

Running

App Files Files Community

AMKhakbaz committed on Jan 2

Commit

8fb73ac

verified ·

1 Parent(s): 74cbf4e

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -29

app.py CHANGED Viewed

@@ -2,9 +2,52 @@ import streamlit as st
 import pandas as pd
 import numpy as np
 import plotly.express as px
 from scipy.stats import norm
-# Define your helper functions
 def is_matching_pattern(column, prefix):
     if not column.startswith(prefix + '_'):
         return False
@@ -13,41 +56,63 @@ def is_matching_pattern(column, prefix):
         return True
     return False
 def multi_answer(df):
-    frequency = {}
     for i in df.columns:
         unique_values = list(set(df[i].dropna()))[0]
-        frequency[str(unique_values)] = df[i].value_counts().get(unique_values, 0)
-    frequency_dataframe = pd.DataFrame({
-        "Value": frequency.keys(),
-        "Frequency": frequency.values(),
-        "Percentile": np.array(list(frequency.values())) / len(df.dropna(how='all'))
-    }).sort_values(by='Value')
-    frequency_dataframe.loc[len(frequency_dataframe)] = ['Sample_size', len(df.dropna(how='all')), 1]
-    return frequency_dataframe
 def single_answer(df):
     counter = df.value_counts()
-    frequency_dataframe = pd.DataFrame({
-        'Value': counter.index,
-        'Frequency': counter.values,
-        'Percentage': (counter.values / counter.sum()) * 100
-    }).sort_values(by='Value')
-    frequency_dataframe.loc[len(frequency_dataframe)] = ['Sample_size', len(df.dropna()), 1]
-    return frequency_dataframe
 def two_variable_ss(df, var1, var2):
     counter = df.groupby(var1)[var2].value_counts()
-    frequency_dataframe = counter.unstack(fill_value=0)
-    column_sums = frequency_dataframe.sum(axis=0)
-    percentile_dataframe = frequency_dataframe.div(column_sums, axis=1)
-    frequency_dataframe.loc['Sample_size'] = list(single_answer(df[var2]).iloc[:,1])[:-1]
-    frequency_dataframe['Sample_size'] = list(single_answer(df[var1]).iloc[:,1])
-    return percentile_dataframe, frequency_dataframe
 # Functions related to Z-Test
 def read_excel_sheets(file):
@@ -174,7 +239,7 @@ if main_option == "Tabulation":
                             st.subheader("Univariate Analysis Results")
                             st.dataframe(result_df)
-                            fig = px.bar(result_df, x='Value', y='Percentage', title='Percentage Histogram')
                             st.plotly_chart(fig, use_container_width=True)
                         else:
                             st.error("The entered column was not found.")
@@ -187,8 +252,8 @@ if main_option == "Tabulation":
                             result_df = multi_answer(subset_df)
                             st.subheader("Multiple Answer Analysis Results")
                             st.dataframe(result_df)
-                            fig = px.bar(result_df, x='Value', y='Percentile', title='Percentile Histogram')
                             st.plotly_chart(fig, use_container_width=True)
                         else:
                             st.error("No columns matching the entered pattern were found.")
@@ -204,14 +269,30 @@ if main_option == "Tabulation":
                         if type1 == "Single answer" and type2 == "Single answer":
                             percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
-                            st.subheader("Percentile Table")
                             st.dataframe(percentile_df)
                             st.subheader("Frequency Table")
                             st.dataframe(frequency_df)
-                            fig = px.imshow(percentile_df, text_auto=True, title='Percentile Heatmap')
                             st.plotly_chart(fig, use_container_width=True)
                         else:
                             st.info("This section of the program is under development.")
                     else:

 import pandas as pd
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 from scipy.stats import norm
def _bar_traces(data):
    """Return the ``(x, y, name)`` triples to draw as bar traces for *data*."""
    if isinstance(data, pd.Series):
        return [(data.index, data, data.name)]
    if {'Value', 'Percentage'}.issubset(data.columns):
        # Tables built by single_answer/multi_answer end with a 'Sample_size'
        # summary row; it is not an answer category, so keep it off the chart.
        categories = data[data['Value'] != 'Sample_size']
        return [(categories['Value'], categories['Percentage'], 'Percentage')]
    return [(data.index, data[column], str(column)) for column in data.columns]


def figo(plot_type, df, title, xlabel=None, ylabel=None, legend_title=None, colorscale='Plotly3'):
    """Build a Plotly figure of the requested type with the app's common layout.

    Args:
        plot_type: ``"Heatmap"`` or ``"Bar"``.
        df: Data to plot.
            For ``"Heatmap"``: a DataFrame; cells are coerced to numbers and
            anything non-numeric becomes NaN (rendered as a gap).
            For ``"Bar"``: a Series (index on x, values on y), a tabulation
            table with ``Value`` and ``Percentage`` columns, or any other
            DataFrame (one trace per column, index on x).
        title: Figure title, centred at the top.
        xlabel: Optional x-axis title.
        ylabel: Optional y-axis title.
        legend_title: Optional legend title.
        colorscale: Plotly colorscale name; only used by the heatmap.

    Returns:
        The configured ``plotly.graph_objects.Figure``.

    Raises:
        ValueError: If ``plot_type`` is neither ``"Heatmap"`` nor ``"Bar"``.
    """
    if plot_type == "Heatmap":
        numeric = df.apply(pd.to_numeric, errors='coerce')
        fig = go.Figure(data=go.Heatmap(
            z=numeric.values,
            x=numeric.columns,
            y=numeric.index,
            hoverongaps=False,
            colorscale=colorscale,
        ))
    elif plot_type == "Bar":
        fig = go.Figure()
        # The previous implementation read ``df.name``, which only exists on a
        # Series and failed for the DataFrames the tabulation helpers return.
        for x_values, y_values, trace_name in _bar_traces(df):
            fig.add_trace(go.Bar(x=x_values, y=y_values, name=trace_name))
        fig.update_layout(barmode='group')
    else:
        raise ValueError("Invalid plot_type. Supported types are 'Heatmap' and 'Bar'.")

    fig.update_layout(
        title={
            'text': title,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
        },
        xaxis_title=xlabel,
        yaxis_title=ylabel,
        legend_title=legend_title,
        template="plotly_white",
    )
    return fig
 def is_matching_pattern(column, prefix):
     if not column.startswith(prefix + '_'):
         return False
         return True
     return False
 def multi_answer(df):
+    friquency = {}
     for i in df.columns:
         unique_values = list(set(df[i].dropna()))[0]
+        friquency[str(unique_values)] = df[i].value_counts().get(unique_values, 0)
+    friquency_dataframe = pd.DataFrame(
+        {"Value": friquency.keys(),
+         "Friquency": friquency.values(),
+         "Percentage": np.array(list(friquency.values()))/len(df.dropna(how='all'))*100}).sort_values(by='Value')
+    friquency_dataframe.loc[len(friquency_dataframe)] = ['Sample_size', len(df.dropna(how='all')), 1]
+    return friquency_dataframe
 def single_answer(df):
     counter = df.value_counts()
+    friquency_dataframe = pd.DataFrame({
+        'Value': counter.index,
+        'Frequency': counter.values,
+        'Percentage': (counter.values / counter.sum()) * 100}).sort_values(by='Value')
+    friquency_dataframe.loc[len(friquency_dataframe)] = ['Sample_size', len(df.dropna()), 1]
+    return friquency_dataframe
 def two_variable_ss(df, var1, var2):
     counter = df.groupby(var1)[var2].value_counts()
+    friquency_dataframe = counter.unstack(fill_value=0)
+    column_sums = friquency_dataframe.sum(axis=0)
+    percentage_dataframe = friquency_dataframe.div(column_sums, axis=1)
+    friquency_dataframe.loc['Sample_size'] = list(single_answer(df[var2]).iloc[:,1])[:-1]
+    friquency_dataframe['Sample_size'] = list(single_answer(df[var1]).iloc[:,1])
+    return percentage_dataframe, friquency_dataframe
def two_variable_sm(df, var1, var2):
    """Cross-tabulate a single-answer column against a multiple-answer question.

    Args:
        df: DataFrame containing ``var1`` and every column listed in ``var2``.
        var1: Name of the single-answer column; its values become the table
            columns.
        var2: List of the option columns of the multiple-answer question;
            the option labels become the table rows.

    Returns:
        ``(percentage_dataframe, friquency_dataframe)``. Both have one row per
        option and one column per ``var1`` value, plus a ``Sample_size`` row
        (number of answers per ``var1`` value) and a ``Sample_size`` column
        (overall frequency per option, overall sample size in the corner).
        Percentages are relative to the rows of each ``var1`` group that
        answered at least one option.
    """
    overall = multi_answer(df[var2])    # option rows + trailing Sample_size row
    groups = single_answer(df[var1])    # category rows + trailing Sample_size row

    option_labels = pd.Index(list(overall.iloc[:-1, 0]), name='Value')
    # Take the categories from the same sorted table that supplies the
    # Sample_size row. Iterating a set gave an arbitrary column order that
    # did not line up with those sizes.
    categories = list(groups.iloc[:-1, 0])

    friquency_columns, percentage_columns = {}, {}
    for category in categories:
        subset = df.loc[df[var1] == category, var2]
        # multi_answer cannot label an option nobody in this group picked, so
        # tabulate only the answered options and fill the rest with zero.
        answered = subset.loc[:, subset.notna().any()]
        if answered.shape[1] == 0:
            friquency_columns[category] = pd.Series(0, index=option_labels)
            percentage_columns[category] = pd.Series(0.0, index=option_labels)
            continue
        table = multi_answer(answered).iloc[:-1].set_index('Value')
        # Align by option label rather than by row position.
        friquency_columns[category] = table['Friquency'].reindex(option_labels, fill_value=0)
        percentage_columns[category] = table['Percentage'].reindex(option_labels, fill_value=0)

    friquency_dataframe = pd.DataFrame(friquency_columns, index=option_labels)
    percentage_dataframe = pd.DataFrame(percentage_columns, index=option_labels)

    group_sizes = list(groups.iloc[:-1, 1])
    option_totals = list(overall.iloc[:, 1])
    for table in (friquency_dataframe, percentage_dataframe):
        table.loc['Sample_size'] = group_sizes
        table['Sample_size'] = option_totals
    return percentage_dataframe, friquency_dataframe
 # Functions related to Z-Test
 def read_excel_sheets(file):
                             st.subheader("Univariate Analysis Results")
                             st.dataframe(result_df)
+                            fig = figo('Bar', result_df, title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
                             st.plotly_chart(fig, use_container_width=True)
                         else:
                             st.error("The entered column was not found.")
                             result_df = multi_answer(subset_df)
                             st.subheader("Multiple Answer Analysis Results")
                             st.dataframe(result_df)
+                            fig = figo('Bar', result_df, title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
                             st.plotly_chart(fig, use_container_width=True)
                         else:
                             st.error("No columns matching the entered pattern were found.")
                         if type1 == "Single answer" and type2 == "Single answer":
                             percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
+                            st.subheader("Percentage Table")
                             st.dataframe(percentile_df)
                             st.subheader("Frequency Table")
                             st.dataframe(frequency_df)
+                            fig = figo('Heatmap', percentile_df, title='Percentage Histogram', xlabel=var1, ylabel=var2, colorscale='Plotly3')
                             st.plotly_chart(fig, use_container_width=True)
+                        elif type1 == "Single answer" and type2 == "Multiple answer":
+                            matching_cols = [col for col in df.columns if is_matching_pattern(col, var)]
+                            if matching_cols:
+                                percentile_df, frequency_df = two_variable_sm(df[[var1] + matching_cols], var1, matching_cols)
+                                st.subheader("Percentage Table")
+                                st.dataframe(percentile_df)
+                                st.subheader("Frequency Table")
+                                st.dataframe(frequency_df)
+                                fig = figo('Heatmap', percentile_df, title='Percentage Histogram', xlabel=var1, ylabel=var2, colorscale='Plotly3')
+                                st.plotly_chart(fig, use_container_width=True)
+                            else:
+                                st.error("No columns matching the entered pattern were found.")
                         else:
                             st.info("This section of the program is under development.")
                     else: