Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,9 +2,52 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
import plotly.express as px
|
|
|
5 |
from scipy.stats import norm
|
6 |
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
def is_matching_pattern(column, prefix):
|
9 |
if not column.startswith(prefix + '_'):
|
10 |
return False
|
@@ -13,41 +56,63 @@ def is_matching_pattern(column, prefix):
|
|
13 |
return True
|
14 |
return False
|
15 |
|
|
|
16 |
def multi_answer(df):
|
17 |
-
|
18 |
for i in df.columns:
|
19 |
unique_values = list(set(df[i].dropna()))[0]
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
"Value":
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
|
30 |
def single_answer(df):
|
31 |
counter = df.value_counts()
|
32 |
-
|
33 |
-
'Value': counter.index,
|
34 |
-
'Frequency': counter.values,
|
35 |
-
'Percentage': (counter.values / counter.sum()) * 100
|
36 |
-
|
37 |
-
|
38 |
-
return frequency_dataframe
|
39 |
|
40 |
def two_variable_ss(df, var1, var2):
|
|
|
41 |
counter = df.groupby(var1)[var2].value_counts()
|
42 |
-
|
|
|
|
|
|
|
43 |
|
44 |
-
|
45 |
-
|
46 |
|
47 |
-
|
48 |
-
frequency_dataframe['Sample_size'] = list(single_answer(df[var1]).iloc[:,1])
|
49 |
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
# Functions related to Z-Test
|
53 |
def read_excel_sheets(file):
|
@@ -174,7 +239,7 @@ if main_option == "Tabulation":
|
|
174 |
st.subheader("Univariate Analysis Results")
|
175 |
st.dataframe(result_df)
|
176 |
|
177 |
-
fig =
|
178 |
st.plotly_chart(fig, use_container_width=True)
|
179 |
else:
|
180 |
st.error("The entered column was not found.")
|
@@ -187,8 +252,8 @@ if main_option == "Tabulation":
|
|
187 |
result_df = multi_answer(subset_df)
|
188 |
st.subheader("Multiple Answer Analysis Results")
|
189 |
st.dataframe(result_df)
|
190 |
-
|
191 |
-
fig =
|
192 |
st.plotly_chart(fig, use_container_width=True)
|
193 |
else:
|
194 |
st.error("No columns matching the entered pattern were found.")
|
@@ -204,14 +269,30 @@ if main_option == "Tabulation":
|
|
204 |
|
205 |
if type1 == "Single answer" and type2 == "Single answer":
|
206 |
percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
|
207 |
-
st.subheader("
|
208 |
st.dataframe(percentile_df)
|
209 |
|
210 |
st.subheader("Frequency Table")
|
211 |
st.dataframe(frequency_df)
|
212 |
|
213 |
-
fig =
|
214 |
st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
else:
|
216 |
st.info("This section of the program is under development.")
|
217 |
else:
|
|
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
import plotly.express as px
|
5 |
+
import plotly.graph_objects as go
|
6 |
from scipy.stats import norm
|
7 |
|
8 |
+
def figo(plot_type, df, title, xlabel=None, ylabel=None, legend_title=None, colorscale='Plotly3'):
    """Build a Plotly figure (heatmap or grouped bar chart) from pandas data.

    Args:
        plot_type: Either "Heatmap" or "Bar".
        df: For "Heatmap", a DataFrame whose values become the z-matrix, with
            columns on the x axis and the index on the y axis. For "Bar",
            either a Series (one trace) or a DataFrame (one trace per column).
        title: Figure title, centred at the top of the figure.
        xlabel: Optional x-axis title.
        ylabel: Optional y-axis title.
        legend_title: Optional legend title.
        colorscale: Colour scale name passed to the heatmap; not used by "Bar".

    Returns:
        The configured ``go.Figure``.

    Raises:
        ValueError: If ``plot_type`` is neither "Heatmap" nor "Bar".
    """
    if plot_type == "Heatmap":
        # Non-numeric cells become NaN so they render as gaps in the heatmap.
        df = df.apply(pd.to_numeric, errors='coerce')

        fig = go.Figure(data=go.Heatmap(
            z=df.values,
            x=df.columns,
            y=df.index,
            hoverongaps=False,
            colorscale=colorscale
        ))

    elif plot_type == "Bar":
        fig = go.Figure()
        if isinstance(df, pd.DataFrame):
            # A DataFrame has no ``.name``; draw one trace per column instead
            # of failing with AttributeError.
            for col, series in df.items():
                fig.add_trace(go.Bar(
                    x=series.index,
                    y=series,
                    name=str(col)
                ))
        else:
            fig.add_trace(go.Bar(
                x=df.index,
                y=df,
                name=df.name
            ))

        fig.update_layout(barmode='group')

    else:
        raise ValueError("Invalid plot_type. Supported types are 'Heatmap' and 'Bar'.")

    fig.update_layout(
        title={
            'text': title,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        },
        xaxis_title=xlabel,
        yaxis_title=ylabel,
        legend_title=legend_title,
        template="plotly_white"
    )

    return fig
|
50 |
+
|
51 |
def is_matching_pattern(column, prefix):
|
52 |
if not column.startswith(prefix + '_'):
|
53 |
return False
|
|
|
56 |
return True
|
57 |
return False
|
58 |
|
59 |
+
|
60 |
def multi_answer(df):
    """Tabulate a multiple-answer question stored as one column per option.

    Each column contributes one row: its label is the column's most frequent
    non-null value (converted to ``str``) and its frequency is the number of
    times that value occurs. Columns with no non-null value are skipped.

    Args:
        df: DataFrame holding only the option columns of one question.

    Returns:
        A DataFrame with columns ``Value``, ``Friquency`` and ``Percentage``,
        sorted by ``Value``. Percentages are relative to the number of rows
        that have at least one non-null option. A final ``Sample_size`` row
        carries that row count, with ``1`` in the ``Percentage`` column.
    """
    # Rows with at least one answer; used as the percentage base and the
    # Sample_size figure.
    respondents = len(df.dropna(how='all'))

    friquency = {}
    for column in df.columns:
        counts = df[column].value_counts()
        if counts.empty:
            # An all-empty option column has no label to report.
            continue
        # value_counts() is ordered by descending count, so the first entry is
        # a deterministic label even if the column holds several values.
        friquency[str(counts.index[0])] = counts.iloc[0]

    # The "Friquency" column key is kept as-is because other code reads it.
    friquency_dataframe = pd.DataFrame({
        "Value": list(friquency.keys()),
        "Friquency": list(friquency.values()),
        "Percentage": np.array(list(friquency.values())) / respondents * 100,
    }).sort_values(by='Value')
    friquency_dataframe.loc[len(friquency_dataframe)] = ['Sample_size', respondents, 1]
    return friquency_dataframe
|
72 |
+
|
73 |
|
74 |
def single_answer(df):
    """Tabulate a single-answer question.

    Args:
        df: The answers; ``value_counts()`` and ``dropna()`` are called on it
            (the visible callers pass a single DataFrame column).

    Returns:
        A DataFrame with columns ``Value``, ``Frequency`` and ``Percentage``
        (percent of non-null answers), sorted by ``Value``. A final
        ``Sample_size`` row carries the number of non-null answers, with
        ``1`` in the ``Percentage`` column.
    """
    counter = df.value_counts()
    friquency_dataframe = pd.DataFrame({
        'Value': counter.index,
        'Frequency': counter.values,
        'Percentage': (counter.values / counter.sum()) * 100,
    })
    try:
        friquency_dataframe = friquency_dataframe.sort_values(by='Value')
    except TypeError:
        # Mixed answer types (e.g. ints and strings) cannot be compared
        # directly; order them by their string form instead of failing.
        friquency_dataframe = friquency_dataframe.sort_values(
            by='Value', key=lambda values: values.astype(str))
    friquency_dataframe.loc[len(friquency_dataframe)] = ['Sample_size', len(df.dropna()), 1]
    return friquency_dataframe
|
|
|
82 |
|
83 |
def two_variable_ss(df, var1, var2):
    """Cross-tabulate two single-answer variables.

    Args:
        df: DataFrame containing both variables.
        var1: Column whose values become the rows of the tables.
        var2: Column whose values become the columns of the tables.

    Returns:
        ``(percentage_dataframe, friquency_dataframe)``. The frequency table
        holds the counts of each (var1, var2) pair plus a ``Sample_size`` row
        and column with the column and row totals. The percentage table holds
        each count divided by its column total (a fraction between 0 and 1)
        and has no ``Sample_size`` margins.
    """
    counter = df.groupby(var1)[var2].value_counts()
    friquency_dataframe = counter.unstack(fill_value=0)

    column_sums = friquency_dataframe.sum(axis=0)
    row_sums = friquency_dataframe.sum(axis=1)
    percentage_dataframe = friquency_dataframe.div(column_sums, axis=1)

    # Margins are taken from the cross-tab itself so they always match its
    # shape and label order, even when rows with a missing var1 or var2 were
    # left out of the counts.
    friquency_dataframe.loc['Sample_size'] = column_sums.tolist()
    friquency_dataframe['Sample_size'] = row_sums.tolist() + [int(column_sums.sum())]

    return percentage_dataframe, friquency_dataframe
|
|
|
95 |
|
96 |
+
def two_variable_sm(df, var1, var2):
    """Cross-tabulate a single-answer variable against a multiple-answer one.

    Args:
        df: DataFrame containing ``var1`` and every column listed in ``var2``.
        var1: Single-answer column; each of its values becomes a table column.
        var2: List of option columns of the multiple-answer question; each
            option label (as reported by ``multi_answer``) becomes a table row.

    Returns:
        ``(percentage_dataframe, friquency_dataframe)``. Both tables have one
        row per option label and one column per ``var1`` value, filled from
        ``multi_answer`` run on the rows of that group. Both also get a
        ``Sample_size`` row (group sizes from ``single_answer``) and a
        ``Sample_size`` column (overall option frequencies from
        ``multi_answer``, ending with its respondent count).
    """
    # The group order comes from single_answer so the Sample_size row below
    # lines up with the table columns.
    groups = single_answer(df[var1]).iloc[:-1]
    unique_values = list(groups.iloc[:, 0])
    group_sizes = list(groups.iloc[:, 1])

    # Overall tabulation, computed once: option labels and the right-hand margin.
    overall = multi_answer(df[var2])
    value = pd.Index(list(overall.iloc[:-1, 0]), name='Value')
    option_totals = list(overall.iloc[:, 1])

    friquency_columns, percentage_columns = {}, {}
    for group in unique_values:
        dataframe = multi_answer(df[df[var1] == group][var2]).iloc[:-1, :].set_index('Value')
        # Align by option label rather than by position; an option that is
        # absent from this group's tabulation is reported as 0.
        friquency_columns[group] = dataframe['Friquency'].reindex(value, fill_value=0)
        percentage_columns[group] = dataframe['Percentage'].reindex(value, fill_value=0)

    friquency_dataframe = pd.DataFrame(friquency_columns, index=value)
    percentage_dataframe = pd.DataFrame(percentage_columns, index=value)

    for frame in (friquency_dataframe, percentage_dataframe):
        frame.loc['Sample_size'] = group_sizes
        frame['Sample_size'] = option_totals

    return percentage_dataframe, friquency_dataframe
|
116 |
|
117 |
# Functions related to Z-Test
|
118 |
def read_excel_sheets(file):
|
|
|
239 |
st.subheader("Univariate Analysis Results")
|
240 |
st.dataframe(result_df)
|
241 |
|
242 |
+
fig = figo('Bar', result_df, title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
|
243 |
st.plotly_chart(fig, use_container_width=True)
|
244 |
else:
|
245 |
st.error("The entered column was not found.")
|
|
|
252 |
result_df = multi_answer(subset_df)
|
253 |
st.subheader("Multiple Answer Analysis Results")
|
254 |
st.dataframe(result_df)
|
255 |
+
|
256 |
+
fig = figo('Bar', result_df, title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
|
257 |
st.plotly_chart(fig, use_container_width=True)
|
258 |
else:
|
259 |
st.error("No columns matching the entered pattern were found.")
|
|
|
269 |
|
270 |
if type1 == "Single answer" and type2 == "Single answer":
|
271 |
percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
|
272 |
+
st.subheader("Percentage Table")
|
273 |
st.dataframe(percentile_df)
|
274 |
|
275 |
st.subheader("Frequency Table")
|
276 |
st.dataframe(frequency_df)
|
277 |
|
278 |
+
fig = figo('Heatmap', percentile_df, title='Percentage Histogram', xlabel=var1, ylabel=var2, colorscale='Plotly3')
|
279 |
st.plotly_chart(fig, use_container_width=True)
|
280 |
+
|
281 |
+
elif type1 == "Single answer" and type2 == "Multiple answer":
|
282 |
+
matching_cols = [col for col in df.columns if is_matching_pattern(col, var)]
|
283 |
+
if matching_cols:
|
284 |
+
percentile_df, frequency_df = two_variable_sm(df[[var1] + matching_cols], var1, matching_cols)
|
285 |
+
st.subheader("Percentage Table")
|
286 |
+
st.dataframe(percentile_df)
|
287 |
+
|
288 |
+
st.subheader("Frequency Table")
|
289 |
+
st.dataframe(frequency_df)
|
290 |
+
|
291 |
+
fig = figo('Heatmap', percentile_df, title='Percentage Histogram', xlabel=var1, ylabel=var2, colorscale='Plotly3')
|
292 |
+
st.plotly_chart(fig, use_container_width=True)
|
293 |
+
|
294 |
+
else:
|
295 |
+
st.error("No columns matching the entered pattern were found.")
|
296 |
else:
|
297 |
st.info("This section of the program is under development.")
|
298 |
else:
|