AMKhakbaz committed on
Commit
8fb73ac
·
verified ·
1 Parent(s): 74cbf4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -29
app.py CHANGED
@@ -2,9 +2,52 @@ import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
  import plotly.express as px
 
5
  from scipy.stats import norm
6
 
7
- # Define your helper functions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def is_matching_pattern(column, prefix):
9
  if not column.startswith(prefix + '_'):
10
  return False
@@ -13,41 +56,63 @@ def is_matching_pattern(column, prefix):
13
  return True
14
  return False
15
 
 
16
  def multi_answer(df):
17
- frequency = {}
18
  for i in df.columns:
19
  unique_values = list(set(df[i].dropna()))[0]
20
- frequency[str(unique_values)] = df[i].value_counts().get(unique_values, 0)
21
-
22
- frequency_dataframe = pd.DataFrame({
23
- "Value": frequency.keys(),
24
- "Frequency": frequency.values(),
25
- "Percentile": np.array(list(frequency.values())) / len(df.dropna(how='all'))
26
- }).sort_values(by='Value')
27
- frequency_dataframe.loc[len(frequency_dataframe)] = ['Sample_size', len(df.dropna(how='all')), 1]
28
- return frequency_dataframe
29
 
30
  def single_answer(df):
31
  counter = df.value_counts()
32
- frequency_dataframe = pd.DataFrame({
33
- 'Value': counter.index,
34
- 'Frequency': counter.values,
35
- 'Percentage': (counter.values / counter.sum()) * 100
36
- }).sort_values(by='Value')
37
- frequency_dataframe.loc[len(frequency_dataframe)] = ['Sample_size', len(df.dropna()), 1]
38
- return frequency_dataframe
39
 
40
  def two_variable_ss(df, var1, var2):
 
41
  counter = df.groupby(var1)[var2].value_counts()
42
- frequency_dataframe = counter.unstack(fill_value=0)
 
 
 
43
 
44
- column_sums = frequency_dataframe.sum(axis=0)
45
- percentile_dataframe = frequency_dataframe.div(column_sums, axis=1)
46
 
47
- frequency_dataframe.loc['Sample_size'] = list(single_answer(df[var2]).iloc[:,1])[:-1]
48
- frequency_dataframe['Sample_size'] = list(single_answer(df[var1]).iloc[:,1])
49
 
50
- return percentile_dataframe, frequency_dataframe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  # Functions related to Z-Test
53
  def read_excel_sheets(file):
@@ -174,7 +239,7 @@ if main_option == "Tabulation":
174
  st.subheader("Univariate Analysis Results")
175
  st.dataframe(result_df)
176
 
177
- fig = px.bar(result_df, x='Value', y='Percentage', title='Percentage Histogram')
178
  st.plotly_chart(fig, use_container_width=True)
179
  else:
180
  st.error("The entered column was not found.")
@@ -187,8 +252,8 @@ if main_option == "Tabulation":
187
  result_df = multi_answer(subset_df)
188
  st.subheader("Multiple Answer Analysis Results")
189
  st.dataframe(result_df)
190
-
191
- fig = px.bar(result_df, x='Value', y='Percentile', title='Percentile Histogram')
192
  st.plotly_chart(fig, use_container_width=True)
193
  else:
194
  st.error("No columns matching the entered pattern were found.")
@@ -204,14 +269,30 @@ if main_option == "Tabulation":
204
 
205
  if type1 == "Single answer" and type2 == "Single answer":
206
  percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
207
- st.subheader("Percentile Table")
208
  st.dataframe(percentile_df)
209
 
210
  st.subheader("Frequency Table")
211
  st.dataframe(frequency_df)
212
 
213
- fig = px.imshow(percentile_df, text_auto=True, title='Percentile Heatmap')
214
  st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  else:
216
  st.info("This section of the program is under development.")
217
  else:
 
2
  import pandas as pd
3
  import numpy as np
4
  import plotly.express as px
5
+ import plotly.graph_objects as go
6
  from scipy.stats import norm
7
 
8
def figo(plot_type, df, title, xlabel=None, ylabel=None, legend_title=None,
         colorscale='Plotly3', x_col='Value', y_col='Percentage'):
    """Build a plotly figure (heatmap or bar chart) with a centred title.

    Args:
        plot_type: Either ``"Heatmap"`` or ``"Bar"``.
        df: For ``"Heatmap"``, a DataFrame; its cells are coerced to numbers
            (non-numeric cells become NaN) and drawn with columns on the
            x axis and the index on the y axis. For ``"Bar"``, either a
            Series (index on x, values on y) or a DataFrame, in which case
            ``x_col`` and ``y_col`` select the plotted columns.
        title: Figure title text.
        xlabel: Optional x-axis title.
        ylabel: Optional y-axis title.
        legend_title: Optional legend title.
        colorscale: Colour scale name; only used by the heatmap.
        x_col: Column used for the x axis when ``df`` is a DataFrame bar input.
        y_col: Column used for the y axis when ``df`` is a DataFrame bar input.

    Returns:
        The configured ``plotly.graph_objects.Figure``.

    Raises:
        ValueError: If ``plot_type`` is not one of the supported types.
    """
    if plot_type == "Heatmap":
        numeric = df.apply(pd.to_numeric, errors='coerce')

        fig = go.Figure(data=go.Heatmap(
            z=numeric.values,
            x=numeric.columns,
            y=numeric.index,
            hoverongaps=False,
            colorscale=colorscale,
        ))

    elif plot_type == "Bar":
        if isinstance(df, pd.DataFrame):
            # The tabulation helpers return DataFrames, which have no `.name`;
            # plot the requested columns instead of failing with AttributeError.
            x_values, y_values, trace_name = df[x_col], df[y_col], y_col
        else:
            x_values, y_values, trace_name = df.index, df, df.name

        fig = go.Figure()
        fig.add_trace(go.Bar(x=x_values, y=y_values, name=trace_name))
        fig.update_layout(barmode='group')

    else:
        raise ValueError("Invalid plot_type. Supported types are 'Heatmap' and 'Bar'.")

    fig.update_layout(
        title={
            'text': title,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
        },
        xaxis_title=xlabel,
        yaxis_title=ylabel,
        legend_title=legend_title,
        template="plotly_white",
    )

    return fig
50
+
51
  def is_matching_pattern(column, prefix):
52
  if not column.startswith(prefix + '_'):
53
  return False
 
56
  return True
57
  return False
58
 
59
+
60
def multi_answer(df):
    """Tabulate a multiple-answer question stored as one column per option.

    Each column contributes one row: the label is the string form of the
    column's first non-null value and the count is how often that value
    occurs in the column. A column with no answers at all is reported with a
    count of 0 under its own column name, since it has no value to use as a
    label.

    Args:
        df: DataFrame holding only the option columns of one question.

    Returns:
        DataFrame with columns ``Value``, ``Friquency`` and ``Percentage``
        (0-100, relative to the rows that have at least one non-null
        option), sorted by ``Value``, followed by a final ``Sample_size``
        row carrying the respondent count.
    """
    # Respondents are the rows that answered at least one option.
    sample_size = len(df.dropna(how='all'))

    friquency = {}
    for column in df.columns:
        answers = df[column].dropna()
        if answers.empty:
            # Option never selected: avoid indexing into an empty collection.
            friquency[str(column)] = 0
            continue
        label = answers.iloc[0]
        friquency[str(label)] = answers.value_counts().get(label, 0)

    counts = np.array(list(friquency.values()))
    if sample_size:
        percentages = counts / sample_size * 100
    else:
        percentages = np.zeros(len(counts))

    # NOTE: the 'Friquency' key is misspelled but is part of the returned
    # schema; other code in this file selects the column by this exact name.
    friquency_dataframe = pd.DataFrame({
        "Value": list(friquency.keys()),
        "Friquency": counts,
        "Percentage": percentages,
    }).sort_values(by='Value')

    # The summary row uses the same 0-100 scale as the data rows.
    friquency_dataframe.loc[len(friquency_dataframe)] = [
        'Sample_size', sample_size, 100 if sample_size else 0,
    ]
    return friquency_dataframe
72
+
73
 
74
def single_answer(df):
    """Tabulate the frequency and percentage of each distinct answer.

    Args:
        df: The answers to tabulate (the visible callers pass a single
            DataFrame column); missing values are not counted.

    Returns:
        DataFrame with columns ``Value``, ``Frequency`` and ``Percentage``
        (0-100, relative to the number of counted answers), sorted by
        ``Value``, followed by a final ``Sample_size`` row carrying the
        number of non-missing answers.
    """
    counter = df.value_counts()
    total = counter.sum()

    friquency_dataframe = pd.DataFrame({
        'Value': counter.index,
        'Frequency': counter.values,
        'Percentage': (counter.values / total) * 100,
    }).sort_values(by='Value')

    # The summary row uses the same 0-100 scale as the data rows.
    friquency_dataframe.loc[len(friquency_dataframe)] = [
        'Sample_size', len(df.dropna()), 100 if total else 0,
    ]
    return friquency_dataframe
 
82
 
83
def two_variable_ss(df, var1, var2):
    """Cross-tabulate two single-answer variables.

    Args:
        df: DataFrame containing the columns ``var1`` and ``var2``.
        var1: Column whose values become the rows of the tables.
        var2: Column whose values become the columns of the tables.

    Returns:
        A ``(percentage_dataframe, friquency_dataframe)`` tuple.
        ``percentage_dataframe`` holds column percentages (0-100): each cell
        is its count divided by the total of its ``var2`` column.
        ``friquency_dataframe`` holds the raw counts plus a ``Sample_size``
        row (non-missing answers per ``var2`` value) and a ``Sample_size``
        column (non-missing answers per ``var1`` value, ending with the total
        number of non-missing ``var1`` answers).
    """
    # Marginal sample sizes, keyed by category and sorted by category.
    var1_sizes = df[var1].value_counts().sort_index()
    var2_sizes = df[var2].value_counts().sort_index()

    counter = df.groupby(var1)[var2].value_counts()
    # Align the table with the marginals so a category whose partner answers
    # are all missing still gets a (zero) row/column; otherwise the positional
    # Sample_size assignments below would fail on a length mismatch.
    friquency_dataframe = counter.unstack(fill_value=0).reindex(
        index=var1_sizes.index, columns=var2_sizes.index, fill_value=0,
    )

    column_sums = friquency_dataframe.sum(axis=0)
    # Scale to 0-100 so the table matches its "Percentage" label.
    percentage_dataframe = friquency_dataframe.div(column_sums, axis=1) * 100

    friquency_dataframe.loc['Sample_size'] = list(var2_sizes)
    friquency_dataframe['Sample_size'] = list(var1_sizes) + [int(var1_sizes.sum())]

    return percentage_dataframe, friquency_dataframe
 
95
 
96
def two_variable_sm(df, var1, var2):
    """Cross-tabulate a single-answer variable against a multiple-answer one.

    Args:
        df: DataFrame containing ``var1`` and every column listed in ``var2``.
        var1: Name of the single-answer column; its values become the table
            columns.
        var2: List of column names holding the options of the multiple-answer
            question; the option labels become the table rows.

    Returns:
        A ``(percentage_dataframe, friquency_dataframe)`` tuple. Both tables
        have one row per option and one column per ``var1`` value, filled
        from ``multi_answer`` applied to each ``var1`` subgroup, plus a
        ``Sample_size`` row (non-missing answers per ``var1`` value) and a
        ``Sample_size`` column (overall count per option, ending with the
        overall sample size reported by ``multi_answer``).
    """
    overall = multi_answer(df[var2])
    option_labels = overall.iloc[:-1, 0]

    # Iterate the groups in sorted order so the table columns line up with the
    # positionally assigned Sample_size row below; iterating a set() gave an
    # arbitrary order that could attach sizes to the wrong group.
    group_sizes = df[var1].value_counts().sort_index()

    friquency_by_group, percentage_by_group = {}, {}
    for group in group_sizes.index:
        table = multi_answer(df[df[var1] == group][var2]).iloc[:-1, :]
        friquency_by_group[group] = table['Friquency']
        percentage_by_group[group] = table['Percentage']

    # Rows are matched on multi_answer's index labels and put into the same
    # order as `option_labels` before the labels are applied positionally.
    friquency_dataframe = pd.DataFrame(friquency_by_group).reindex(option_labels.index)
    percentage_dataframe = pd.DataFrame(percentage_by_group).reindex(option_labels.index)

    friquency_dataframe.index, percentage_dataframe.index = option_labels, option_labels

    group_totals = list(group_sizes)
    option_totals = list(overall.iloc[:, 1])
    for table in (friquency_dataframe, percentage_dataframe):
        table.loc['Sample_size'] = group_totals
        table['Sample_size'] = option_totals

    return percentage_dataframe, friquency_dataframe
116
 
117
  # Functions related to Z-Test
118
  def read_excel_sheets(file):
 
239
  st.subheader("Univariate Analysis Results")
240
  st.dataframe(result_df)
241
 
242
+ fig = figo('Bar', result_df, title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
243
  st.plotly_chart(fig, use_container_width=True)
244
  else:
245
  st.error("The entered column was not found.")
 
252
  result_df = multi_answer(subset_df)
253
  st.subheader("Multiple Answer Analysis Results")
254
  st.dataframe(result_df)
255
+
256
+ fig = figo('Bar', result_df, title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
257
  st.plotly_chart(fig, use_container_width=True)
258
  else:
259
  st.error("No columns matching the entered pattern were found.")
 
269
 
270
  if type1 == "Single answer" and type2 == "Single answer":
271
  percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
272
+ st.subheader("Percentage Table")
273
  st.dataframe(percentile_df)
274
 
275
  st.subheader("Frequency Table")
276
  st.dataframe(frequency_df)
277
 
278
+ fig = figo('Heatmap', percentile_df, title='Percentage Histogram', xlabel=var1, ylabel=var2, colorscale='Plotly3')
279
  st.plotly_chart(fig, use_container_width=True)
280
+
281
+ elif type1 == "Single answer" and type2 == "Multiple answer":
282
+ matching_cols = [col for col in df.columns if is_matching_pattern(col, var)]
283
+ if matching_cols:
284
+ percentile_df, frequency_df = two_variable_sm(df[[var1] + matching_cols], var1, matching_cols)
285
+ st.subheader("Percentage Table")
286
+ st.dataframe(percentile_df)
287
+
288
+ st.subheader("Frequency Table")
289
+ st.dataframe(frequency_df)
290
+
291
+ fig = figo('Heatmap', percentile_df, title='Percentage Histogram', xlabel=var1, ylabel=var2, colorscale='Plotly3')
292
+ st.plotly_chart(fig, use_container_width=True)
293
+
294
+ else:
295
+ st.error("No columns matching the entered pattern were found.")
296
  else:
297
  st.info("This section of the program is under development.")
298
  else: