Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -82,6 +82,16 @@ def single_answer(df):
|
|
82 |
friquency_dataframe.loc[len(friquency_dataframe)] = ['Sample_size', len(df.dropna()), 1]
|
83 |
return friquency_dataframe
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
def two_variable_ss(df, var1, var2):
|
86 |
|
87 |
counter = df.groupby(var1)[var2].value_counts()
|
@@ -162,6 +172,24 @@ def z_testes(n1, n2, p1, p2):
|
|
162 |
except ZeroDivisionError:
|
163 |
return np.nan
|
164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
def Z_test_dataframes(sheets_data):
|
166 |
"""Processes each sheet's DataFrame and computes new DataFrames with Z-test results."""
|
167 |
result_dataframes = {}
|
@@ -255,7 +283,7 @@ if main_option == "Tabulation":
|
|
255 |
if tabulation_option == "All":
|
256 |
st.info("This section of the program is under development.")
|
257 |
elif tabulation_option == "Univariate":
|
258 |
-
uni_option = st.selectbox("Select the type of univariate analysis:", ["Multiple answer", "Single answer"])
|
259 |
|
260 |
if uni_option == "Single answer":
|
261 |
var = st.text_input("Please enter the name of the desired column:")
|
@@ -284,6 +312,21 @@ if main_option == "Tabulation":
|
|
284 |
st.plotly_chart(fig, use_container_width=True)
|
285 |
else:
|
286 |
st.error("No columns matching the entered pattern were found.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
elif tabulation_option == "Multivariate":
|
288 |
st.subheader("Multivariate Analysis")
|
289 |
var1 = st.text_input("Please enter the name of the first column:")
|
@@ -296,7 +339,7 @@ if main_option == "Tabulation":
|
|
296 |
if type1 == "Single answer" and type2 == "Single answer":
|
297 |
percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
|
298 |
st.subheader("Percentage Table")
|
299 |
-
st.dataframe(percentile_df)
|
300 |
|
301 |
st.subheader("Frequency Table")
|
302 |
st.dataframe(frequency_df)
|
@@ -309,7 +352,7 @@ if main_option == "Tabulation":
|
|
309 |
if matching_cols:
|
310 |
percentile_df, frequency_df = two_variable_sm(df[[var1] + matching_cols], var1, matching_cols)
|
311 |
st.subheader("Percentage Table")
|
312 |
-
st.dataframe(percentile_df)
|
313 |
|
314 |
st.subheader("Frequency Table")
|
315 |
st.dataframe(frequency_df)
|
@@ -326,7 +369,7 @@ if main_option == "Tabulation":
|
|
326 |
if matching_cols1 and matching_cols2:
|
327 |
percentile_df, frequency_df = two_variable_mm(df[matching_cols1 + matching_cols2], matching_cols1, matching_cols2)
|
328 |
st.subheader("Percentage Table")
|
329 |
-
st.dataframe(percentile_df)
|
330 |
|
331 |
st.subheader("Frequency Table")
|
332 |
st.dataframe(frequency_df)
|
|
|
82 |
friquency_dataframe.loc[len(friquency_dataframe)] = ['Sample_size', len(df.dropna()), 1]
|
83 |
return friquency_dataframe
|
84 |
|
85 |
+
def score_answer(df):
    """Build a frequency table for a score column, followed by its mean and variance.

    Args:
        df: The column of scores to summarise. The Streamlit caller passes a
            single column selected with ``df[var]``; the object must provide
            ``value_counts()``, ``mean()`` and ``var()``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Value``, ``Frequency`` and
        ``Percentage``. It holds one row per distinct score (ordered by
        score), then a ``"Mean"`` row and a ``"Variance"`` row. For those two
        summary rows the statistic is stored in ``Frequency`` and
        ``Percentage`` is an empty string.
    """
    counter = df.value_counts().sort_index()
    percentages = (counter.values / counter.sum()) * 100

    frequency_dataframe = pd.DataFrame({
        # Summary-row label was misspelled "Meen"; it is shown to the user.
        'Value': list(counter.index) + ["Mean", "Variance"],
        'Frequency': list(counter.values) + [df.mean(), df.var()],
        # The summary rows have no percentage, so they are left blank.
        'Percentage': list(percentages) + ["", ""],
    })

    return frequency_dataframe
|
94 |
+
|
95 |
def two_variable_ss(df, var1, var2):
|
96 |
|
97 |
counter = df.groupby(var1)[var2].value_counts()
|
|
|
172 |
except ZeroDivisionError:
|
173 |
return np.nan
|
174 |
|
175 |
+
def z_test_data(df):
    """Wrap the cells of a table in colored HTML spans based on a z-test.

    Every cell except those in the last two rows and the last two columns is
    compared, via ``z_testes``, against the last column of its row, using the
    last row as the ``n`` arguments. Cells whose p-value is below 0.05 are
    wrapped in a ``lightgreen`` span; all others (including a NaN p-value,
    which never compares below 0.05) get a ``lightgray`` span.

    NOTE(review): the last two rows/columns are presumably total and
    sample-size entries produced by the ``two_variable_*`` builders - confirm.

    Args:
        df: Table of values to annotate. It is not modified.

    Returns:
        A new object-dtype ``DataFrame`` of the same shape and labels as
        ``df`` in which the tested cells hold the HTML span strings and all
        other cells hold their original values.
    """
    rows, cols = df.shape

    # Write into an object-dtype copy: assigning strings into numeric columns
    # is an incompatible-dtype assignment in pandas, and the caller's table
    # should stay numeric and untouched.
    marked = df.astype(object)

    if rows < 3 or cols < 3:
        # Nothing falls inside the tested region.
        return marked

    n1 = df.iloc[-1, -1]  # x_IJ (loop-invariant)

    for i in range(rows - 2):
        p1 = df.iloc[i, -1]  # x_iJ
        for j in range(cols - 2):
            n2 = df.iloc[-1, j]  # x_Ij
            p2 = df.iloc[i, j]  # x_ij
            p_value = z_testes(n1, n2, p1, p2)
            color = 'lightgreen' if p_value < 0.05 else 'lightgray'
            marked.iloc[i, j] = f'<span style="background-color:{color}">{p2}</span>'

    return marked
|
192 |
+
|
193 |
def Z_test_dataframes(sheets_data):
|
194 |
"""Processes each sheet's DataFrame and computes new DataFrames with Z-test results."""
|
195 |
result_dataframes = {}
|
|
|
283 |
if tabulation_option == "All":
|
284 |
st.info("This section of the program is under development.")
|
285 |
elif tabulation_option == "Univariate":
|
286 |
+
uni_option = st.selectbox("Select the type of univariate analysis:", ["Multiple answer", "Single answer", "Score answer"])
|
287 |
|
288 |
if uni_option == "Single answer":
|
289 |
var = st.text_input("Please enter the name of the desired column:")
|
|
|
312 |
st.plotly_chart(fig, use_container_width=True)
|
313 |
else:
|
314 |
st.error("No columns matching the entered pattern were found.")
|
315 |
+
|
316 |
+
elif uni_option == "Score answer":
|
317 |
+
var = st.text_input("Please enter the name of the desired column:")
|
318 |
+
if var:
|
319 |
+
subset_df = df[var]
|
320 |
+
result_df = score_answer(subset_df)
|
321 |
+
|
322 |
+
st.subheader("Score Answer Analysis Results")
|
323 |
+
st.dataframe(result_df)
|
324 |
+
|
325 |
+
fig = figo('Bar', result_df["Percentage"][:-1], title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
|
326 |
+
st.plotly_chart(fig, use_container_width=True)
|
327 |
+
else:
|
328 |
+
st.error("No columns matching the entered pattern were found.")
|
329 |
+
|
330 |
elif tabulation_option == "Multivariate":
|
331 |
st.subheader("Multivariate Analysis")
|
332 |
var1 = st.text_input("Please enter the name of the first column:")
|
|
|
339 |
if type1 == "Single answer" and type2 == "Single answer":
|
340 |
percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
|
341 |
st.subheader("Percentage Table")
|
342 |
+
st.dataframe(z_test_data(percentile_df))
|
343 |
|
344 |
st.subheader("Frequency Table")
|
345 |
st.dataframe(frequency_df)
|
|
|
352 |
if matching_cols:
|
353 |
percentile_df, frequency_df = two_variable_sm(df[[var1] + matching_cols], var1, matching_cols)
|
354 |
st.subheader("Percentage Table")
|
355 |
+
st.dataframe(z_test_data(percentile_df))
|
356 |
|
357 |
st.subheader("Frequency Table")
|
358 |
st.dataframe(frequency_df)
|
|
|
369 |
if matching_cols1 and matching_cols2:
|
370 |
percentile_df, frequency_df = two_variable_mm(df[matching_cols1 + matching_cols2], matching_cols1, matching_cols2)
|
371 |
st.subheader("Percentage Table")
|
372 |
+
st.dataframe(z_test_data(percentile_df))
|
373 |
|
374 |
st.subheader("Frequency Table")
|
375 |
st.dataframe(frequency_df)
|