Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -113,7 +113,7 @@ def multi_answer(df):
|
|
113 |
friquency[i] = 0
|
114 |
|
115 |
|
116 |
-
friquency_dataframe = pd.DataFrame({"Value": friquency.keys(),
|
117 |
friquency_dataframe.loc[len(friquency_dataframe)] = ['Sample_size', len(df.dropna(how='all')), 1]
|
118 |
return friquency_dataframe
|
119 |
|
@@ -236,6 +236,34 @@ def two_variable_msc(df, var1, var2):
|
|
236 |
|
237 |
return mean_dataframe
|
238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
def t_test(m1, m2, n1, n2, v1, v2):
|
240 |
te = (m1 - m2) / ((v1/n1 + v2/n2)**0.5)
|
241 |
p_value = 2 * (1 - t.cdf(abs(te), n1+n2-2))
|
@@ -364,7 +392,7 @@ def analyze_z_test(file):
|
|
364 |
st.title("Data Analysis Application")
|
365 |
|
366 |
# Main options
|
367 |
-
main_option = st.selectbox("Please select an option:", ["Tabulation", "Hypothesis test", "Machine Learning", "Coding"])
|
368 |
|
369 |
if main_option == "Tabulation":
|
370 |
st.header("Tabulation Analysis")
|
@@ -504,6 +532,56 @@ if main_option == "Tabulation":
|
|
504 |
except Exception as e:
|
505 |
st.error(f"❌ Error reading the Excel file: {e}")
|
506 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
507 |
elif main_option == "Hypothesis test":
|
508 |
st.header("Hypothesis Testing")
|
509 |
hypothesis_option = st.selectbox("Please select the type of hypothesis test:", ["Z test", "T test", "Chi-Square test", "ANOVA test"])
|
|
|
113 |
friquency[i] = 0
|
114 |
|
115 |
|
116 |
+
friquency_dataframe = pd.DataFrame({"Value": friquency.keys(), 'Frequency': friquency.values(), "Percentage": np.array(list(friquency.values()))/len(df.dropna(how='all'))*100}).sort_values(by='Value')
|
117 |
friquency_dataframe.loc[len(friquency_dataframe)] = ['Sample_size', len(df.dropna(how='all')), 1]
|
118 |
return friquency_dataframe
|
119 |
|
|
|
236 |
|
237 |
return mean_dataframe
|
238 |
|
239 |
+
def funnel(df, dictionary):
    """Build per-question frequency columns and their share of the row count.

    Args:
        df: DataFrame of responses. Columns are looked up by the keys of
            ``dictionary`` ("Single"/"Score") or matched against them with
            ``is_matching_pattern`` ("Multi").
        dictionary: Maps a question name to its kind: ``"Single"``,
            ``"Multi"`` or ``"Score"``. Entries of any other kind are ignored.

    Returns:
        tuple: ``(friquency, percentage)`` where ``friquency`` is a DataFrame
        with one column per processed question and ``percentage`` is that
        frame divided by the number of rows in ``df``.
    """
    friquency = {}
    for name, kind in dictionary.items():
        if kind == "Single":
            table = single_answer(df[name])
        elif kind == "Multi":
            matching_cols = [col for col in df.columns if is_matching_pattern(col, name)]
            table = multi_answer(df[matching_cols])
        elif kind == "Score":
            table = score_answer(df[name])
        else:
            continue
        # Drop the last entry of the table; multi_answer appends a
        # 'Sample_size' row there (presumably the other helpers do the
        # same -- verify against their definitions).
        friquency[name] = list(table['Frequency'])[:-1]

    try:
        friquency = pd.DataFrame(friquency)
    except ValueError:
        # pandas rejects a dict of lists with different lengths; let the
        # project helper even them out first.
        friquency = equalize_list_lengths(friquency)
        # NOTE(review): the helper's return type is not visible here; make
        # sure the division below always operates on a DataFrame.
        if not isinstance(friquency, pd.DataFrame):
            friquency = pd.DataFrame(friquency)

    # The base is the length of the first "Single" question's column. When no
    # such question exists, fall back to the frame's row count (the same
    # number) instead of failing on df[None].
    first = next((key for key, value in dictionary.items() if value == "Single"), None)
    base = len(df[first]) if first is not None else len(df)

    percentage = friquency / base

    return friquency, percentage
|
266 |
+
|
267 |
def t_test(m1, m2, n1, n2, v1, v2):
|
268 |
te = (m1 - m2) / ((v1/n1 + v2/n2)**0.5)
|
269 |
p_value = 2 * (1 - t.cdf(abs(te), n1+n2-2))
|
|
|
392 |
st.title("Data Analysis Application")
|
393 |
|
394 |
# Main options
|
395 |
+
main_option = st.selectbox("Please select an option:", ["Tabulation", "Funnel", "Hypothesis test", "Machine Learning", "Coding"])
|
396 |
|
397 |
if main_option == "Tabulation":
|
398 |
st.header("Tabulation Analysis")
|
|
|
532 |
except Exception as e:
|
533 |
st.error(f"❌ Error reading the Excel file: {e}")
|
534 |
|
535 |
+
elif main_option == "Funnel":
|
536 |
+
st.header("Funnel")
|
537 |
+
|
538 |
+
uploaded_file = st.file_uploader("Please upload your Excel file", type=["xlsx", "xls"])
|
539 |
+
if uploaded_file:
|
540 |
+
try:
|
541 |
+
#df = pd.read_excel(uploaded_file)
|
542 |
+
#st.subheader("Displaying the first few rows of the DataFrame")
|
543 |
+
|
544 |
+
data = {
|
545 |
+
'A': [10, 20, 30, np.nan, np.nan],
|
546 |
+
'B': [5, 10, 15, 20, 25],
|
547 |
+
'C': [np.nan, 100, 150, 200, 250],
|
548 |
+
'D': [100, np.nan, 200, 300, 400]
|
549 |
+
}
|
550 |
+
df = pd.DataFrame(data)
|
551 |
+
|
552 |
+
st.dataframe(df.head())
|
553 |
+
st.sidebar.header("Chart Settings")
|
554 |
+
bar_columns = st.sidebar.multiselect('Which columns should be displayed as bar charts?', df.columns)
|
555 |
+
line_columns = st.sidebar.multiselect('Which columns should be displayed as line charts?', df.columns)
|
556 |
+
|
557 |
+
df_cleaned = df.dropna(axis=0, how='all')
|
558 |
+
|
559 |
+
fig = go.Figure()
|
560 |
+
|
561 |
+
for col in bar_columns:
|
562 |
+
df_col = df_cleaned[col]
|
563 |
+
fig.add_trace(go.Bar(x=df_cleaned.index, y=df_col, name=col))
|
564 |
+
|
565 |
+
for col in line_columns:
|
566 |
+
df_col = df_cleaned[col]
|
567 |
+
fig.add_trace(go.Scatter(x=df_cleaned.index, y=df_col, mode='lines', name=col))
|
568 |
+
|
569 |
+
fig.update_layout(
|
570 |
+
title="Combined Bar and Line Chart",
|
571 |
+
xaxis_title="Rows",
|
572 |
+
yaxis_title="Value",
|
573 |
+
template="plotly_dark",
|
574 |
+
barmode="group",
|
575 |
+
xaxis=dict(tickmode='linear')
|
576 |
+
)
|
577 |
+
|
578 |
+
st.plotly_chart(fig)
|
579 |
+
|
580 |
+
|
581 |
+
except Exception as e:
|
582 |
+
st.error(f"❌ Error reading the Excel file: {e}")
|
583 |
+
|
584 |
+
|
585 |
elif main_option == "Hypothesis test":
|
586 |
st.header("Hypothesis Testing")
|
587 |
hypothesis_option = st.selectbox("Please select the type of hypothesis test:", ["Z test", "T test", "Chi-Square test", "ANOVA test"])
|