AMKhakbaz committed on
Commit
fc8e87b
·
verified ·
1 Parent(s): 5e088e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +325 -305
app.py CHANGED
@@ -499,6 +499,36 @@ def hierarchical_clustering_with_plotly(df, linkage_method):
499
 
500
  return df
501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
  empty_col1, main_col, empty_col2 = st.columns([1.6, 2.8, 1.6])
503
 
504
  with main_col:
@@ -514,329 +544,319 @@ import streamlit as st
514
 
515
  st.markdown('[Click to register a suggestion or comment](https://docs.google.com/forms/d/e/1FAIpQLScLyP7bBbqMfGdspjL7Ij64UZ6v2KjqjKNbm8gwEsgWsFs_Qg/viewform?usp=header)')
516
 
 
 
517
 
518
- # Main options
 
519
 
520
- uploaded_file = st.file_uploader("Please upload your Excel file", type=["xlsx", "xls"])
521
- if uploaded_file:
522
- try:
523
- df = pd.read_excel(uploaded_file)
524
- st.subheader("Displaying the first few rows of the DataFrame")
525
- st.dataframe(df.head())
526
 
527
- cols = edit_strings(df.columns)
528
- cols = sorted(list(set(cols)))
529
 
530
- main_option = st.selectbox("Please select an option:", ["Tabulation", "Funnel Analysis", "Segmentation Analysis", "Hypothesis test", "Machine Learning", "Coding"])
 
 
531
 
532
- if main_option == "Tabulation":
533
- st.header("Tabulation Analysis")
534
-
535
- tabulation_option = st.selectbox("Please select the type of analysis:", ["Univariate", "Multivariate", "All"])
536
 
537
- if tabulation_option == "All":
538
-
539
- st.sidebar.header("Settings")
 
 
 
 
 
 
 
 
 
 
 
540
 
541
- main_dict = {"single": [], "multi": [], "score": []}
542
-
543
- st.sidebar.subheader("Main")
544
- main_dict["single"] = st.sidebar.multiselect(
545
- 'Main: Single answer questions',
546
- cols,
547
- default=[]
548
- )
549
-
550
- main_dict["multi"] = st.sidebar.multiselect(
551
- 'Main: Multi answer questions',
552
- cols,
553
- default=[]
554
- )
555
-
556
- main_dict["score"] = st.sidebar.multiselect(
557
- 'Main: Score answer questions',
558
- cols,
559
- default=[]
560
- )
561
-
562
- follow_dict = {"single": [], "multi": [], "score": []}
563
-
564
- st.sidebar.subheader("Follow")
565
- follow_dict["single"] = st.sidebar.multiselect(
566
- 'Follow: Single answer questions',
567
- cols,
568
- default=[]
569
- )
570
-
571
- follow_dict["multi"] = st.sidebar.multiselect(
572
- 'Follow: Multi answer questions',
573
- cols,
574
- default=[]
575
- )
576
-
577
- follow_dict["score"] = st.sidebar.multiselect(
578
- 'Follow: Score answer questions',
579
- cols,
580
- default=[]
581
- )
582
-
583
- all_tabulation(df, main_dict, follow_dict)
584
-
585
- elif tabulation_option == "Univariate":
586
- uni_option = st.selectbox("Select the type of univariate analysis:", ["Multiple answer", "Single answer", "Score answer"])
587
-
588
- if uni_option == "Single answer":
589
- var = st.text_input("Please enter the name of the desired column:")
590
- if var:
591
- if var in df.columns:
592
- result_df = single_answer(df[var])
593
- st.subheader("Univariate Analysis Results")
594
- st.dataframe(result_df)
595
-
596
- fig = figo('Bar', result_df["Percentage"][:-1, ], title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
597
- st.plotly_chart(fig, use_container_width=True)
598
- else:
599
- st.error("The entered column was not found.")
600
- elif uni_option == "Multiple answer":
601
- var = st.text_input("Please enter the name of the desired column:")
602
- if var:
603
- matching_cols = [col for col in df.columns if is_matching_pattern(col, var)]
604
- if matching_cols:
605
- subset_df = df[matching_cols]
606
- result_df = multi_answer(subset_df)
607
-
608
- st.subheader("Multiple Answer Analysis Results")
609
- st.dataframe(result_df)
610
-
611
- fig = figo('Bar', result_df["Percentage"][:-1], title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
612
- st.plotly_chart(fig, use_container_width=True)
613
- else:
614
- st.error("No columns matching the entered pattern were found.")
615
-
616
- elif uni_option == "Score answer":
617
- var = st.text_input("Please enter the name of the desired column:")
618
- if var:
619
- subset_df = df[var]
620
- result_df = score_answer(subset_df)
621
-
622
- st.subheader("Score Answer Analysis Results")
623
- st.dataframe(result_df)
624
-
625
- fig = figo('Bar', result_df["Percentage"][:-2], title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
626
- st.plotly_chart(fig, use_container_width=True)
627
- else:
628
- st.error("No columns matching the entered pattern were found.")
629
 
630
- elif tabulation_option == "Multivariate":
631
- st.subheader("Multivariate Analysis")
632
- var1 = st.text_input("Please enter the name of the first column:")
633
- var2 = st.text_input("Please enter the name of the second column:")
634
-
635
- if var1 and var2:
636
- type1 = st.selectbox("Select the type of analysis for the first column:", ["Multiple answer", "Single answer"], key='type1')
637
- type2 = st.selectbox("Select the type of analysis for the second column:", ["Multiple answer", "Single answer", "Score answer"], key='type2')
638
-
639
- if type1 == "Single answer" and type2 == "Single answer":
640
- percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
641
- st.subheader("Percentage Table")
642
- st.write(z_test_data(percentile_df))
643
-
644
- st.subheader("Frequency Table")
645
- st.dataframe(frequency_df)
646
-
647
- row, col = df.shape
648
- fig = figo('Scatter', percentile_df.iloc[:-1,:], title='Percentage Scatter plot', width=(col*5)+5, height=(row*25) + 10)
649
- st.plotly_chart(fig, use_container_width=True)
650
-
651
- elif type1 == "Single answer" and type2 == "Multiple answer":
652
- matching_cols = [col for col in df.columns if is_matching_pattern(col, var2)]
653
- if matching_cols:
654
- percentile_df, frequency_df = two_variable_sm(df[[var1] + matching_cols], var1, matching_cols)
655
- st.subheader("Percentage Table")
656
- st.write(z_test_data(percentile_df))
657
-
658
- st.subheader("Frequency Table")
659
- st.dataframe(frequency_df)
660
-
661
- row, col = df.shape
662
- fig = figo('Scatter', percentile_df.iloc[:-1,:], title='Percentage Scatter plot', width=(col*5)+5, height=(row*25) + 10)
663
- st.plotly_chart(fig, use_container_width=True)
664
-
665
- else:
666
- st.error("No columns matching the entered pattern were found.")
667
-
668
- elif type1 == "Multiple answer" and type2 == "Multiple answer":
669
- matching_cols1 = [col for col in df.columns if is_matching_pattern(col, var1)]
670
- matching_cols2 = [col for col in df.columns if is_matching_pattern(col, var2)]
671
- if matching_cols1 and matching_cols2:
672
- percentile_df, frequency_df = two_variable_mm(df[matching_cols1 + matching_cols2], matching_cols1, matching_cols2)
673
- st.subheader("Percentage Table")
674
- st.write(z_test_data(percentile_df))
675
-
676
- st.subheader("Frequency Table")
677
- st.dataframe(frequency_df)
678
-
679
- row, col = df.shape
680
- fig = figo('Scatter', percentile_df.iloc[:-1,:], title='Percentage Scatter plot', width=(col*5)+5, height=(row*25) + 10)
681
- st.plotly_chart(fig, use_container_width=True)
682
-
683
- elif type1 == "Single answer" and type2 == "Score answer":
684
-
685
- mean_df = two_variable_ssc(df[[var1, var2]], var1, var2)
686
- st.subheader("Mean Table")
687
- st.write(t_test_data(mean_df))
688
-
689
- row, col = df.shape
690
- fig = figo('Bar', mean_df["Mean"][:-1], title='Mean Histogram', xlabel=var1, ylabel='Mean', colorscale='Plotly3')
691
- st.plotly_chart(fig, use_container_width=True)
692
-
693
-
694
- elif type1 == "Multiple answer" and type2 == "Score answer":
695
- matching_cols1 = [col for col in df.columns if is_matching_pattern(col, var1)]
696
- if matching_cols1:
697
- mean_df = two_variable_msc(df[matching_cols1 + [var2]], matching_cols1, var2)
698
- st.subheader("Mean Table")
699
- st.write(t_test_data(mean_df))
700
-
701
- row, col = df.shape
702
- fig = figo('Bar', mean_df["Mean"][:-1], title='Mean Histogram', xlabel=var1, ylabel='Mean', colorscale='Plotly3')
703
- st.plotly_chart(fig, use_container_width=True)
704
- else:
705
- st.info("This section of the program is under development.")
706
-
707
- elif main_option == "Funnel Analysis":
708
- st.header("Funnel")
709
-
710
- st.sidebar.header("Funnel Settings")
711
- single_list = st.sidebar.multiselect(
712
- 'Single answer questions',
713
- cols,
714
- default=[]
715
- )
716
-
717
- multi_list = st.sidebar.multiselect(
718
- 'Multi answer questions',
719
- cols,
720
- default=[]
721
- )
722
- selected_dict = {}
723
-
724
- for option in single_list:
725
- selected_dict[option] = "Single"
726
- for option in multi_list:
727
- selected_dict[option] = "Multi"
728
-
729
- funnel_frequency, funnel_percentage = funnel(df, selected_dict)
730
- st.subheader("Percentage Table")
731
- st.dataframe(funnel_percentage)
732
-
733
- st.subheader("Frequency Table")
734
- st.dataframe(funnel_frequency)
735
-
736
- st.sidebar.header("Chart Settings")
737
- bar_columns = st.sidebar.multiselect('Which columns should be displayed as bar charts?', sorted(funnel_percentage.columns))
738
- line_columns = st.sidebar.multiselect('Which columns should be displayed as line charts?', sorted(funnel_percentage.columns))
739
 
740
- funnel_percentage_cleaned = funnel_percentage.dropna(axis=0, how='all')
741
-
742
- fig = go.Figure()
743
-
744
- # Define modern and diverse color palette
745
- modern_colors = [
746
- "#FF6F61", "#6B5B95", "#88B04B", "#F7CAC9", "#92A8D1",
747
- "#955251", "#B565A7", "#009B77", "#DD4124", "#45B8AC"
748
- ]
749
-
750
- # Add Bar traces with transparency and custom colors
751
- for idx, col in enumerate(bar_columns):
752
- funnel_percentage_col = funnel_percentage_cleaned[col]
753
- fig.add_trace(
754
- go.Bar(
755
- x=funnel_percentage_cleaned.index,
756
- y=funnel_percentage_col,
757
- name=col,
758
- marker_color=modern_colors[idx % len(modern_colors)], # Cycle through colors
759
- opacity=0.8 # Set transparency
760
- )
761
- )
762
-
763
- # Add Line traces with transparency and custom colors
764
- for idx, col in enumerate(line_columns):
765
- funnel_percentage_col = funnel_percentage_cleaned[col]
766
- fig.add_trace(
767
- go.Scatter(
768
- x=funnel_percentage_cleaned.index,
769
- y=funnel_percentage_col,
770
- mode='lines',
771
- name=col,
772
- line=dict(color=modern_colors[(idx + len(bar_columns)) % len(modern_colors)]), # Cycle through colors
773
- opacity=0.8 # Set transparency
774
- )
775
- )
 
 
 
 
 
 
 
 
 
 
 
 
776
 
777
- fig.update_layout(
778
- title="Combined Bar and Line Chart",
779
- xaxis_title="Brands",
780
- yaxis_title="Percentage",
781
- template="plotly_dark",
782
- barmode="group",
783
- xaxis=dict(tickmode='linear')
784
- )
785
-
786
- st.plotly_chart(fig)
787
 
788
- elif main_option == "Segmentation Analysis":
789
- st.header("Segmentation Analysis")
790
-
791
- st.sidebar.header("Selection of questions")
792
- single_list = st.sidebar.multiselect(
793
- 'Single answer questions',
794
- cols,
795
- default=[]
796
- )
797
 
798
- multi_list = st.sidebar.multiselect(
799
- 'Multi answer questions',
800
- cols,
801
- default=[]
802
- )
803
 
804
- score_list = st.sidebar.multiselect(
805
- 'Score answer questions',
806
- cols,
807
- default=[]
808
- )
809
 
810
- matching_cols1 = []
811
- for i in multi_list:
812
- matching_cols1 += [col for col in df.columns if is_matching_pattern(col, i)]
813
 
814
- df_clean = process_dataframe(df[single_list + matching_cols1])
815
- st.subheader("Selected Table")
816
- st.dataframe(df_clean)
817
 
818
- linkage_method = st.sidebar.selectbox("Select the Linkage Method of Segmentation Analysis:", ['average', 'single', 'complete', 'weighted', 'centroid', 'median', 'ward'])
819
-
820
- df_cluster = hierarchical_clustering_with_plotly(df_clean, linkage_method)
821
 
822
- st.subheader("Cluster Table")
823
- st.dataframe(df_clean)
824
 
825
- elif main_option == "Hypothesis test":
826
- st.header("Hypothesis Testing")
827
- hypothesis_option = st.selectbox("Please select the type of hypothesis test:", ["Z test", "T test", "Chi-Square test", "ANOVA test"])
828
-
829
- if hypothesis_option != "Z test":
830
- st.info("This section of the program is under development.")
 
 
 
 
831
  else:
832
- uploaded_file = st.file_uploader("Please upload your Excel file for Z-Test", type=["xlsx", "xls"])
833
- if uploaded_file:
834
- result = analyze_z_test(uploaded_file)
835
- if result:
836
- st.success("Z-Test analysis completed successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
837
 
838
- elif main_option in ["Machine Learning", "Coding"]:
839
- st.info("This section of the program is under development.")
 
 
840
 
841
- except Exception as e:
842
- st.error(f" Error reading the Excel file: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
 
500
  return df
501
 
502
+
503
def upload_and_select_dataframe(max_files=7):
    """Let the user upload tabular files in the sidebar and pick one of them.

    Renders a multi-file uploader in the Streamlit sidebar, parses every
    accepted file with pandas, and shows a select box so the user can
    choose which parsed table to work with.

    Args:
        max_files: Largest number of files accepted in one upload. When
            more are supplied, an error is shown and nothing is parsed.

    Returns:
        The DataFrame parsed from the file chosen in the select box, or
        ``None`` when too many files were uploaded, no file was uploaded,
        or none of the uploaded files could be read.
    """
    st.sidebar.title("File Upload")
    uploaded_files = st.sidebar.file_uploader(
        "Choose CSV or Excel files",
        type=["csv", "xlsx", "xls", "xlsb"],
        accept_multiple_files=True,
    )

    # Enforce the limit before parsing anything: reading every file only to
    # discard the results afterwards wastes time and memory.
    if len(uploaded_files) > max_files:
        st.sidebar.error(f'Maximum {max_files} files can be uploaded.')
        return None

    dataframes = {}
    for uploaded_file in uploaded_files:
        # Compare extensions case-insensitively so "DATA.CSV" is accepted.
        lowered_name = uploaded_file.name.lower()
        try:
            if lowered_name.endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            elif lowered_name.endswith(('.xls', '.xlsx', '.xlsb')):
                df = pd.read_excel(uploaded_file)
            else:
                st.sidebar.error(f"Unsupported file type: {uploaded_file.name}")
                continue
        except Exception as e:
            # UI boundary: report the failure for this file and keep going
            # so one unreadable file does not block the others.
            st.sidebar.error(f"Error reading {uploaded_file.name}: {e}")
            continue
        # Keyed by the original file name; a later file with the same name
        # replaces an earlier one.
        dataframes[uploaded_file.name] = df

    if not dataframes:
        st.sidebar.info("Please upload some files.")
        return None

    selected_file = st.sidebar.selectbox("Select a DataFrame", list(dataframes.keys()))
    return dataframes[selected_file]
531
+
532
  empty_col1, main_col, empty_col2 = st.columns([1.6, 2.8, 1.6])
533
 
534
  with main_col:
 
544
 
545
  st.markdown('[Click to register a suggestion or comment](https://docs.google.com/forms/d/e/1FAIpQLScLyP7bBbqMfGdspjL7Ij64UZ6v2KjqjKNbm8gwEsgWsFs_Qg/viewform?usp=header)')
546
 
547
+ st.subheader("Displaying the first few rows of the DataFrame")
548
+ st.dataframe(df.head())
549
 
550
+ cols = edit_strings(df.columns)
551
+ cols = sorted(list(set(cols)))
552
 
553
+ main_option = st.selectbox("Please select an option:", ["Tabulation", "Funnel Analysis", "Segmentation Analysis", "Hypothesis test", "Machine Learning", "Coding"])
 
 
 
 
 
554
 
555
+ if main_option == "Tabulation":
556
+ st.header("Tabulation Analysis")
557
 
558
+ tabulation_option = st.selectbox("Please select the type of analysis:", ["Univariate", "Multivariate", "All"])
559
+
560
+ if tabulation_option == "All":
561
 
562
+ st.sidebar.header("Settings")
 
 
 
563
 
564
+ main_dict = {"single": [], "multi": [], "score": []}
565
+
566
+ st.sidebar.subheader("Main")
567
+ main_dict["single"] = st.sidebar.multiselect(
568
+ 'Main: Single answer questions',
569
+ cols,
570
+ default=[]
571
+ )
572
+
573
+ main_dict["multi"] = st.sidebar.multiselect(
574
+ 'Main: Multi answer questions',
575
+ cols,
576
+ default=[]
577
+ )
578
 
579
+ main_dict["score"] = st.sidebar.multiselect(
580
+ 'Main: Score answer questions',
581
+ cols,
582
+ default=[]
583
+ )
584
+
585
+ follow_dict = {"single": [], "multi": [], "score": []}
586
+
587
+ st.sidebar.subheader("Follow")
588
+ follow_dict["single"] = st.sidebar.multiselect(
589
+ 'Follow: Single answer questions',
590
+ cols,
591
+ default=[]
592
+ )
593
+
594
+ follow_dict["multi"] = st.sidebar.multiselect(
595
+ 'Follow: Multi answer questions',
596
+ cols,
597
+ default=[]
598
+ )
599
+
600
+ follow_dict["score"] = st.sidebar.multiselect(
601
+ 'Follow: Score answer questions',
602
+ cols,
603
+ default=[]
604
+ )
605
+
606
+ all_tabulation(df, main_dict, follow_dict)
607
+
608
+ elif tabulation_option == "Univariate":
609
+ uni_option = st.selectbox("Select the type of univariate analysis:", ["Multiple answer", "Single answer", "Score answer"])
610
+
611
+ if uni_option == "Single answer":
612
+ var = st.text_input("Please enter the name of the desired column:")
613
+ if var:
614
+ if var in df.columns:
615
+ result_df = single_answer(df[var])
616
+ st.subheader("Univariate Analysis Results")
617
+ st.dataframe(result_df)
618
+
619
+ fig = figo('Bar', result_df["Percentage"][:-1, ], title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
620
+ st.plotly_chart(fig, use_container_width=True)
621
+ else:
622
+ st.error("The entered column was not found.")
623
+ elif uni_option == "Multiple answer":
624
+ var = st.text_input("Please enter the name of the desired column:")
625
+ if var:
626
+ matching_cols = [col for col in df.columns if is_matching_pattern(col, var)]
627
+ if matching_cols:
628
+ subset_df = df[matching_cols]
629
+ result_df = multi_answer(subset_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630
 
631
+ st.subheader("Multiple Answer Analysis Results")
632
+ st.dataframe(result_df)
633
+
634
+ fig = figo('Bar', result_df["Percentage"][:-1], title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
635
+ st.plotly_chart(fig, use_container_width=True)
636
+ else:
637
+ st.error("No columns matching the entered pattern were found.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
638
 
639
+ elif uni_option == "Score answer":
640
+ var = st.text_input("Please enter the name of the desired column:")
641
+ if var:
642
+ subset_df = df[var]
643
+ result_df = score_answer(subset_df)
644
+
645
+ st.subheader("Score Answer Analysis Results")
646
+ st.dataframe(result_df)
647
+
648
+ fig = figo('Bar', result_df["Percentage"][:-2], title='Percentage Histogram', xlabel=var, ylabel='Percentage', colorscale='Plotly3')
649
+ st.plotly_chart(fig, use_container_width=True)
650
+ else:
651
+ st.error("No columns matching the entered pattern were found.")
652
+
653
+ elif tabulation_option == "Multivariate":
654
+ st.subheader("Multivariate Analysis")
655
+ var1 = st.text_input("Please enter the name of the first column:")
656
+ var2 = st.text_input("Please enter the name of the second column:")
657
+
658
+ if var1 and var2:
659
+ type1 = st.selectbox("Select the type of analysis for the first column:", ["Multiple answer", "Single answer"], key='type1')
660
+ type2 = st.selectbox("Select the type of analysis for the second column:", ["Multiple answer", "Single answer", "Score answer"], key='type2')
661
+
662
+ if type1 == "Single answer" and type2 == "Single answer":
663
+ percentile_df, frequency_df = two_variable_ss(df[[var1, var2]], var1, var2)
664
+ st.subheader("Percentage Table")
665
+ st.write(z_test_data(percentile_df))
666
+
667
+ st.subheader("Frequency Table")
668
+ st.dataframe(frequency_df)
669
+
670
+ row, col = df.shape
671
+ fig = figo('Scatter', percentile_df.iloc[:-1,:], title='Percentage Scatter plot', width=(col*5)+5, height=(row*25) + 10)
672
+ st.plotly_chart(fig, use_container_width=True)
673
+
674
+ elif type1 == "Single answer" and type2 == "Multiple answer":
675
+ matching_cols = [col for col in df.columns if is_matching_pattern(col, var2)]
676
+ if matching_cols:
677
+ percentile_df, frequency_df = two_variable_sm(df[[var1] + matching_cols], var1, matching_cols)
678
+ st.subheader("Percentage Table")
679
+ st.write(z_test_data(percentile_df))
680
+
681
+ st.subheader("Frequency Table")
682
+ st.dataframe(frequency_df)
683
+
684
+ row, col = df.shape
685
+ fig = figo('Scatter', percentile_df.iloc[:-1,:], title='Percentage Scatter plot', width=(col*5)+5, height=(row*25) + 10)
686
+ st.plotly_chart(fig, use_container_width=True)
687
 
688
+ else:
689
+ st.error("No columns matching the entered pattern were found.")
 
 
 
 
 
 
 
 
690
 
691
+ elif type1 == "Multiple answer" and type2 == "Multiple answer":
692
+ matching_cols1 = [col for col in df.columns if is_matching_pattern(col, var1)]
693
+ matching_cols2 = [col for col in df.columns if is_matching_pattern(col, var2)]
694
+ if matching_cols1 and matching_cols2:
695
+ percentile_df, frequency_df = two_variable_mm(df[matching_cols1 + matching_cols2], matching_cols1, matching_cols2)
696
+ st.subheader("Percentage Table")
697
+ st.write(z_test_data(percentile_df))
 
 
698
 
699
+ st.subheader("Frequency Table")
700
+ st.dataframe(frequency_df)
 
 
 
701
 
702
+ row, col = df.shape
703
+ fig = figo('Scatter', percentile_df.iloc[:-1,:], title='Percentage Scatter plot', width=(col*5)+5, height=(row*25) + 10)
704
+ st.plotly_chart(fig, use_container_width=True)
 
 
705
 
706
+ elif type1 == "Single answer" and type2 == "Score answer":
 
 
707
 
708
+ mean_df = two_variable_ssc(df[[var1, var2]], var1, var2)
709
+ st.subheader("Mean Table")
710
+ st.write(t_test_data(mean_df))
711
 
712
+ row, col = df.shape
713
+ fig = figo('Bar', mean_df["Mean"][:-1], title='Mean Histogram', xlabel=var1, ylabel='Mean', colorscale='Plotly3')
714
+ st.plotly_chart(fig, use_container_width=True)
715
 
 
 
716
 
717
+ elif type1 == "Multiple answer" and type2 == "Score answer":
718
+ matching_cols1 = [col for col in df.columns if is_matching_pattern(col, var1)]
719
+ if matching_cols1:
720
+ mean_df = two_variable_msc(df[matching_cols1 + [var2]], matching_cols1, var2)
721
+ st.subheader("Mean Table")
722
+ st.write(t_test_data(mean_df))
723
+
724
+ row, col = df.shape
725
+ fig = figo('Bar', mean_df["Mean"][:-1], title='Mean Histogram', xlabel=var1, ylabel='Mean', colorscale='Plotly3')
726
+ st.plotly_chart(fig, use_container_width=True)
727
  else:
728
+ st.info("This section of the program is under development.")
729
+
730
+ elif main_option == "Funnel Analysis":
731
+ st.header("Funnel")
732
+
733
+ st.sidebar.header("Funnel Settings")
734
+ single_list = st.sidebar.multiselect(
735
+ 'Single answer questions',
736
+ cols,
737
+ default=[]
738
+ )
739
+
740
+ multi_list = st.sidebar.multiselect(
741
+ 'Multi answer questions',
742
+ cols,
743
+ default=[]
744
+ )
745
+ selected_dict = {}
746
+
747
+ for option in single_list:
748
+ selected_dict[option] = "Single"
749
+ for option in multi_list:
750
+ selected_dict[option] = "Multi"
751
+
752
+ funnel_frequency, funnel_percentage = funnel(df, selected_dict)
753
+ st.subheader("Percentage Table")
754
+ st.dataframe(funnel_percentage)
755
+
756
+ st.subheader("Frequency Table")
757
+ st.dataframe(funnel_frequency)
758
+
759
+ st.sidebar.header("Chart Settings")
760
+ bar_columns = st.sidebar.multiselect('Which columns should be displayed as bar charts?', sorted(funnel_percentage.columns))
761
+ line_columns = st.sidebar.multiselect('Which columns should be displayed as line charts?', sorted(funnel_percentage.columns))
762
+
763
+ funnel_percentage_cleaned = funnel_percentage.dropna(axis=0, how='all')
764
+
765
+ fig = go.Figure()
766
+
767
+ # Define modern and diverse color palette
768
+ modern_colors = [
769
+ "#FF6F61", "#6B5B95", "#88B04B", "#F7CAC9", "#92A8D1",
770
+ "#955251", "#B565A7", "#009B77", "#DD4124", "#45B8AC"
771
+ ]
772
+
773
+ # Add Bar traces with transparency and custom colors
774
+ for idx, col in enumerate(bar_columns):
775
+ funnel_percentage_col = funnel_percentage_cleaned[col]
776
+ fig.add_trace(
777
+ go.Bar(
778
+ x=funnel_percentage_cleaned.index,
779
+ y=funnel_percentage_col,
780
+ name=col,
781
+ marker_color=modern_colors[idx % len(modern_colors)], # Cycle through colors
782
+ opacity=0.8 # Set transparency
783
+ )
784
+ )
785
+
786
+ # Add Line traces with transparency and custom colors
787
+ for idx, col in enumerate(line_columns):
788
+ funnel_percentage_col = funnel_percentage_cleaned[col]
789
+ fig.add_trace(
790
+ go.Scatter(
791
+ x=funnel_percentage_cleaned.index,
792
+ y=funnel_percentage_col,
793
+ mode='lines',
794
+ name=col,
795
+ line=dict(color=modern_colors[(idx + len(bar_columns)) % len(modern_colors)]), # Cycle through colors
796
+ opacity=0.8 # Set transparency
797
+ )
798
+ )
799
+
800
+ fig.update_layout(
801
+ title="Combined Bar and Line Chart",
802
+ xaxis_title="Brands",
803
+ yaxis_title="Percentage",
804
+ template="plotly_dark",
805
+ barmode="group",
806
+ xaxis=dict(tickmode='linear')
807
+ )
808
+
809
+ st.plotly_chart(fig)
810
+
811
+ elif main_option == "Segmentation Analysis":
812
+ st.header("Segmentation Analysis")
813
+
814
+ st.sidebar.header("Selection of questions")
815
+ single_list = st.sidebar.multiselect(
816
+ 'Single answer questions',
817
+ cols,
818
+ default=[]
819
+ )
820
+
821
+ multi_list = st.sidebar.multiselect(
822
+ 'Multi answer questions',
823
+ cols,
824
+ default=[]
825
+ )
826
+
827
+ score_list = st.sidebar.multiselect(
828
+ 'Score answer questions',
829
+ cols,
830
+ default=[]
831
+ )
832
+
833
+ matching_cols1 = []
834
+ for i in multi_list:
835
+ matching_cols1 += [col for col in df.columns if is_matching_pattern(col, i)]
836
+
837
+ df_clean = process_dataframe(df[single_list + matching_cols1])
838
+ st.subheader("Selected Table")
839
+ st.dataframe(df_clean)
840
+
841
+ linkage_method = st.sidebar.selectbox("Select the Linkage Method of Segmentation Analysis:", ['average', 'single', 'complete', 'weighted', 'centroid', 'median', 'ward'])
842
 
843
+ df_cluster = hierarchical_clustering_with_plotly(df_clean, linkage_method)
844
+
845
+ st.subheader("Cluster Table")
846
+ st.dataframe(df_clean)
847
 
848
+ elif main_option == "Hypothesis test":
849
+ st.header("Hypothesis Testing")
850
+ hypothesis_option = st.selectbox("Please select the type of hypothesis test:", ["Z test", "T test", "Chi-Square test", "ANOVA test"])
851
+
852
+ if hypothesis_option != "Z test":
853
+ st.info("This section of the program is under development.")
854
+ else:
855
+ uploaded_file = st.file_uploader("Please upload your Excel file for Z-Test", type=["xlsx", "xls"])
856
+ if uploaded_file:
857
+ result = analyze_z_test(uploaded_file)
858
+ if result:
859
+ st.success("Z-Test analysis completed successfully.")
860
+
861
+ elif main_option in ["Machine Learning", "Coding"]:
862
+ st.info("This section of the program is under development.")