Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -492,6 +492,24 @@ def process_dataframe(df):
|
|
492 |
df[col] = pd.Categorical(df[col])
|
493 |
return df
|
494 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
495 |
empty_col1, main_col, empty_col2 = st.columns([1.6, 2.8, 1.6])
|
496 |
|
497 |
with main_col:
|
@@ -808,9 +826,17 @@ if uploaded_file:
|
|
808 |
|
809 |
matching_cols1 = []
|
810 |
for i in multi_list:
|
811 |
-
matching_cols1 += [col for col in df.columns if is_matching_pattern(col,
|
812 |
|
813 |
df_clean = process_dataframe(df[single_list + matching_cols1])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
814 |
st.dataframe(df_clean)
|
815 |
|
816 |
|
|
|
492 |
df[col] = pd.Categorical(df[col])
|
493 |
return df
|
494 |
|
495 |
+
def hierarchical_clustering_with_plotly(df, linkage_method, num_clusters=None):
    """Cluster the rows of *df* hierarchically and render a dendrogram.

    Every column is integer-encoded with ``pd.factorize``, a linkage matrix
    is built from the encoded table, the dendrogram is drawn in the Streamlit
    page, and each row receives a flat cluster label.

    Args:
        df: Table whose rows are clustered. It is modified in place: a
            ``'Cluster'`` column is added (or overwritten).
        linkage_method: Forwarded unchanged as ``method=`` to ``linkage``
            (presumably ``scipy.cluster.hierarchy.linkage`` -- confirm
            against the file's imports; if so it must be one of that
            function's method names).
        num_clusters: Upper bound on the number of flat clusters. When
            ``None`` (the default) the value is read from a Streamlit
            number widget shown below the dendrogram.

    Returns:
        The same ``df`` object, now carrying the ``'Cluster'`` column.
    """
    df_encoded = df.apply(lambda column: pd.factorize(column)[0])

    Z = linkage(df_encoded, method=linkage_method)

    # The lambda ignores its argument and returns the precomputed Z, so the
    # plotted tree is the same linkage that is cut into clusters below.
    fig = ff.create_dendrogram(df_encoded, linkagefun=lambda x: Z, orientation='bottom')
    fig.update_layout(width=800, height=500)
    st.plotly_chart(fig)

    if num_clusters is None:
        # input() would read the server process's stdin, which a browser user
        # cannot reach; ask through a Streamlit widget instead.
        n_rows = max(len(df_encoded), 1)
        num_clusters = st.number_input(
            "Enter the desired number of clusters: ",
            min_value=1,
            max_value=n_rows,
            value=min(2, n_rows),
            step=1,
        )

    clusters = fcluster(Z, int(num_clusters), criterion='maxclust')

    # Written onto the caller's frame on purpose: the calling code displays
    # the frame it passed in, not only the returned object.
    df['Cluster'] = clusters

    return df
|
512 |
+
|
513 |
empty_col1, main_col, empty_col2 = st.columns([1.6, 2.8, 1.6])
|
514 |
|
515 |
with main_col:
|
|
|
826 |
|
827 |
matching_cols1 = []
|
828 |
for i in multi_list:
|
829 |
+
matching_cols1 += [col for col in df.columns if is_matching_pattern(col, i)]
|
830 |
|
831 |
df_clean = process_dataframe(df[single_list + matching_cols1])
|
832 |
+
st.subheader("Selected Table")
|
833 |
+
st.dataframe(df_clean)
|
834 |
+
|
835 |
+
linkage_method = st.sidebar.selectbox("Select the Linkage Method of Segmentation Analysis:", ["Hierarchical Clustering"])
|
836 |
+
|
837 |
+
df_cluster = hierarchical_clustering_with_plotly(df_clean, linkage_method)
|
838 |
+
|
839 |
+
st.subheader("Cluster Table")
|
840 |
st.dataframe(df_clean)
|
841 |
|
842 |
|