Mattral committed on
Commit
ae65663
·
verified ·
1 Parent(s): 30a2c91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -20
app.py CHANGED
@@ -7,6 +7,7 @@ import seaborn as sns
7
  import plotly.express as px
8
  import time
9
  from PIL import Image
 
10
 
11
  # Config
12
  page_icon = Image.open("./assets/logo.png")
@@ -176,14 +177,11 @@ if st.session_state.df is None:
176
 
177
  # Select
178
  elif uploading_way == "select":
179
- selected = st.selectbox("Select Dataset", ["Select", "Titanic Dataset", "Iris Dataset", "Wine Dataset", "Breast Cancer Dataset",
180
  "Diabetes Dataset", "Digits Dataset",
181
  "Olivetti Faces Dataset", "California Housing Dataset",
182
  "Covid-19 Dataset"])
183
 
184
- if selected == "Titanic Dataset":
185
- df = load_data("./data/titanic.csv")
186
- st.session_state.df = df
187
 
188
  elif selected == "Iris Dataset":
189
  from sklearn.datasets import load_iris
@@ -199,20 +197,6 @@ if st.session_state.df is None:
199
  df['target'] = wine.target
200
  st.session_state.df = df
201
 
202
- elif selected == "Breast Cancer Dataset":
203
- from sklearn.datasets import load_breast_cancer
204
- cancer = load_breast_cancer()
205
- df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
206
- df['target'] = cancer.target
207
- st.session_state.df = df
208
-
209
-
210
- elif selected == "Diabetes Dataset":
211
- from sklearn.datasets import load_diabetes
212
- diabetes = load_diabetes()
213
- df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
214
- df['target'] = diabetes.target
215
- st.session_state.df = df
216
 
217
  elif selected == "Digits Dataset":
218
  from sklearn.datasets import load_digits
@@ -310,7 +294,18 @@ if st.session_state.df is not None:
310
  st.write(pd.DataFrame(df.columns, columns=['Columns']).T)
311
  new_line()
312
 
313
-
 
 
 
 
 
 
 
 
 
 
 
314
  # Describe Numerical
315
  describe = st.checkbox("Show Description **(Numerical Features)**", value=False)
316
  new_line()
@@ -318,6 +313,12 @@ if st.session_state.df is not None:
318
  st.dataframe(df.describe(), use_container_width=True)
319
  new_line()
320
 
 
 
 
 
 
 
321
  # Describe Categorical
322
  describe_cat = st.checkbox("Show Description **(Categorical Features)**", value=False)
323
  new_line()
@@ -469,7 +470,7 @@ df.drop(columns={col_to_delete}, inplace=True)
469
 
470
  # Feature Importance (Only if a model has been trained)
471
  if 'trained_model' in st.session_state and st.session_state.trained_model is not None:
472
- feature_importance = st.checkbox("Show Feature Importance", value=False)
473
  new_line()
474
  if feature_importance:
475
  model = st.session_state.trained_model
@@ -479,7 +480,66 @@ df.drop(columns={col_to_delete}, inplace=True)
479
  ax.set_title('Feature Importance')
480
  st.pyplot(fig)
481
  new_line()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
483
  # Interactive Data Tables
484
  interactive_table = st.checkbox("Show Interactive Data Table", value=False)
485
  new_line()
 
7
  import plotly.express as px
8
  import time
9
  from PIL import Image
10
+ from wordcloud import WordCloud
11
 
12
  # Config
13
  page_icon = Image.open("./assets/logo.png")
 
177
 
178
  # Select
179
  elif uploading_way == "select":
180
+ selected = st.selectbox("Select Dataset", ["Select", "Titanic Dataset","Iris Dataset", "Wine Dataset",
181
  "Diabetes Dataset", "Digits Dataset",
182
  "Olivetti Faces Dataset", "California Housing Dataset",
183
  "Covid-19 Dataset"])
184
 
 
 
 
185
 
186
  elif selected == "Iris Dataset":
187
  from sklearn.datasets import load_iris
 
197
  df['target'] = wine.target
198
  st.session_state.df = df
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
  elif selected == "Digits Dataset":
202
  from sklearn.datasets import load_digits
 
294
  st.write(pd.DataFrame(df.columns, columns=['Columns']).T)
295
  new_line()
296
 
297
+ if st.checkbox("Check Data Types", value=False):
298
+ st.write(df.dtypes)
299
+ new_line()
300
+
301
+ if st.checkbox("Show Skewness and Kurtosis", value=False):
302
+ skew_kurt = pd.DataFrame(data={
303
+ 'Skewness': df.skew(),
304
+ 'Kurtosis': df.kurtosis()
305
+ })
306
+ st.write(skew_kurt)
307
+ new_line()
308
+
309
  # Describe Numerical
310
  describe = st.checkbox("Show Description **(Numerical Features)**", value=False)
311
  new_line()
 
313
  st.dataframe(df.describe(), use_container_width=True)
314
  new_line()
315
 
316
+ if st.checkbox("Unique Value Count", value=False):
317
+ unique_counts = pd.DataFrame(df.nunique()).rename(columns={0: 'Unique Count'})
318
+ st.write(unique_counts)
319
+ new_line()
320
+
321
+
322
  # Describe Categorical
323
  describe_cat = st.checkbox("Show Description **(Categorical Features)**", value=False)
324
  new_line()
 
470
 
471
  # Feature Importance (Only if a model has been trained)
472
  if 'trained_model' in st.session_state and st.session_state.trained_model is not None:
473
+ feature_importance = st.checkbox("Show Feature Importance (Only click after training or it will throw error)", value=False)
474
  new_line()
475
  if feature_importance:
476
  model = st.session_state.trained_model
 
480
  ax.set_title('Feature Importance')
481
  st.pyplot(fig)
482
  new_line()
483
+
484
+ if st.checkbox("Identify Outliers", value=False):
485
+ numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
486
+ col_for_outliers = st.selectbox("Select Column to Check Outliers", options=numeric_cols)
487
+ fig, ax = plt.subplots()
488
+ sns.boxplot(x=df[col_for_outliers], ax=ax)
489
+ ax.set_title(f'Outliers in {col_for_outliers}')
490
+ st.pyplot(fig)
491
+ new_line()
492
+
493
+ if st.checkbox("Show Pairwise Scatter Plots", value=False):
494
+ selected_cols = st.multiselect("Select Columns", options=df.columns, default=df.columns[:2])
495
+ sns.pairplot(df[selected_cols])
496
+ st.pyplot()
497
+ new_line()
498
+
499
+ if st.checkbox("Show Cross-tabulations", value=False):
500
+ categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
501
+ x_col = st.selectbox("Select X-axis Column for Cross-tab", options=categorical_cols, index=0)
502
+ y_col = st.selectbox("Select Y-axis Column for Cross-tab", options=categorical_cols, index=1 if len(categorical_cols) > 1 else 0)
503
+ cross_tab = pd.crosstab(df[x_col], df[y_col])
504
+ st.write(cross_tab)
505
+ new_line()
506
+
507
+ if st.checkbox("Segmented Analysis", value=False):
508
+ segments = st.selectbox("Select Segment", options=df.columns)
509
+ segment_values = df[segments].dropna().unique()
510
+ selected_segment = st.selectbox("Choose Segment Value", options=segment_values)
511
+ segmented_data = df[df[segments] == selected_segment]
512
+ st.write(segmented_data)
513
+ new_line()
514
+
515
+ # Assumes df has a datetime column named 'date_column' and a value column named 'some_value' (both are placeholders; replace with real column names)
516
+ if st.checkbox("Temporal Analysis", value=False):
517
+ fig, ax = plt.subplots()
518
+ df.set_index('date_column')['some_value'].plot(ax=ax)
519
+ ax.set_title('Trend Over Time')
520
+ st.pyplot(fig)
521
+ new_line()
522
+
523
 
524
+ if st.checkbox("Show Word Cloud", value=False):
525
+ text_col = st.selectbox("Select Text Column for Word Cloud", options=df.select_dtypes(include=[np.object]).columns.tolist())
526
+ text_data = ' '.join(df[text_col].dropna())
527
+ wordcloud = WordCloud(width=800, height=400).generate(text_data)
528
+ fig, ax = plt.subplots()
529
+ ax.imshow(wordcloud, interpolation='bilinear')
530
+ ax.axis('off')
531
+ st.pyplot(fig)
532
+ new_line()
533
+
534
+ if st.checkbox("Show Text Statistics", value=False):
535
+ text_col = st.selectbox("Select Text Column for Statistics", options=df.select_dtypes(include=[np.object]).columns.tolist())
536
+ text_stats = df[text_col].dropna().apply(lambda x: {'length': len(x), 'word_count': len(x.split())})
537
+ text_stats_df = pd.DataFrame(list(text_stats))
538
+ st.write(text_stats_df.describe())
539
+ new_line()
540
+
541
+
542
+
543
  # Interactive Data Tables
544
  interactive_table = st.checkbox("Show Interactive Data Table", value=False)
545
  new_line()