Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import seaborn as sns
|
|
7 |
import plotly.express as px
|
8 |
import time
|
9 |
from PIL import Image
|
|
|
10 |
|
11 |
# Config
|
12 |
page_icon = Image.open("./assets/logo.png")
|
@@ -176,14 +177,11 @@ if st.session_state.df is None:
|
|
176 |
|
177 |
# Select
|
178 |
elif uploading_way == "select":
|
179 |
-
selected = st.selectbox("Select Dataset", ["Select", "Titanic Dataset",
|
180 |
"Diabetes Dataset", "Digits Dataset",
|
181 |
"Olivetti Faces Dataset", "California Housing Dataset",
|
182 |
"Covid-19 Dataset"])
|
183 |
|
184 |
-
if selected == "Titanic Dataset":
|
185 |
-
df = load_data("./data/titanic.csv")
|
186 |
-
st.session_state.df = df
|
187 |
|
188 |
elif selected == "Iris Dataset":
|
189 |
from sklearn.datasets import load_iris
|
@@ -199,20 +197,6 @@ if st.session_state.df is None:
|
|
199 |
df['target'] = wine.target
|
200 |
st.session_state.df = df
|
201 |
|
202 |
-
elif selected == "Breast Cancer Dataset":
|
203 |
-
from sklearn.datasets import load_breast_cancer
|
204 |
-
cancer = load_breast_cancer()
|
205 |
-
df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
|
206 |
-
df['target'] = cancer.target
|
207 |
-
st.session_state.df = df
|
208 |
-
|
209 |
-
|
210 |
-
elif selected == "Diabetes Dataset":
|
211 |
-
from sklearn.datasets import load_diabetes
|
212 |
-
diabetes = load_diabetes()
|
213 |
-
df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
|
214 |
-
df['target'] = diabetes.target
|
215 |
-
st.session_state.df = df
|
216 |
|
217 |
elif selected == "Digits Dataset":
|
218 |
from sklearn.datasets import load_digits
|
@@ -310,7 +294,18 @@ if st.session_state.df is not None:
|
|
310 |
st.write(pd.DataFrame(df.columns, columns=['Columns']).T)
|
311 |
new_line()
|
312 |
|
313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
314 |
# Describe Numerical
|
315 |
describe = st.checkbox("Show Description **(Numerical Features)**", value=False)
|
316 |
new_line()
|
@@ -318,6 +313,12 @@ if st.session_state.df is not None:
|
|
318 |
st.dataframe(df.describe(), use_container_width=True)
|
319 |
new_line()
|
320 |
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
# Describe Categorical
|
322 |
describe_cat = st.checkbox("Show Description **(Categorical Features)**", value=False)
|
323 |
new_line()
|
@@ -469,7 +470,7 @@ df.drop(columns={col_to_delete}, inplace=True)
|
|
469 |
|
470 |
# Feature Importance (Only if a model has been trained)
|
471 |
if 'trained_model' in st.session_state and st.session_state.trained_model is not None:
|
472 |
-
feature_importance = st.checkbox("Show Feature Importance", value=False)
|
473 |
new_line()
|
474 |
if feature_importance:
|
475 |
model = st.session_state.trained_model
|
@@ -479,7 +480,66 @@ df.drop(columns={col_to_delete}, inplace=True)
|
|
479 |
ax.set_title('Feature Importance')
|
480 |
st.pyplot(fig)
|
481 |
new_line()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
482 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
483 |
# Interactive Data Tables
|
484 |
interactive_table = st.checkbox("Show Interactive Data Table", value=False)
|
485 |
new_line()
|
|
|
7 |
import plotly.express as px
|
8 |
import time
|
9 |
from PIL import Image
|
10 |
+
from wordcloud import WordCloud
|
11 |
|
12 |
# Config
|
13 |
page_icon = Image.open("./assets/logo.png")
|
|
|
177 |
|
178 |
# Select
|
179 |
elif uploading_way == "select":
|
180 |
+
selected = st.selectbox("Select Dataset", ["Select", "Titanic Dataset","Iris Dataset", "Wine Dataset",
|
181 |
"Diabetes Dataset", "Digits Dataset",
|
182 |
"Olivetti Faces Dataset", "California Housing Dataset",
|
183 |
"Covid-19 Dataset"])
|
184 |
|
|
|
|
|
|
|
185 |
|
186 |
elif selected == "Iris Dataset":
|
187 |
from sklearn.datasets import load_iris
|
|
|
197 |
df['target'] = wine.target
|
198 |
st.session_state.df = df
|
199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
|
201 |
elif selected == "Digits Dataset":
|
202 |
from sklearn.datasets import load_digits
|
|
|
294 |
st.write(pd.DataFrame(df.columns, columns=['Columns']).T)
|
295 |
new_line()
|
296 |
|
297 |
+
# Optional panel: list the dtype pandas holds for every column of `df`.
show_dtypes = st.checkbox("Check Data Types", value=False)
if show_dtypes:
    st.write(df.dtypes)
    new_line()
|
300 |
+
|
301 |
+
# Optional panel: skewness and kurtosis of each numeric column.
if st.checkbox("Show Skewness and Kurtosis", value=False):
    # Only numeric columns have a skewness/kurtosis. Calling skew() or
    # kurtosis() on a frame that also holds text columns raises TypeError
    # on pandas >= 2.0, where numeric_only defaults to False.
    numeric_df = df.select_dtypes(include="number")
    if numeric_df.shape[1] == 0:
        st.warning("No numeric columns available for skewness and kurtosis.")
    else:
        skew_kurt = pd.DataFrame(data={
            'Skewness': numeric_df.skew(),
            'Kurtosis': numeric_df.kurtosis(),
        })
        st.write(skew_kurt)
    new_line()
|
308 |
+
|
309 |
# Describe Numerical
|
310 |
describe = st.checkbox("Show Description **(Numerical Features)**", value=False)
|
311 |
new_line()
|
|
|
313 |
st.dataframe(df.describe(), use_container_width=True)
|
314 |
new_line()
|
315 |
|
316 |
+
# Optional panel: number of distinct (non-null) values per column.
show_unique = st.checkbox("Unique Value Count", value=False)
if show_unique:
    # nunique() yields one count per column; present it as a one-column table.
    unique_counts = df.nunique().to_frame(name='Unique Count')
    st.write(unique_counts)
    new_line()
|
320 |
+
|
321 |
+
|
322 |
# Describe Categorical
|
323 |
describe_cat = st.checkbox("Show Description **(Categorical Features)**", value=False)
|
324 |
new_line()
|
|
|
470 |
|
471 |
# Feature Importance (Only if a model has been trained)
|
472 |
if 'trained_model' in st.session_state and st.session_state.trained_model is not None:
|
473 |
+
feature_importance = st.checkbox("Show Feature Importance (Only click after training or it will throw error)", value=False)
|
474 |
new_line()
|
475 |
if feature_importance:
|
476 |
model = st.session_state.trained_model
|
|
|
480 |
ax.set_title('Feature Importance')
|
481 |
st.pyplot(fig)
|
482 |
new_line()
|
483 |
+
|
484 |
+
# Optional panel: box plot of one numeric column to eyeball outliers.
if st.checkbox("Identify Outliers", value=False):
    numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
    if not numeric_cols:
        # With no options the selectbox has nothing to return and the
        # column lookup below would fail, so report it instead.
        st.warning("No numeric columns available to check for outliers.")
    else:
        col_for_outliers = st.selectbox("Select Column to Check Outliers", options=numeric_cols)
        fig, ax = plt.subplots()
        sns.boxplot(x=df[col_for_outliers], ax=ax)
        ax.set_title(f'Outliers in {col_for_outliers}')
        st.pyplot(fig)
        # The script reruns on every interaction; close the figure so
        # pyplot does not keep accumulating open figures.
        plt.close(fig)
    new_line()
|
492 |
+
|
493 |
+
# Optional panel: seaborn pair plot over user-selected columns.
if st.checkbox("Show Pairwise Scatter Plots", value=False):
    # Hand plain lists to the widget rather than pandas Index objects.
    all_columns = df.columns.tolist()
    selected_cols = st.multiselect("Select Columns", options=all_columns, default=all_columns[:2])
    if not selected_cols:
        # pairplot cannot build a grid from an empty column selection.
        st.warning("Select at least one column to plot.")
    else:
        pair_grid = sns.pairplot(df[selected_cols])
        # Pass the grid's own figure explicitly: calling st.pyplot() with no
        # argument relies on matplotlib's global figure, which Streamlit
        # has deprecated.
        st.pyplot(pair_grid.figure)
        plt.close(pair_grid.figure)
    new_line()
|
498 |
+
|
499 |
+
# Optional panel: contingency table between two categorical columns.
if st.checkbox("Show Cross-tabulations", value=False):
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    if not categorical_cols:
        # Nothing to choose from: the selectboxes would have no valid
        # selection and the column lookups below would fail.
        st.warning("No categorical columns available for cross-tabulation.")
    else:
        x_col = st.selectbox("Select X-axis Column for Cross-tab", options=categorical_cols, index=0)
        # Default the second selector to a different column when one exists.
        default_y = 1 if len(categorical_cols) > 1 else 0
        y_col = st.selectbox("Select Y-axis Column for Cross-tab", options=categorical_cols, index=default_y)
        cross_tab = pd.crosstab(df[x_col], df[y_col])
        st.write(cross_tab)
    new_line()
|
506 |
+
|
507 |
+
# Optional panel: show only the rows where a chosen column equals a chosen value.
segment_enabled = st.checkbox("Segmented Analysis", value=False)
if segment_enabled:
    segments = st.selectbox("Select Segment", options=df.columns)
    # Offer the distinct non-null values of the chosen column.
    candidate_values = df[segments].dropna().unique()
    selected_segment = st.selectbox("Choose Segment Value", options=candidate_values)
    row_mask = df[segments] == selected_segment
    st.write(df[row_mask])
    new_line()
|
514 |
+
|
515 |
+
# Optional panel: plot a numeric column over a user-selected date column.
# The columns are chosen in the UI instead of being hard-coded placeholder
# names, which raised KeyError on any dataset lacking those exact columns.
if st.checkbox("Temporal Analysis", value=False):
    date_col = st.selectbox("Select Date Column", options=df.columns.tolist())
    value_col = st.selectbox(
        "Select Value Column",
        options=df.select_dtypes(include="number").columns.tolist(),
    )
    if date_col is None or value_col is None:
        st.warning("Temporal analysis needs a date column and a numeric column.")
    else:
        # Unparseable entries become NaT and are dropped before plotting.
        trend = pd.Series(
            df[value_col].to_numpy(),
            index=pd.to_datetime(df[date_col], errors="coerce"),
        )
        trend = trend[trend.index.notna()].sort_index()
        if trend.empty:
            st.warning(f"Could not parse any dates from '{date_col}'.")
        else:
            fig, ax = plt.subplots()
            trend.plot(ax=ax)
            ax.set_title('Trend Over Time')
            st.pyplot(fig)
            # Avoid accumulating open figures across script reruns.
            plt.close(fig)
    new_line()
|
522 |
+
|
523 |
|
524 |
+
# Optional panel: word cloud built from one text (object-dtype) column.
if st.checkbox("Show Word Cloud", value=False):
    # Use the 'object' dtype string: the np.object alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    text_cols = df.select_dtypes(include=['object']).columns.tolist()
    text_col = st.selectbox("Select Text Column for Word Cloud", options=text_cols)
    if text_col is None:
        text_data = ''
    else:
        # Object columns may hold non-string values; str.join needs strings.
        text_data = ' '.join(df[text_col].dropna().astype(str))
    if not text_data.strip():
        # WordCloud.generate needs at least one word to draw.
        st.warning("No text available to build a word cloud.")
    else:
        wordcloud = WordCloud(width=800, height=400).generate(text_data)
        fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        st.pyplot(fig)
        # Avoid accumulating open figures across script reruns.
        plt.close(fig)
    new_line()
|
533 |
+
|
534 |
+
# Optional panel: summary statistics of character and word counts for one
# text (object-dtype) column.
if st.checkbox("Show Text Statistics", value=False):
    # Use the 'object' dtype string: the np.object alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    text_cols = df.select_dtypes(include=['object']).columns.tolist()
    text_col = st.selectbox("Select Text Column for Statistics", options=text_cols)
    # Cast to str so non-string objects in the column do not break
    # the length / split computations.
    texts = None if text_col is None else df[text_col].dropna().astype(str)
    if texts is None or texts.empty:
        st.warning("No text available to compute statistics.")
    else:
        text_stats_df = pd.DataFrame({
            'length': texts.str.len(),
            'word_count': texts.str.split().str.len(),
        })
        st.write(text_stats_df.describe())
    new_line()
|
540 |
+
|
541 |
+
|
542 |
+
|
543 |
# Interactive Data Tables
|
544 |
interactive_table = st.checkbox("Show Interactive Data Table", value=False)
|
545 |
new_line()
|