Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py
Browse files
pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py
CHANGED
@@ -2,257 +2,10 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
import json
|
4 |
import xml.etree.ElementTree as ET
|
5 |
-
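# pip install matplotlib   (shell command: run it in a terminal or list matplotlib in requirements.txt; it is not valid Python)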
|
6 |
-
import matplotlib.pyplot as plt
|
7 |
-
import numpy as np
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
# Stylesheet for the elements matched by `.stButton>button`; injected as raw
# HTML, which is why `unsafe_allow_html=True` is passed below.
_BUTTON_CSS = """
<style>
.stButton>button {
background-color: #4CAF50;
color: white;
width: 100%;
}
</style>
"""
st.markdown(_BUTTON_CSS, unsafe_allow_html=True)

# Seed the navigation state once per session; "home" is the default page.
if 'page' not in st.session_state:
    st.session_state.page = "home"
|
25 |
-
|
26 |
-
# ----------------- Home Page -----------------
|
27 |
-
def home_page():
    """Render the landing page listing the stages of an ML project.

    The "Data Collection" button stores ``"data_collection"`` in
    ``st.session_state.page``; every other stage button prints a short
    markdown summary directly beneath itself when clicked.

    NOTE(review): this function only writes the session value; the code that
    dispatches on ``page`` is not visible here.
    NOTE(review): the leading glyphs in the labels look like mis-decoded
    emoji; they are kept exactly as found -- confirm against the repository.
    """
    st.title(":green[Lifecycle of a Machine Learning Project]")
    st.markdown("Click on a stage to learn more about it.")

    # The only stage that navigates to a page of its own.
    if st.button(":blue[π Data Collection]"):
        st.session_state.page = "data_collection"

    # (button label, markdown shown when that button is clicked), in display order.
    stage_summaries = (
        (
            ":blue[π Problem Statement]",
            "### Problem Statement\nIdentify the problem you want to solve and set clear objectives and success criteria.",
        ),
        (
            ":blue[π οΈ Simple EDA]",
            "### Simple EDA\nPerform exploratory data analysis to understand data distributions and relationships.",
        ),
        (
            ":blue[π§Ή Data Pre-Processing]",
            "### Data Pre-Processing\nConvert raw data into cleaned data.",
        ),
        (
            ":blue[π Exploratory Data Analysis (EDA)]",
            "### Exploratory Data Analysis (EDA)\nVisualize and analyze the data to understand its distributions and relationships.",
        ),
        (
            ":blue[ποΈ Feature Engineering]",
            "### Feature Engineering\nCreate new features from existing data.",
        ),
        (
            ":blue[π€ Model Training]",
            "### Model Training\nTrain the model using the training data and optimize its parameters.",
        ),
        (
            ":blue[π§ Model Testing]",
            "### Model Testing\nAssess the model's performance using various metrics and cross-validation techniques.",
        ),
        (
            ":blue[π Model Deployment]",
            "### Model Deployment\nIntegrate the trained model into a production environment and monitor its performance.",
        ),
        (
            ":blue[π Monitoring]",
            "### Monitoring\nPeriodically retrain the model with new data and update features as needed.",
        ),
    )
    for label, summary in stage_summaries:
        if st.button(label):
            st.markdown(summary)
|
61 |
-
|
62 |
-
# ----------------- Data Collection Page -----------------
|
63 |
-
def data_collection_page():
    """Render the "Data Collection" page and its navigation buttons.

    Each button stores the name of the page it points to in
    ``st.session_state.page``.
    """
    st.title(":red[Data Collection]")
    st.markdown("### Data Collection\nThis page discusses the process of Data Collection.")
    st.markdown("Types of Data: **Structured**, **Unstructured**, **Semi-Structured**")

    # (button label, page name stored on click), in display order.
    destinations = (
        (":blue[π Structured Data]", "structured_data"),
        (":blue[π· Unstructured Data]", "unstructured_data"),
        (":blue[ποΈ Semi-Structured Data]", "semi_structured_data"),
        ("Back to Home", "home"),
    )
    for label, target in destinations:
        if st.button(label):
            st.session_state.page = target
|
79 |
-
|
80 |
-
# ----------------- Structured Data Page -----------------
|
81 |
-
def structured_data_page():
    """Render the "Structured Data" page.

    Shows a short description and two buttons: one stores ``"excel"`` and the
    other ``"data_collection"`` in ``st.session_state.page``.
    """
    description = """
Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
"""
    st.title(":blue[Structured Data]")
    st.markdown(description)
    st.markdown("### Examples: Excel files")

    # (button label, page name stored on click), in display order.
    for label, target in (
        (":green[π Excel]", "excel"),
        ("Back to Data Collection", "data_collection"),
    ):
        if st.button(label):
            st.session_state.page = target
|
93 |
-
|
94 |
-
# ----------------- Excel Data Page -----------------
|
95 |
-
def excel_page():
    """Render the "Excel Data Format" page.

    Displays an explanation of Excel files, two code samples (shown as text,
    never executed here), a list of common problems, a notebook link, and a
    button that stores ``"structured_data"`` in ``st.session_state.page``.
    """
    st.title(":green[Excel Data Format]")

    st.write("### What is Excel?")
    st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: .xls, .xlsx.")

    st.write("### How to Read Excel Files")
    st.code("""
import pandas as pd

# Read an Excel file
df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
print(df)
""", language='python')

    st.write("### Issues Encountered")
    st.write("""
- **File not found**: Incorrect file path.
- **Sheet name error**: Specified sheet doesn't exist.
- **Missing libraries**: openpyxl or xlrd might be missing.
""")

    st.write("### Solutions to These Issues")
    # The try/except bodies are indented so the snippet shown to the reader is
    # valid Python when copied.
    st.code("""
# Install required libraries
# pip install openpyxl xlrd

# Handle missing file
try:
    df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
except FileNotFoundError:
    print("File not found. Check the file path.")

# List available sheet names
excel_file = pd.ExcelFile('data.xlsx')
print(excel_file.sheet_names)
""", language='python')

    st.markdown('[Jupyter Notebook](https://colab.research.google.com/drive/1Dv68m9hcRzXsLRlRit0uZc-8CB8U6VV3?usp=sharing)')

    if st.button("Back to Structured Data"):
        st.session_state.page = "structured_data"
|
138 |
-
|
139 |
-
# ----------------- Unstructured Data Page -----------------
|
140 |
-
|
141 |
-
from PIL import Image
|
142 |
-
|
143 |
-
|
144 |
-
def unstructured_data_page():
    """Render the "Unstructured Data" page.

    Shows an overview, an image-handling section with a PIL / NumPy /
    matplotlib sample (displayed as text, never executed here), lists of
    challenges and solutions, and two buttons that store the next page name
    in ``st.session_state.page``.
    """
    overview_md = """
*Unstructured data* does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
Examples include:
- Images (e.g., .jpg, .png)
- Videos (e.g., .mp4, .avi)
- Social media posts
"""
    image_intro_md = """
Image data can be processed using libraries like OpenCV and PIL (Pillow). Images often need to be preprocessed for tasks like analysis, classification, or feature extraction. Common operations include:
- **Reading and displaying images**
- **Converting to grayscale**
- **Resizing and cropping**
- **Rotating and flipping**
- **Applying filters**
- **Edge detection and other transformations**
"""
    image_sample = """
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# Open an image file
image = Image.open('sample_image.jpg')
image.show()

# Convert image to grayscale
gray_image = image.convert('L')
gray_image.show()

# Resize the image
resized_image = image.resize((200, 200))
resized_image.show()

# Rotate the image by 90 degrees
rotated_image = image.rotate(90)
rotated_image.show()

# Convert the image to a NumPy array and display its shape
image_array = np.array(image)
print(image_array.shape)

# Display the image array as a plot
plt.imshow(image)
plt.title("Original Image")
plt.axis('off')
plt.show()
"""
    techniques_md = """
**Common Image Processing Techniques:**
- **Resizing**: Adjust the dimensions of an image for uniformity in models.
- **Cropping**: Extract a region of interest (ROI) from an image.
- **Grayscale Conversion**: Simplify image data by reducing it to a single channel.
- **Rotation/Flipping**: Perform augmentations to increase the dataset for model training.
- **Edge Detection**: Identify edges in images using filters like the Sobel or Canny filters.
"""
    challenges_text = """
- *Noise and Inconsistency*: Data is often incomplete or noisy.
- *Storage Requirements*: Large size and variability in data types.
- *Processing Time*: Analyzing unstructured data is computationally expensive.
"""
    solutions_text = """
- *Data Cleaning*: Preprocess data to remove noise.
- *Efficient Storage*: Use NoSQL databases (e.g., MongoDB) or cloud storage.
- *Parallel Processing*: Utilize frameworks like Apache Spark.
"""

    st.title(":blue[Unstructured Data]")
    st.markdown(overview_md)

    # --- Image handling section ---
    st.header("πΌοΈ Handling Image Data")
    st.markdown(image_intro_md)
    st.code(image_sample, language='python')
    st.markdown(techniques_md)

    # --- Challenges and solutions section ---
    st.markdown("### Challenges with Unstructured Data")
    st.write(challenges_text)
    st.markdown("### Solutions")
    st.write(solutions_text)

    # Navigation: (button label, page name stored on click), in display order.
    for label, target in (
        ("Introduction to Image", "introduction_to_image"),
        ("Back to Data Collection", "data_collection"),
    ):
        if st.button(label):
            st.session_state.page = target
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
# ----------------- Semi-Structured Data Page -----------------
|
235 |
-
def semi_structured_data_page():
    """Render the "Semi-Structured Data" page.

    Shows a short description followed by JSON, CSV and XML buttons; each
    button stores its page name in ``st.session_state.page``.
    """
    description = """
Semi-structured data does not follow the rigid structure of relational databases but still has some organizational properties. Examples include:
- JSON files
- XML files
"""
    st.title(":orange[Semi-Structured Data]")
    st.markdown(description)

    # (button label, page name stored on click), in display order.
    format_pages = (
        (":green[πΎ JSON]", "json"),
        (":green[π CSV]", "csv"),
        (":green[π XML]", "xml"),
    )
    for label, target in format_pages:
        if st.button(label):
            st.session_state.page = target
|
251 |
-
import streamlit as st
|
252 |
-
import pandas as pd
|
253 |
-
import json
|
254 |
-
import xml.etree.ElementTree as ET
|
255 |
-
|
256 |
# Inject custom CSS to style the buttons
|
257 |
st.markdown("""
|
258 |
<style>
|
@@ -382,11 +135,6 @@ print(excel_file.sheet_names)
|
|
382 |
st.session_state.page = "structured_data"
|
383 |
|
384 |
# ----------------- Unstructured Data Page -----------------
|
385 |
-
|
386 |
-
from PIL import Image
|
387 |
-
import numpy as np
|
388 |
-
import matplotlib.pyplot as plt
|
389 |
-
|
390 |
def unstructured_data_page():
|
391 |
st.title(":blue[Unstructured Data]")
|
392 |
|
@@ -474,9 +222,6 @@ plt.show()
|
|
474 |
if st.button("Back to Data Collection"):
|
475 |
st.session_state.page = "data_collection"
|
476 |
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
# ----------------- Semi-Structured Data Page -----------------
|
481 |
def semi_structured_data_page():
|
482 |
st.title(":orange[Semi-Structured Data]")
|
@@ -504,7 +249,7 @@ def json_page():
|
|
504 |
|
505 |
st.write("### What is JSON?")
|
506 |
st.write("""
|
507 |
-
JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files,
|
508 |
""")
|
509 |
|
510 |
st.write("### Reading JSON Files")
|
|
|
2 |
import pandas as pd
|
3 |
import json
|
4 |
import xml.etree.ElementTree as ET
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
from PIL import Image
|
6 |
import numpy as np
|
7 |
import matplotlib.pyplot as plt
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
# Inject custom CSS to style the buttons
|
10 |
st.markdown("""
|
11 |
<style>
|
|
|
135 |
st.session_state.page = "structured_data"
|
136 |
|
137 |
# ----------------- Unstructured Data Page -----------------
|
|
|
|
|
|
|
|
|
|
|
138 |
def unstructured_data_page():
|
139 |
st.title(":blue[Unstructured Data]")
|
140 |
|
|
|
222 |
if st.button("Back to Data Collection"):
|
223 |
st.session_state.page = "data_collection"
|
224 |
|
|
|
|
|
|
|
225 |
# ----------------- Semi-Structured Data Page -----------------
|
226 |
def semi_structured_data_page():
|
227 |
st.title(":orange[Semi-Structured Data]")
|
|
|
249 |
|
250 |
st.write("### What is JSON?")
|
251 |
st.write("""
|
252 |
+
JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files, and data transfer applications.
|
253 |
""")
|
254 |
|
255 |
st.write("### Reading JSON Files")
|