ronakreddy18 committed on
Commit
7126712
·
verified ·
1 Parent(s): 42b77b5

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

Browse files
pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py CHANGED
@@ -2,257 +2,10 @@ import streamlit as st
2
  import pandas as pd
3
  import json
4
  import xml.etree.ElementTree as ET
5
- pip install matplotlib
6
- import matplotlib.pyplot as plt
7
- import numpy as np
8
-
9
-
10
-
11
- # Inject custom CSS to style the buttons
12
- st.markdown("""
13
- <style>
14
- .stButton>button {
15
- background-color: #4CAF50;
16
- color: white;
17
- width: 100%;
18
- }
19
- </style>
20
- """, unsafe_allow_html=True)
21
-
22
- # Initialize page navigation state
23
- if 'page' not in st.session_state:
24
- st.session_state.page = "home" # Default page is "home"
25
-
26
- # ----------------- Home Page -----------------
27
- def home_page():
28
- st.title(":green[Lifecycle of a Machine Learning Project]")
29
- st.markdown("Click on a stage to learn more about it.")
30
-
31
- # Buttons for various stages of the ML project lifecycle
32
- if st.button(":blue[📊 Data Collection]"):
33
- st.session_state.page = "data_collection"
34
-
35
- if st.button(":blue[🌟 Problem Statement]"):
36
- st.markdown("### Problem Statement\nIdentify the problem you want to solve and set clear objectives and success criteria.")
37
-
38
- if st.button(":blue[πŸ› οΈ Simple EDA]"):
39
- st.markdown("### Simple EDA\nPerform exploratory data analysis to understand data distributions and relationships.")
40
-
41
- if st.button(":blue[🧹 Data Pre-Processing]"):
42
- st.markdown("### Data Pre-Processing\nConvert raw data into cleaned data.")
43
-
44
- if st.button(":blue[📈 Exploratory Data Analysis (EDA)]"):
45
- st.markdown("### Exploratory Data Analysis (EDA)\nVisualize and analyze the data to understand its distributions and relationships.")
46
-
47
- if st.button(":blue[πŸ‹οΈ Feature Engineering]"):
48
- st.markdown("### Feature Engineering\nCreate new features from existing data.")
49
-
50
- if st.button(":blue[🤖 Model Training]"):
51
- st.markdown("### Model Training\nTrain the model using the training data and optimize its parameters.")
52
-
53
- if st.button(":blue[🔧 Model Testing]"):
54
- st.markdown("### Model Testing\nAssess the model's performance using various metrics and cross-validation techniques.")
55
-
56
- if st.button(":blue[🚀 Model Deployment]"):
57
- st.markdown("### Model Deployment\nIntegrate the trained model into a production environment and monitor its performance.")
58
-
59
- if st.button(":blue[πŸ“ Monitoring]"):
60
- st.markdown("### Monitoring\nPeriodically retrain the model with new data and update features as needed.")
61
-
62
- # ----------------- Data Collection Page -----------------
63
- def data_collection_page():
64
- st.title(":red[Data Collection]")
65
- st.markdown("### Data Collection\nThis page discusses the process of Data Collection.")
66
- st.markdown("Types of Data: **Structured**, **Unstructured**, **Semi-Structured**")
67
-
68
- if st.button(":blue[🌟 Structured Data]"):
69
- st.session_state.page = "structured_data"
70
-
71
- if st.button(":blue[📷 Unstructured Data]"):
72
- st.session_state.page = "unstructured_data"
73
-
74
- if st.button(":blue[πŸ—ƒοΈ Semi-Structured Data]"):
75
- st.session_state.page = "semi_structured_data"
76
-
77
- if st.button("Back to Home"):
78
- st.session_state.page = "home"
79
-
80
- # ----------------- Structured Data Page -----------------
81
- def structured_data_page():
82
- st.title(":blue[Structured Data]")
83
- st.markdown("""
84
- Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
85
- """)
86
- st.markdown("### Examples: Excel files")
87
-
88
- if st.button(":green[📊 Excel]"):
89
- st.session_state.page = "excel"
90
-
91
- if st.button("Back to Data Collection"):
92
- st.session_state.page = "data_collection"
93
-
94
- # ----------------- Excel Data Page -----------------
95
- def excel_page():
96
- st.title(":green[Excel Data Format]")
97
-
98
- st.write("### What is Excel?")
99
- st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: .xls, .xlsx.")
100
-
101
- st.write("### How to Read Excel Files")
102
- st.code("""
103
- import pandas as pd
104
-
105
- # Read an Excel file
106
- df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
107
- print(df)
108
- """, language='python')
109
-
110
- st.write("### Issues Encountered")
111
- st.write("""
112
- - **File not found**: Incorrect file path.
113
- - **Sheet name error**: Specified sheet doesn't exist.
114
- - **Missing libraries**: openpyxl or xlrd might be missing.
115
- """)
116
-
117
- st.write("### Solutions to These Issues")
118
- st.code("""
119
- # Install required libraries
120
- # pip install openpyxl xlrd
121
-
122
- # Handle missing file
123
- try:
124
- df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
125
- except FileNotFoundError:
126
- print("File not found. Check the file path.")
127
-
128
- # List available sheet names
129
- excel_file = pd.ExcelFile('data.xlsx')
130
- print(excel_file.sheet_names)
131
- """, language='python')
132
-
133
- st.markdown('[Jupyter Notebook](https://colab.research.google.com/drive/1Dv68m9hcRzXsLRlRit0uZc-8CB8U6VV3?usp=sharing)')
134
-
135
-
136
- if st.button("Back to Structured Data"):
137
- st.session_state.page = "structured_data"
138
-
139
- # ----------------- Unstructured Data Page -----------------
140
-
141
- from PIL import Image
142
-
143
-
144
- def unstructured_data_page():
145
- st.title(":blue[Unstructured Data]")
146
-
147
- st.markdown("""
148
- *Unstructured data* does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
149
- Examples include:
150
- - Images (e.g., .jpg, .png)
151
- - Videos (e.g., .mp4, .avi)
152
- - Social media posts
153
- """)
154
-
155
- ### Handling Image Data Section
156
- st.header("πŸ–ΌοΈ Handling Image Data")
157
- st.markdown("""
158
- Image data can be processed using libraries like OpenCV and PIL (Pillow). Images often need to be preprocessed for tasks like analysis, classification, or feature extraction. Common operations include:
159
- - **Reading and displaying images**
160
- - **Converting to grayscale**
161
- - **Resizing and cropping**
162
- - **Rotating and flipping**
163
- - **Applying filters**
164
- - **Edge detection and other transformations**
165
- """)
166
-
167
- st.code("""
168
  from PIL import Image
169
  import numpy as np
170
  import matplotlib.pyplot as plt
171
 
172
- # Open an image file
173
- image = Image.open('sample_image.jpg')
174
- image.show()
175
-
176
- # Convert image to grayscale
177
- gray_image = image.convert('L')
178
- gray_image.show()
179
-
180
- # Resize the image
181
- resized_image = image.resize((200, 200))
182
- resized_image.show()
183
-
184
- # Rotate the image by 90 degrees
185
- rotated_image = image.rotate(90)
186
- rotated_image.show()
187
-
188
- # Convert the image to a NumPy array and display its shape
189
- image_array = np.array(image)
190
- print(image_array.shape)
191
-
192
- # Display the image array as a plot
193
- plt.imshow(image)
194
- plt.title("Original Image")
195
- plt.axis('off')
196
- plt.show()
197
- """, language='python')
198
-
199
- st.markdown("""
200
- **Common Image Processing Techniques:**
201
- - **Resizing**: Adjust the dimensions of an image for uniformity in models.
202
- - **Cropping**: Extract a region of interest (ROI) from an image.
203
- - **Grayscale Conversion**: Simplify image data by reducing it to a single channel.
204
- - **Rotation/Flipping**: Perform augmentations to increase the dataset for model training.
205
- - **Edge Detection**: Identify edges in images using filters like the Sobel or Canny filters.
206
- """)
207
-
208
- ### Challenges and Solutions Section
209
- st.markdown("### Challenges with Unstructured Data")
210
- st.write("""
211
- - *Noise and Inconsistency*: Data is often incomplete or noisy.
212
- - *Storage Requirements*: Large size and variability in data types.
213
- - *Processing Time*: Analyzing unstructured data is computationally expensive.
214
- """)
215
-
216
- st.markdown("### Solutions")
217
- st.write("""
218
- - *Data Cleaning*: Preprocess data to remove noise.
219
- - *Efficient Storage*: Use NoSQL databases (e.g., MongoDB) or cloud storage.
220
- - *Parallel Processing*: Utilize frameworks like Apache Spark.
221
- """)
222
-
223
- # Button to Navigate to Introduction to Image
224
- if st.button("Introduction to Image"):
225
- st.session_state.page = "introduction_to_image"
226
-
227
- # Navigation Button
228
- if st.button("Back to Data Collection"):
229
- st.session_state.page = "data_collection"
230
-
231
-
232
-
233
-
234
- # ----------------- Semi-Structured Data Page -----------------
235
- def semi_structured_data_page():
236
- st.title(":orange[Semi-Structured Data]")
237
- st.markdown("""
238
- Semi-structured data does not follow the rigid structure of relational databases but still has some organizational properties. Examples include:
239
- - JSON files
240
- - XML files
241
- """)
242
-
243
- if st.button(":green[💾 JSON]"):
244
- st.session_state.page = "json"
245
-
246
- if st.button(":green[📄 CSV]"):
247
- st.session_state.page = "csv"
248
-
249
- if st.button(":green[📄 XML]"):
250
- st.session_state.page = "xml"
251
- import streamlit as st
252
- import pandas as pd
253
- import json
254
- import xml.etree.ElementTree as ET
255
-
256
  # Inject custom CSS to style the buttons
257
  st.markdown("""
258
  <style>
@@ -382,11 +135,6 @@ print(excel_file.sheet_names)
382
  st.session_state.page = "structured_data"
383
 
384
  # ----------------- Unstructured Data Page -----------------
385
-
386
- from PIL import Image
387
- import numpy as np
388
- import matplotlib.pyplot as plt
389
-
390
  def unstructured_data_page():
391
  st.title(":blue[Unstructured Data]")
392
 
@@ -474,9 +222,6 @@ plt.show()
474
  if st.button("Back to Data Collection"):
475
  st.session_state.page = "data_collection"
476
 
477
-
478
-
479
-
480
  # ----------------- Semi-Structured Data Page -----------------
481
  def semi_structured_data_page():
482
  st.title(":orange[Semi-Structured Data]")
@@ -504,7 +249,7 @@ def json_page():
504
 
505
  st.write("### What is JSON?")
506
  st.write("""
507
- JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files, and data transfer applications.
508
  """)
509
 
510
  st.write("### Reading JSON Files")
 
2
  import pandas as pd
3
  import json
4
  import xml.etree.ElementTree as ET
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from PIL import Image
6
  import numpy as np
7
  import matplotlib.pyplot as plt
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Inject custom CSS to style the buttons
10
  st.markdown("""
11
  <style>
 
135
  st.session_state.page = "structured_data"
136
 
137
  # ----------------- Unstructured Data Page -----------------
 
 
 
 
 
138
  def unstructured_data_page():
139
  st.title(":blue[Unstructured Data]")
140
 
 
222
  if st.button("Back to Data Collection"):
223
  st.session_state.page = "data_collection"
224
 
 
 
 
225
  # ----------------- Semi-Structured Data Page -----------------
226
  def semi_structured_data_page():
227
  st.title(":orange[Semi-Structured Data]")
 
249
 
250
  st.write("### What is JSON?")
251
  st.write("""
252
+ JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files, and data transfer applications.
253
  """)
254
 
255
  st.write("### Reading JSON Files")