ronakreddy18 committed on
Commit
0ad3418
·
verified ·
1 Parent(s): 66b2688

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

Browse files
pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py CHANGED
@@ -1,6 +1,17 @@
1
  import streamlit as st
2
 
3
- # Page navigation state
 
 
 
 
 
 
 
 
 
 
 
4
  if 'page' not in st.session_state:
5
  st.session_state.page = "home" # Default page is "home"
6
 
@@ -9,11 +20,10 @@ def home_page():
9
  st.title(":green[Lifecycle of a Machine Learning Project]")
10
  st.markdown("Click on a stage to learn more about it.")
11
 
12
- # Button for Data Collection (Redirects to 'data_collection' page)
13
- if st.button(":orange[πŸ“Š Data Collection]"):
14
  st.session_state.page = "data_collection"
15
 
16
- # Buttons for other stages with brief explanations
17
  if st.button(":blue[🌟 Problem Statement]"):
18
  st.markdown("### Problem Statement\nIdentify the problem you want to solve and set clear objectives and success criteria.")
19
 
@@ -47,19 +57,15 @@ def data_collection_page():
47
  st.markdown("### Data Collection\nThis page discusses the process of Data Collection.")
48
  st.markdown("Types of Data: **Structured**, **Unstructured**, **Semi-Structured**")
49
 
50
- # Button for Structured Data
51
  if st.button(":blue[🌟 Structured Data]"):
52
  st.session_state.page = "structured_data"
53
 
54
- # Button for Unstructured Data
55
  if st.button(":blue[πŸ“· Unstructured Data]"):
56
  st.session_state.page = "unstructured_data"
57
 
58
- # Button for Semi-Structured Data
59
  if st.button(":blue[πŸ—ƒοΈ Semi-Structured Data]"):
60
  st.session_state.page = "semi_structured_data"
61
 
62
- # Back to Home button
63
  if st.button("Back to Home"):
64
  st.session_state.page = "home"
65
 
@@ -71,11 +77,9 @@ def structured_data_page():
71
  """)
72
  st.markdown("### Examples: Excel files, CSV files")
73
 
74
- # Button for Excel Details
75
  if st.button(":green[πŸ“Š Excel]"):
76
  st.session_state.page = "excel"
77
 
78
- # Back to Data Collection
79
  if st.button("Back to Data Collection"):
80
  st.session_state.page = "data_collection"
81
 
@@ -83,11 +87,9 @@ def structured_data_page():
83
  def excel_page():
84
  st.title(":green[Excel Data Format]")
85
 
86
- # 4a. What it is
87
  st.write("### What is Excel?")
88
- st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: `.xls`, `.xlsx`.")
89
 
90
- # 4b. How to read Excel files
91
  st.write("### How to Read Excel Files")
92
  st.code("""
93
  import pandas as pd
@@ -97,15 +99,13 @@ df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
97
  print(df)
98
  """, language='python')
99
 
100
- # 4c. Issues encountered
101
  st.write("### Issues Encountered")
102
  st.write("""
103
  - **File not found**: Incorrect file path.
104
  - **Sheet name error**: Specified sheet doesn't exist.
105
- - **Missing libraries**: `openpyxl` or `xlrd` might be missing.
106
  """)
107
 
108
- # 4d. Solutions
109
  st.write("### Solutions to These Issues")
110
  st.code("""
111
  # Install required libraries
@@ -122,7 +122,7 @@ excel_file = pd.ExcelFile('data.xlsx')
122
  print(excel_file.sheet_names)
123
  """, language='python')
124
 
125
- # Download Button for Jupyter Notebook
126
  with open("excel_handling_guide.ipynb", "rb") as file:
127
  st.download_button(
128
  label="Download Jupyter Notebook",
@@ -131,31 +131,188 @@ print(excel_file.sheet_names)
131
  mime="application/octet-stream"
132
  )
133
 
134
- # Back to Structured Data
135
  if st.button("Back to Structured Data"):
136
  st.session_state.page = "structured_data"
137
 
138
  # ----------------- Unstructured Data Page -----------------
139
  def unstructured_data_page():
140
  st.title(":blue[Unstructured Data]")
 
 
 
 
 
 
 
 
 
 
 
 
141
  st.markdown("""
142
- Unstructured data does not have a predefined format. Examples include text documents, images, videos, and audio files.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  """)
144
 
145
  # Back to Data Collection
146
  if st.button("Back to Data Collection"):
147
- st.session_state.page = "data_collection"
148
 
149
  # ----------------- Semi-Structured Data Page -----------------
150
  def semi_structured_data_page():
151
  st.title(":blue[Semi-Structured Data]")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  st.markdown("""
153
- Semi-structured data has some organizational properties but doesn't fit into strict tables. Examples: JSON, XML files.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  """)
155
 
156
  # Back to Data Collection
157
  if st.button("Back to Data Collection"):
158
- st.session_state.page = "data_collection"
159
 
160
  # ----------------- Router -----------------
161
  def router():
@@ -175,5 +332,3 @@ def router():
175
  # Run the router function
176
  if __name__ == "__main__":
177
  router()
178
-
179
-
 
1
  import streamlit as st
2
 
3
+ # Inject custom CSS to style the buttons
4
+ st.markdown("""
5
+ <style>
6
+ .stButton>button {
7
+ background-color: #4CAF50;
8
+ color: white;
9
+ width: 100%;
10
+ }
11
+ </style>
12
+ """, unsafe_allow_html=True)
13
+
14
+ # Initialize page navigation state
15
  if 'page' not in st.session_state:
16
  st.session_state.page = "home" # Default page is "home"
17
 
 
20
  st.title(":green[Lifecycle of a Machine Learning Project]")
21
  st.markdown("Click on a stage to learn more about it.")
22
 
23
+ # Buttons for various stages of the ML project lifecycle
24
+ if st.button(":blue[πŸ“Š Data Collection]"):
25
  st.session_state.page = "data_collection"
26
 
 
27
  if st.button(":blue[🌟 Problem Statement]"):
28
  st.markdown("### Problem Statement\nIdentify the problem you want to solve and set clear objectives and success criteria.")
29
 
 
57
  st.markdown("### Data Collection\nThis page discusses the process of Data Collection.")
58
  st.markdown("Types of Data: **Structured**, **Unstructured**, **Semi-Structured**")
59
 
 
60
  if st.button(":blue[🌟 Structured Data]"):
61
  st.session_state.page = "structured_data"
62
 
 
63
  if st.button(":blue[πŸ“· Unstructured Data]"):
64
  st.session_state.page = "unstructured_data"
65
 
 
66
  if st.button(":blue[πŸ—ƒοΈ Semi-Structured Data]"):
67
  st.session_state.page = "semi_structured_data"
68
 
 
69
  if st.button("Back to Home"):
70
  st.session_state.page = "home"
71
 
 
77
  """)
78
  st.markdown("### Examples: Excel files, CSV files")
79
 
 
80
  if st.button(":green[πŸ“Š Excel]"):
81
  st.session_state.page = "excel"
82
 
 
83
  if st.button("Back to Data Collection"):
84
  st.session_state.page = "data_collection"
85
 
 
87
  def excel_page():
88
  st.title(":green[Excel Data Format]")
89
 
 
90
  st.write("### What is Excel?")
91
+ st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: .xls, .xlsx.")
92
 
 
93
  st.write("### How to Read Excel Files")
94
  st.code("""
95
  import pandas as pd
 
99
  print(df)
100
  """, language='python')
101
 
 
102
  st.write("### Issues Encountered")
103
  st.write("""
104
  - **File not found**: Incorrect file path.
105
  - **Sheet name error**: Specified sheet doesn't exist.
106
+ - **Missing libraries**: openpyxl or xlrd might be missing.
107
  """)
108
 
 
109
  st.write("### Solutions to These Issues")
110
  st.code("""
111
  # Install required libraries
 
122
  print(excel_file.sheet_names)
123
  """, language='python')
124
 
125
+ # Download button for a sample Jupyter notebook
126
  with open("excel_handling_guide.ipynb", "rb") as file:
127
  st.download_button(
128
  label="Download Jupyter Notebook",
 
131
  mime="application/octet-stream"
132
  )
133
 
 
134
  if st.button("Back to Structured Data"):
135
  st.session_state.page = "structured_data"
136
 
137
  # ----------------- Unstructured Data Page -----------------
138
  def unstructured_data_page():
139
  st.title(":blue[Unstructured Data]")
140
+
141
+ st.markdown("""
142
+ **Unstructured data** does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
143
+ Examples include:
144
+ - Text documents (e.g., .txt, .docx)
145
+ - Images (e.g., .jpg, .png)
146
+ - Videos (e.g., .mp4, .avi)
147
+ - Audio files (e.g., .mp3, .wav)
148
+ - Social media posts
149
+ """)
150
+
151
+ st.header("πŸ“„ Handling Text Data")
152
  st.markdown("""
153
+ Text data can be analyzed using Natural Language Processing (NLP) techniques.
154
+ """)
155
+ st.code("""
156
+ # Reading text data
157
+ with open('sample.txt', 'r') as file:
158
+ text = file.read()
159
+ print(text)
160
+
161
+ # Basic text processing using NLTK
162
+ import nltk
163
+ from nltk.tokenize import word_tokenize
164
+
165
+ nltk.download('punkt')
166
+ tokens = word_tokenize(text)
167
+ print(tokens)
168
+ """, language='python')
169
+
170
+ st.header("πŸ–ΌοΈ Handling Image Data")
171
+ st.markdown("""
172
+ Image data can be processed using libraries like OpenCV and PIL (Pillow).
173
+ """)
174
+ st.code("""
175
+ from PIL import Image
176
+
177
+ # Open an image file
178
+ image = Image.open('sample_image.jpg')
179
+ image.show()
180
+
181
+ # Convert image to grayscale
182
+ gray_image = image.convert('L')
183
+ gray_image.show()
184
+ """, language='python')
185
+
186
+ st.header("πŸŽ₯ Handling Video Data")
187
+ st.markdown("""
188
+ Videos can be processed frame by frame using OpenCV.
189
+ """)
190
+ st.code("""
191
+ import cv2
192
+
193
+ # Capture video
194
+ video = cv2.VideoCapture('sample_video.mp4')
195
+
196
+ while video.isOpened():
197
+ ret, frame = video.read()
198
+ if not ret:
199
+ break
200
+ cv2.imshow('Frame', frame)
201
+ if cv2.waitKey(25) & 0xFF == ord('q'):
202
+ break
203
+
204
+ video.release()
205
+ cv2.destroyAllWindows()
206
+ """, language='python')
207
+
208
+ st.header("πŸ”Š Handling Audio Data")
209
+ st.markdown("""
210
+ Audio data can be handled using libraries like librosa.
211
+ """)
212
+ st.code("""
213
+ import librosa
214
+ import librosa.display
215
+ import matplotlib.pyplot as plt
216
+
217
+ # Load audio file
218
+ y, sr = librosa.load('sample_audio.mp3')
219
+ librosa.display.waveshow(y, sr=sr)
220
+ plt.title('Waveform')
221
+ plt.show()
222
+ """, language='python')
223
+
224
+ st.markdown("### Challenges with Unstructured Data")
225
+ st.write("""
226
+ - **Noise and Inconsistency**: Data is often incomplete or noisy.
227
+ - **Storage Requirements**: Large size and variability in data types.
228
+ - **Processing Time**: Analyzing unstructured data is computationally expensive.
229
+ """)
230
+
231
+ st.markdown("### Solutions")
232
+ st.write("""
233
+ - **Data Cleaning**: Preprocess data to remove noise.
234
+ - **Efficient Storage**: Use NoSQL databases (e.g., MongoDB) or cloud storage.
235
+ - **Parallel Processing**: Utilize frameworks like Apache Spark.
236
  """)
237
 
238
  # Back to Data Collection
239
  if st.button("Back to Data Collection"):
240
+ st.session_state.page = "data_collection"
241
 
242
  # ----------------- Semi-Structured Data Page -----------------
243
  def semi_structured_data_page():
244
  st.title(":blue[Semi-Structured Data]")
245
+
246
+ st.markdown("""
247
+ **Semi-structured data** does not conform strictly to a tabular structure but contains tags or markers to separate elements. Examples include:
248
+ - JSON (JavaScript Object Notation) files
249
+ - XML (Extensible Markup Language) files
250
+ - YAML (Yet Another Markup Language)
251
+ """)
252
+
253
+ st.header("πŸ”Ή JSON Data")
254
+ st.markdown("""
255
+ JSON is a popular format for storing and exchanging data.
256
+ """)
257
+ st.code("""
258
+ # Sample JSON data
259
+ data = '''
260
+ {
261
+ "name": "Alice",
262
+ "age": 25,
263
+ "skills": ["Python", "Machine Learning"]
264
+ }
265
+ '''
266
+
267
+ # Parse JSON
268
+ parsed_data = json.loads(data)
269
+ print(parsed_data['name']) # Output: Alice
270
+ """, language='python')
271
+
272
+ st.header("πŸ”Ή Reading JSON Files")
273
+ st.code("""
274
+ # Reading a JSON file
275
+ with open('data.json', 'r') as file:
276
+ data = json.load(file)
277
+ print(data)
278
+ """, language='python')
279
+
280
+ st.header("πŸ”Ή XML Data")
281
  st.markdown("""
282
+ XML is a markup language that defines a set of rules for encoding documents.
283
+ """)
284
+ st.code("""
285
+ import xml.etree.ElementTree as ET
286
+
287
+ # Sample XML data
288
+ xml_data = '''
289
+ <person>
290
+ <name>Bob</name>
291
+ <age>30</age>
292
+ <city>New York</city>
293
+ </person>
294
+ '''
295
+
296
+ # Parse XML
297
+ root = ET.fromstring(xml_data)
298
+ print(root.find('name').text) # Output: Bob
299
+ """, language='python')
300
+
301
+ st.markdown("### Challenges with Semi-Structured Data")
302
+ st.write("""
303
+ - **Complex Parsing**: Requires specialized parsers.
304
+ - **Nested Data**: Can be deeply nested, making it harder to process.
305
+ """)
306
+
307
+ st.markdown("### Solutions")
308
+ st.write("""
309
+ - **Libraries**: Use libraries like json, xml.etree.ElementTree, and yaml for parsing.
310
+ - **Validation**: Validate data formats to avoid parsing errors.
311
  """)
312
 
313
  # Back to Data Collection
314
  if st.button("Back to Data Collection"):
315
+ st.session_state.page = "data_collection"
316
 
317
  # ----------------- Router -----------------
318
  def router():
 
332
  # Run the router function
333
  if __name__ == "__main__":
334
  router()