ronakreddy18 committed on
Commit
4914bcc
·
verified ·
1 Parent(s): 49fc73f

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

Browse files
pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py CHANGED
@@ -72,6 +72,7 @@ def data_collection_page():
72
  if st.button("Back to Home"):
73
  st.session_state.page = "home"
74
 
 
75
  # ----------------- Structured Data Page -----------------
76
  def structured_data_page():
77
  st.title(":blue[Structured Data]")
@@ -80,13 +81,13 @@ def structured_data_page():
80
  """)
81
  st.markdown("### Examples: Excel files, CSV files, JSON files")
82
 
83
- if st.button(":green[📊 Excel]"):
84
  st.session_state.page = "excel"
85
 
86
- if st.button(":green[📄 CSV]"):
87
  st.session_state.page = "csv"
88
 
89
- if st.button(":green[🔹 JSON]"):
90
  st.session_state.page = "json"
91
 
92
  if st.button("Back to Data Collection"):
@@ -152,6 +153,22 @@ df = pd.read_csv('data.csv')
152
  print(df)
153
  """, language='python')
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_csv_guide_link")
156
 
157
  if st.button("Back to Structured Data"):
@@ -178,7 +195,188 @@ with open('data.json', 'r') as file:
178
  st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_json_guide_link")
179
 
180
  if st.button("Back to Structured Data"):
181
- st.session_state.page = "structured_data"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  # ----------------- Router -----------------
184
  def router():
@@ -192,8 +390,11 @@ def router():
192
  excel_page()
193
  elif st.session_state.page == "csv":
194
  csv_page()
195
- elif st.session_state.page == "json":
196
- json_page()
 
 
197
 
198
  # Run the router function
199
- router()
 
 
72
  if st.button("Back to Home"):
73
  st.session_state.page = "home"
74
 
75
+
76
  # ----------------- Structured Data Page -----------------
77
  def structured_data_page():
78
  st.title(":blue[Structured Data]")
 
81
  """)
82
  st.markdown("### Examples: Excel files, CSV files, JSON files")
83
 
84
+ if st.button(":green[📊 Excel]"):
85
  st.session_state.page = "excel"
86
 
87
+ if st.button(":green[📄 CSV]"):
88
  st.session_state.page = "csv"
89
 
90
+ if st.button(":green[🔹 JSON]"):
91
  st.session_state.page = "json"
92
 
93
  if st.button("Back to Data Collection"):
 
153
  print(df)
154
  """, language='python')
155
 
156
+ st.write("### Error Handling for CSV Files")
157
+ st.code("""
158
+ import pandas as pd
159
+
160
+ try:
161
+ df = pd.read_csv('data.csv', encoding='utf-8', delimiter=',')
162
+ print("CSV File Loaded Successfully!")
163
+ print(df)
164
+ except FileNotFoundError:
165
+ print("Error: File not found. Please check the file path.")
166
+ except pd.errors.ParserError:
167
+ print("Error: The file is not a valid CSV format.")
168
+ except UnicodeDecodeError:
169
+ print("Error: Encoding issue. Try specifying a different encoding like 'latin1' or 'utf-8'.")
170
+ """, language='python')
171
+
172
  st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_csv_guide_link")
173
 
174
  if st.button("Back to Structured Data"):
 
195
  st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_json_guide_link")
196
 
197
  if st.button("Back to Structured Data"):
198
+ st.session_state.page = "structured_data"
199
+
200
+
201
+ # ----------------- Unstructured Data Page -----------------
202
+ def unstructured_data_page():
203
+ st.title(":blue[Unstructured Data]")
204
+
205
+ st.markdown("""
206
+ **Unstructured data** does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
207
+ Examples include:
208
+ - Text documents (e.g., .txt, .docx)
209
+ - Images (e.g., .jpg, .png)
210
+ - Videos (e.g., .mp4, .avi)
211
+ - Audio files (e.g., .mp3, .wav)
212
+ - Social media posts
213
+ """)
214
+
215
+ st.header("📄 Handling Text Data")
216
+ st.markdown("""
217
+ Text data can be analyzed using Natural Language Processing (NLP) techniques.
218
+ """)
219
+ st.code("""
220
+ # Reading text data
221
+ with open('sample.txt', 'r') as file:
222
+ text = file.read()
223
+ print(text)
224
+
225
+ # Basic text processing using NLTK
226
+ import nltk
227
+ from nltk.tokenize import word_tokenize
228
+
229
+ nltk.download('punkt')
230
+ tokens = word_tokenize(text)
231
+ print(tokens)
232
+ """, language='python')
233
+
234
+ st.header("🖼️ Handling Image Data")
235
+ st.markdown("""
236
+ Image data can be processed using libraries like OpenCV and PIL (Pillow).
237
+ """)
238
+ st.code("""
239
+ from PIL import Image
240
+
241
+ # Open an image file
242
+ image = Image.open('sample_image.jpg')
243
+ image.show()
244
+
245
+ # Convert image to grayscale
246
+ gray_image = image.convert('L')
247
+ gray_image.show()
248
+ """, language='python')
249
+
250
+ st.header("🎥 Handling Video Data")
251
+ st.markdown("""
252
+ Videos can be processed frame by frame using OpenCV.
253
+ """)
254
+ st.code("""
255
+ import cv2
256
+
257
+ # Capture video
258
+ video = cv2.VideoCapture('sample_video.mp4')
259
+
260
+ while video.isOpened():
261
+ ret, frame = video.read()
262
+ if not ret:
263
+ break
264
+ cv2.imshow('Frame', frame)
265
+ if cv2.waitKey(25) & 0xFF == ord('q'):
266
+ break
267
+
268
+ video.release()
269
+ cv2.destroyAllWindows()
270
+ """, language='python')
271
+
272
+ st.header("🔊 Handling Audio Data")
273
+ st.markdown("""
274
+ Audio data can be handled using libraries like librosa.
275
+ """)
276
+ st.code("""
277
+ import librosa
278
+ import librosa.display
279
+ import matplotlib.pyplot as plt
280
+
281
+ # Load audio file
282
+ y, sr = librosa.load('sample_audio.mp3')
283
+ librosa.display.waveshow(y, sr=sr)
284
+ plt.title('Waveform')
285
+ plt.show()
286
+ """, language='python')
287
+
288
+ st.markdown("### Challenges with Unstructured Data")
289
+ st.write("""
290
+ - **Noise and Inconsistency**: Data is often incomplete or noisy.
291
+ - **Storage Requirements**: Large size and variability in data types.
292
+ - **Processing Time**: Analyzing unstructured data is computationally expensive.
293
+ """)
294
+
295
+ st.markdown("### Solutions")
296
+ st.write("""
297
+ - **Data Cleaning**: Preprocess data to remove noise.
298
+ - **Efficient Storage**: Use NoSQL databases (e.g., MongoDB) or cloud storage.
299
+ - **Parallel Processing**: Utilize frameworks like Apache Spark.
300
+ """)
301
+
302
+ # Back to Data Collection
303
+ if st.button("Back to Data Collection"):
304
+ st.session_state.page = "data_collection"
305
+
306
+ # ----------------- Semi-Structured Data Page -----------------
307
+ def semi_structured_data_page():
308
+ st.title(":blue[Semi-Structured Data]")
309
+
310
+ st.markdown("""
311
+ **Semi-structured data** does not conform strictly to a tabular structure but contains tags or markers to separate elements. Examples include:
312
+ - JSON (JavaScript Object Notation) files
313
+ - XML (Extensible Markup Language) files
314
+ - YAML (YAML Ain't Markup Language)
315
+ """)
316
+
317
+ st.header("🔹 JSON Data")
318
+ st.markdown("""
319
+ JSON is a popular format for storing and exchanging data.
320
+ """)
321
+ st.code("""
322
+ # Sample JSON data
323
+ data = '''
324
+ {
325
+ "name": "Alice",
326
+ "age": 25,
327
+ "skills": ["Python", "Machine Learning"]
328
+ }
329
+ '''
330
+
331
+ # Parse JSON
332
+ parsed_data = json.loads(data)
333
+ print(parsed_data['name']) # Output: Alice
334
+ """, language='python')
335
+
336
+ st.header("🔹 Reading JSON Files")
337
+ st.code("""
338
+ # Reading a JSON file
339
+ with open('data.json', 'r') as file:
340
+ data = json.load(file)
341
+ print(data)
342
+ """, language='python')
343
+
344
+ st.header("🔹 XML Data")
345
+ st.markdown("""
346
+ XML is a markup language that defines a set of rules for encoding documents.
347
+ """)
348
+ st.code("""
349
+ import xml.etree.ElementTree as ET
350
+
351
+ # Sample XML data
352
+ xml_data = '''
353
+ <person>
354
+ <name>Bob</name>
355
+ <age>30</age>
356
+ <city>New York</city>
357
+ </person>
358
+ '''
359
+
360
+ # Parse XML
361
+ root = ET.fromstring(xml_data)
362
+ print(root.find('name').text) # Output: Bob
363
+ """, language='python')
364
+
365
+ st.markdown("### Challenges with Semi-Structured Data")
366
+ st.write("""
367
+ - **Complex Parsing**: Requires specialized parsers.
368
+ - **Nested Data**: Can be deeply nested, making it harder to process.
369
+ """)
370
+
371
+ st.markdown("### Solutions")
372
+ st.write("""
373
+ - **Libraries**: Use libraries like json, xml.etree.ElementTree, and yaml for parsing.
374
+ - **Validation**: Validate data formats to avoid parsing errors.
375
+ """)
376
+
377
+ # Back to Data Collection
378
+ if st.button("Back to Data Collection"):
379
+ st.session_state.page = "data_collection"
380
 
381
  # ----------------- Router -----------------
382
  def router():
 
390
  excel_page()
391
  elif st.session_state.page == "csv":
392
  csv_page()
393
+ elif st.session_state.page == "unstructured_data":
394
+ unstructured_data_page()
395
+ elif st.session_state.page == "semi_structured_data":
396
+ semi_structured_data_page()
397
 
398
  # Run the router function
399
+ if __name__ == "__main__":
400
+ router()