ronakreddy18 committed on
Commit
27ba035
·
verified ·
1 Parent(s): d3d0026

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

Browse files
pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py CHANGED
@@ -227,6 +227,252 @@ plt.show()
227
 
228
 
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  # ----------------- Semi-Structured Data Page -----------------
231
  def semi_structured_data_page():
232
  st.title(":orange[Semi-Structured Data]")
@@ -273,7 +519,8 @@ import json
273
  data = {
274
  "name": "Alice",
275
  "age": 25,
276
- "skills": ["Python", "Machine Learning"]
 
277
  }
278
  with open('data.json', 'w') as file:
279
  json.dump(data, file, indent=4)
 
227
 
228
 
229
 
230
+ # ----------------- Semi-Structured Data Page -----------------
231
def semi_structured_data_page():
    """Render the Semi-Structured Data page with one button per format.

    Shows a title and a short description, then a JSON, a CSV and an XML
    button. Pressing a button stores the matching page key ("json", "csv"
    or "xml") in ``st.session_state.page``.

    NOTE(review): a second ``semi_structured_data_page`` definition appears
    later in this file; if both live at module level the later one replaces
    this one -- confirm which copy is intended to survive.
    """
    st.title(":orange[Semi-Structured Data]")
    # Text is kept flush-left: Markdown renders lines indented by four or
    # more spaces as a code block.
    st.markdown("""
Semi-structured data does not follow the rigid structure of relational databases but still has some organizational properties. Examples include:
- JSON files
- XML files
""")

    # (button label, page key) pairs in display order.
    format_buttons = (
        (":green[💾 JSON]", "json"),
        (":green[📄 CSV]", "csv"),
        (":green[📄 XML]", "xml"),
    )
    for label, page_key in format_buttons:
        if st.button(label):
            st.session_state.page = page_key
247
+ import streamlit as st
248
+ import pandas as pd
249
+ import json
250
+ import xml.etree.ElementTree as ET
251
+
252
# First run of a session: start on the "home" page.
if "page" not in st.session_state:
    st.session_state["page"] = "home"

# Style every Streamlit button: green background, white text, full width.
# unsafe_allow_html is needed so the raw <style> tag is not escaped.
st.markdown(
    """
<style>
.stButton>button {
background-color: #4CAF50;
color: white;
width: 100%;
}
</style>
""",
    unsafe_allow_html=True,
)
266
+
267
+ # ----------------- Home Page -----------------
268
def home_page():
    """Render the landing page listing the ML project lifecycle stages.

    The "Data Collection" button navigates by storing "data_collection" in
    ``st.session_state.page``. Every other stage button, when pressed,
    renders a short Markdown description directly below itself.
    """
    st.title(":green[Lifecycle of a Machine Learning Project]")
    st.markdown("Click on a stage to learn more about it.")

    # The only stage on this page that leads to a dedicated page.
    if st.button(":blue[📊 Data Collection]"):
        st.session_state.page = "data_collection"

    # (button label, description shown when pressed), in display order.
    # NOTE(review): the Monitoring emoji lost its final byte in the garbled
    # source; 📝 is an assumption -- confirm against the original file.
    stage_notes = (
        (
            ":blue[🌟 Problem Statement]",
            "### Problem Statement\nIdentify the problem you want to solve and set clear objectives and success criteria.",
        ),
        (
            ":blue[🛠️ Simple EDA]",
            "### Simple EDA\nPerform exploratory data analysis to understand data distributions and relationships.",
        ),
        (
            ":blue[🧹 Data Pre-Processing]",
            "### Data Pre-Processing\nConvert raw data into cleaned data.",
        ),
        (
            ":blue[📈 Exploratory Data Analysis (EDA)]",
            "### Exploratory Data Analysis (EDA)\nVisualize and analyze the data to understand its distributions and relationships.",
        ),
        (
            ":blue[🏋️ Feature Engineering]",
            "### Feature Engineering\nCreate new features from existing data.",
        ),
        (
            ":blue[🤖 Model Training]",
            "### Model Training\nTrain the model using the training data and optimize its parameters.",
        ),
        (
            ":blue[🔧 Model Testing]",
            "### Model Testing\nAssess the model's performance using various metrics and cross-validation techniques.",
        ),
        (
            ":blue[🚀 Model Deployment]",
            "### Model Deployment\nIntegrate the trained model into a production environment and monitor its performance.",
        ),
        (
            ":blue[📝 Monitoring]",
            "### Monitoring\nPeriodically retrain the model with new data and update features as needed.",
        ),
    )
    for label, note in stage_notes:
        if st.button(label):
            st.markdown(note)
302
+
303
+ # ----------------- Data Collection Page -----------------
304
def data_collection_page():
    """Render the Data Collection page and its navigation buttons.

    Shows a heading and the three data types, then one button per data type
    plus a "Back to Home" button. Pressing a button stores the matching page
    key in ``st.session_state.page``.
    """
    st.title(":red[Data Collection]")
    st.markdown("### Data Collection\nThis page discusses the process of Data Collection.")
    st.markdown("Types of Data: **Structured**, **Unstructured**, **Semi-Structured**")

    # (button label, page key) pairs in display order.
    destinations = (
        (":blue[🌟 Structured Data]", "structured_data"),
        (":blue[📷 Unstructured Data]", "unstructured_data"),
        (":blue[🗃️ Semi-Structured Data]", "semi_structured_data"),
        ("Back to Home", "home"),
    )
    for label, page_key in destinations:
        if st.button(label):
            st.session_state.page = page_key
320
+
321
+ # ----------------- Structured Data Page -----------------
322
def structured_data_page():
    """Render the Structured Data page.

    Shows a description of structured data, an "Excel" button that stores
    "excel" in ``st.session_state.page`` and a back button that stores
    "data_collection".
    """
    st.title(":blue[Structured Data]")
    # Text is kept flush-left: Markdown renders lines indented by four or
    # more spaces as a code block.
    st.markdown("""
Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
""")
    st.markdown("### Examples: Excel files")

    if st.button(":green[📊 Excel]"):
        st.session_state.page = "excel"

    if st.button("Back to Data Collection"):
        st.session_state.page = "data_collection"
334
+
335
+ # ----------------- Excel Data Page -----------------
336
def excel_page():
    """Render the Excel Data Format page.

    Displays, in order: what Excel is, a snippet for reading a workbook with
    pandas, common issues, a snippet with fixes, a link to a notebook, and a
    back button that stores "structured_data" in ``st.session_state.page``.
    """
    # Snippets shown to the reader; they are displayed, never executed here.
    read_example = """
import pandas as pd

# Read an Excel file
df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
print(df)
"""
    fixes_example = """
# Install required libraries
# pip install openpyxl xlrd

# Handle missing file
try:
    df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
except FileNotFoundError:
    print("File not found. Check the file path.")

# List available sheet names
excel_file = pd.ExcelFile('data.xlsx')
print(excel_file.sheet_names)
"""
    known_issues = """
- **File not found**: Incorrect file path.
- **Sheet name error**: Specified sheet doesn't exist.
- **Missing libraries**: openpyxl or xlrd might be missing.
"""

    st.title(":green[Excel Data Format]")

    st.write("### What is Excel?")
    st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: .xls, .xlsx.")

    st.write("### How to Read Excel Files")
    st.code(read_example, language='python')

    st.write("### Issues Encountered")
    st.write(known_issues)

    st.write("### Solutions to These Issues")
    st.code(fixes_example, language='python')

    st.markdown('[Jupyter Notebook](https://colab.research.google.com/drive/1Dv68m9hcRzXsLRlRit0uZc-8CB8U6VV3?usp=sharing)')

    go_back = st.button("Back to Structured Data")
    if go_back:
        st.session_state.page = "structured_data"
379
+
380
+ # ----------------- Unstructured Data Page -----------------
381
+
382
+ from PIL import Image
383
+ import numpy as np
384
+ import matplotlib.pyplot as plt
385
+
386
def unstructured_data_page():
    """Render the Unstructured Data page.

    Displays, in order: a description of unstructured data, an image-handling
    section with a PIL/NumPy/matplotlib snippet and a list of techniques,
    challenges and solutions, and two navigation buttons. "Introduction to
    Image" stores "introduction_to_image" in ``st.session_state.page``;
    "Back to Data Collection" stores "data_collection".
    """
    st.title(":blue[Unstructured Data]")

    # Markdown text below is kept flush-left: lines indented by four or more
    # spaces would be rendered as a code block.
    st.markdown("""
*Unstructured data* does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
Examples include:
- Images (e.g., .jpg, .png)
- Videos (e.g., .mp4, .avi)
- Social media posts
""")

    # --- Handling image data ---
    st.header("🖼️ Handling Image Data")
    st.markdown("""
Image data can be processed using libraries like OpenCV and PIL (Pillow). Images often need to be preprocessed for tasks like analysis, classification, or feature extraction. Common operations include:
- **Reading and displaying images**
- **Converting to grayscale**
- **Resizing and cropping**
- **Rotating and flipping**
- **Applying filters**
- **Edge detection and other transformations**
""")

    # Snippet is only displayed to the reader; it is not executed here.
    st.code("""
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# Open an image file
image = Image.open('sample_image.jpg')
image.show()

# Convert image to grayscale
gray_image = image.convert('L')
gray_image.show()

# Resize the image
resized_image = image.resize((200, 200))
resized_image.show()

# Rotate the image by 90 degrees
rotated_image = image.rotate(90)
rotated_image.show()

# Convert the image to a NumPy array and display its shape
image_array = np.array(image)
print(image_array.shape)

# Display the image array as a plot
plt.imshow(image)
plt.title("Original Image")
plt.axis('off')
plt.show()
""", language='python')

    st.markdown("""
**Common Image Processing Techniques:**
- **Resizing**: Adjust the dimensions of an image for uniformity in models.
- **Cropping**: Extract a region of interest (ROI) from an image.
- **Grayscale Conversion**: Simplify image data by reducing it to a single channel.
- **Rotation/Flipping**: Perform augmentations to increase the dataset for model training.
- **Edge Detection**: Identify edges in images using filters like the Sobel or Canny filters.
""")

    # --- Challenges and solutions ---
    st.markdown("### Challenges with Unstructured Data")
    st.write("""
- *Noise and Inconsistency*: Data is often incomplete or noisy.
- *Storage Requirements*: Large size and variability in data types.
- *Processing Time*: Analyzing unstructured data is computationally expensive.
""")

    st.markdown("### Solutions")
    st.write("""
- *Data Cleaning*: Preprocess data to remove noise.
- *Efficient Storage*: Use NoSQL databases (e.g., MongoDB) or cloud storage.
- *Parallel Processing*: Utilize frameworks like Apache Spark.
""")

    # --- Navigation ---
    if st.button("Introduction to Image"):
        st.session_state.page = "introduction_to_image"

    if st.button("Back to Data Collection"):
        st.session_state.page = "data_collection"
472
+
473
+
474
+
475
+
476
  # ----------------- Semi-Structured Data Page -----------------
477
  def semi_structured_data_page():
478
  st.title(":orange[Semi-Structured Data]")
 
519
  data = {
520
  "name": "Alice",
521
  "age": 25,
522
+ "skills
523
+ : ["Python", "Machine Learning"]
524
  }
525
  with open('data.json', 'w') as file:
526
  json.dump(data, file, indent=4)