ronakreddy18 committed on
Commit
b4f594e
·
verified ·
1 Parent(s): 887e319

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

Browse files
pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py CHANGED
@@ -130,12 +130,10 @@ print(excel_file.sheet_names)
130
 
131
  st.markdown('[Jupyter Notebook](https://colab.research.google.com/drive/1Dv68m9hcRzXsLRlRit0uZc-8CB8U6VV3?usp=sharing)')
132
 
133
-
134
  if st.button("Back to Structured Data"):
135
  st.session_state.page = "structured_data"
136
 
137
  # ----------------- Unstructured Data Page -----------------
138
-
139
  def unstructured_data_page():
140
  st.title(":blue[Unstructured Data]")
141
 
@@ -147,23 +145,34 @@ def unstructured_data_page():
147
  - Social media posts
148
  """)
149
 
150
-
151
-
152
  # Button to Navigate to Introduction to Image
153
  if st.button("Introduction to Image"):
154
  st.session_state.page = "introduction_to_image"
155
 
156
  def image():
157
- st.header("🖼️ Handling Image Data")
158
  st.markdown("""
159
- Image data can be processed using libraries like OpenCV and PIL (Pillow). Images often need to be preprocessed for tasks like analysis, classification, or feature extraction. Common operations include:
160
-
161
- Reading and displaying images
162
- Converting to grayscale
163
- Resizing and cropping
164
- Rotating and flipping
165
- Applying filters
166
- Edge detection and other transformations
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  """)
168
 
169
  st.code("""
@@ -171,57 +180,46 @@ from PIL import Image
171
  import numpy as np
172
  import matplotlib.pyplot as plt
173
 
174
- Open an image file
175
  image = Image.open('sample_image.jpg')
176
  image.show()
177
 
178
- Convert image to grayscale
179
  gray_image = image.convert('L')
180
  gray_image.show()
181
 
182
- Resize the image
183
  resized_image = image.resize((200, 200))
184
  resized_image.show()
185
 
186
- Rotate the image by 90 degrees
187
  rotated_image = image.rotate(90)
188
  rotated_image.show()
189
 
190
- Convert the image to a NumPy array and display its shape
191
  image_array = np.array(image)
192
  print(image_array.shape)
193
- Display the image array as a plot
 
194
  plt.imshow(image)
195
  plt.title("Original Image")
196
  plt.axis('off')
197
  plt.show()
198
  """, language='python')
199
 
200
- st.markdown("""
201
- Common Image Processing Techniques:
202
-
203
- Resizing: Adjust the dimensions of an image for uniformity in models.
204
- Cropping: Extract a region of interest (ROI) from an image.
205
- Grayscale Conversion: Simplify image data by reducing it to a single channel.
206
- Rotation/Flipping: Perform augmentations to increase the dataset for model training.
207
- Edge Detection: Identify edges in images using filters like the Sobel or Canny filters.
208
- """)
209
-
210
- ### Challenges and Solutions Section
211
- st.markdown("### Challenges with Unstructured Data")
212
- st.write("""
213
-
214
- Noise and Inconsistency: Data is often incomplete or noisy.
215
- Storage Requirements: Large size and variability in data types.
216
- Processing Time: Analyzing unstructured data is computationally expensive.
217
- """)
218
-
219
- st.markdown("### Solutions")
220
- st.write("""
221
-
222
- Data Cleaning: Preprocess data to remove noise.
223
- Efficient Storage: Use NoSQL databases (e.g., MongoDB) or cloud storage.
224
- Parallel Processing: Utilize frameworks like Apache Spark.
225
  """)
226
 
227
  # Navigation Button
@@ -255,7 +253,7 @@ def json_page():
255
 
256
  st.write("### What is JSON?")
257
  st.write("""
258
- JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files, and data transfer applications.
259
  """)
260
 
261
  st.write("### Reading JSON Files")
@@ -274,8 +272,7 @@ import json
274
  data = {
275
  "name": "Alice",
276
  "age": 25,
277
- "skills
278
- : ["Python", "Machine Learning"]
279
  }
280
  with open('data.json', 'w') as file:
281
  json.dump(data, file, indent=4)
@@ -288,133 +285,31 @@ with open('data.json', 'w') as file:
288
  - JSON supports both strings and numbers, and other types like arrays and booleans, making it versatile for various data types.
289
  """)
290
 
291
- st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/json_file__handling.ipynb)')
292
-
293
- if st.button("Back to Semi-Structured Data"):
294
- st.session_state.page = "semi_structured_data"
295
-
296
- # ----------------- CSV Data Page -----------------
297
- def csv_page():
298
- st.title(":green[CSV Data Format]")
299
-
300
- st.write("### What is CSV?")
301
- st.write("""
302
- CSV (Comma-Separated Values) files store tabular data in plain text, where each line is a data record and columns are separated by commas.
303
- """)
304
-
305
- st.write("### Reading CSV Files")
306
- st.code("""
307
- import pandas as pd
308
-
309
- # Read a CSV file
310
- df = pd.read_csv('data.csv')
311
- print(df)
312
- """, language='python')
313
-
314
- st.write("### Error Handling for CSV Files")
315
- st.code("""
316
- import pandas as pd
317
-
318
- try:
319
- df = pd.read_csv('data.csv', encoding='utf-8', delimiter=',')
320
- print("CSV File Loaded Successfully!")
321
- print(df)
322
- except FileNotFoundError:
323
- print("Error: File not found. Please check the file path.")
324
- except pd.errors.ParserError:
325
- print("Error: The file is not a valid CSV format.")
326
- except UnicodeDecodeError:
327
- print("Error: Encoding issue. Try specifying a different encoding like 'latin1' or 'utf-8'.")
328
- """, language='python')
329
-
330
- st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/CSV_HANDLING_GUIDE.ipynb)')
331
-
332
- if st.button("Back to Semi-Structured Data"):
333
- st.session_state.page = "semi_structured_data"
334
-
335
- # ----------------- XML Data Page -----------------
336
- def xml_page():
337
- st.title(":green[XML Data Format]")
338
-
339
- st.write("### What is XML?")
340
- st.write("""
341
- XML (Extensible Markup Language) is a markup language used for storing and exchanging structured data. It uses a hierarchical structure with tags to define elements.
342
- """)
343
-
344
- st.write("### Reading XML Files")
345
- st.code("""
346
- import xml.etree.ElementTree as ET
347
-
348
- # Load and parse an XML file
349
- tree = ET.parse('data.xml')
350
- root = tree.getroot()
351
-
352
- # Access elements
353
- for child in root:
354
- print(child.tag, child.text)
355
- """, language='python')
356
-
357
- st.write("### Sample XML Data")
358
- st.code("""
359
- <company>
360
- <employee>
361
- <name>John Doe</name>
362
- <role>Developer</role>
363
- </employee>
364
- <employee>
365
- <name>Jane Smith</name>
366
- <role>Manager</role>
367
- </employee>
368
- </company>
369
- """, language='xml')
370
-
371
- st.write("### Issues Encountered")
372
- st.write("""
373
- - **File not found**: The specified XML file path is incorrect.
374
- - **Malformed XML**: The XML structure has syntax errors.
375
- - **XPath Errors**: Incorrect XPath expressions when querying data.
376
- """)
377
-
378
- st.write("### Solutions to These Issues")
379
- st.code("""
380
- # Handle missing file
381
- try:
382
- tree = ET.parse('data.xml')
383
- except FileNotFoundError:
384
- print("File not found. Check the file path.")
385
-
386
- # Validate XML structure
387
- try:
388
- root = ET.fromstring(xml_data)
389
- except ET.ParseError:
390
- print("Malformed XML.")
391
- """, language='python')
392
 
393
- st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/XML_FILE_HANDLING.ipynb)')
394
-
395
-
396
- # Back to Semi-Structured Data
397
  if st.button("Back to Semi-Structured Data"):
398
  st.session_state.page = "semi_structured_data"
399
 
400
- # Main control to call appropriate page
401
- if st.session_state.page == "home":
402
- home_page()
403
- elif st.session_state.page == "data_collection":
404
- data_collection_page()
405
- elif st.session_state.page == "structured_data":
406
- structured_data_page()
407
- elif st.session_state.page == "excel":
408
- excel_page()
409
- elif st.session_state.page == "csv":
410
- csv_page()
411
- elif st.session_state.page == "json":
412
- json_page()
413
- elif st.session_state.page == "unstructured_data":
414
- unstructured_data_page()
415
- elif st.session_state.page == "semi_structured_data":
416
- semi_structured_data_page()
417
- elif st.session_state.page == "xml":
418
- xml_page()
419
- elif st.session_state.page == "introduction_to_image":
420
- image()
 
 
 
130
 
131
  st.markdown('[Jupyter Notebook](https://colab.research.google.com/drive/1Dv68m9hcRzXsLRlRit0uZc-8CB8U6VV3?usp=sharing)')
132
 
 
133
  if st.button("Back to Structured Data"):
134
  st.session_state.page = "structured_data"
135
 
136
  # ----------------- Unstructured Data Page -----------------
 
137
  def unstructured_data_page():
138
  st.title(":blue[Unstructured Data]")
139
 
 
145
  - Social media posts
146
  """)
147
 
 
 
148
  # Button to Navigate to Introduction to Image
149
  if st.button("Introduction to Image"):
150
  st.session_state.page = "introduction_to_image"
151
 
152
  def image():
153
+ st.header("🖼️ What is Image")
154
  st.markdown("""
155
+ An image is a two-dimensional visual representation of objects, people, scenes, or concepts. It can be captured using devices like cameras, scanners, or created digitally. Images are composed of individual units called pixels, which contain information about brightness and color.
156
+
157
+ Types of Images:
158
+ - **Raster Images (Bitmap)**: Composed of a grid of pixels. Common formats include:
159
+ - JPEG
160
+ - PNG
161
+ - GIF
162
+ - **Vector Images**: Defined by mathematical equations and geometric shapes like lines and curves. Common format:
163
+ - SVG (Scalable Vector Graphics)
164
+ - **3D Images**: Represent objects or scenes in three dimensions, often used for rendering and modeling.
165
+
166
+ Image Representation:
167
+ - **Grayscale Image**: Each pixel has a single intensity value, typically ranging from 0 (black) to 255 (white), representing different shades of gray.
168
+ - **Color Image**: Usually represented in the RGB color space, where each pixel consists of three values indicating the intensity of Red, Green, and Blue.
169
+
170
+ Applications of Images:
171
+ - **Photography & Visual Media**: Capturing moments and storytelling.
172
+ - **Medical Imaging**: Diagnosing conditions using X-rays, MRIs, etc.
173
+ - **Machine Learning & AI**: Tasks like image classification, object detection, and facial recognition.
174
+ - **Remote Sensing**: Analyzing geographic and environmental data using satellite imagery.
175
+ - **Graphic Design & Art**: Creating creative visual content for marketing and design.
176
  """)
177
 
178
  st.code("""
 
180
  import numpy as np
181
  import matplotlib.pyplot as plt
182
 
183
+ # Open an image file
184
  image = Image.open('sample_image.jpg')
185
  image.show()
186
 
187
+ # Convert image to grayscale
188
  gray_image = image.convert('L')
189
  gray_image.show()
190
 
191
+ # Resize the image
192
  resized_image = image.resize((200, 200))
193
  resized_image.show()
194
 
195
+ # Rotate the image by 90 degrees
196
  rotated_image = image.rotate(90)
197
  rotated_image.show()
198
 
199
+ # Convert the image to a NumPy array and display its shape
200
  image_array = np.array(image)
201
  print(image_array.shape)
202
+
203
+ # Display the image array as a plot
204
  plt.imshow(image)
205
  plt.title("Original Image")
206
  plt.axis('off')
207
  plt.show()
208
  """, language='python')
209
 
210
+ st.header("""
211
+ Color Spaces in Machine Learning
212
+ A color space is a mathematical model for representing colors. In machine learning, different color spaces can be used for preprocessing and analyzing image data, depending on the task.
213
+
214
+ Common Color Spaces:
215
+ - **RGB (Red, Green, Blue)**: The most common color space for digital images. Each pixel is represented by a combination of three values corresponding to the red, green, and blue channels.
216
+ - **Use Cases**: Image classification, general-purpose image analysis.
217
+ - **HSV (Hue, Saturation, Value)**: Separates color information (hue) from intensity (value), making it useful for tasks where distinguishing between color variations and intensity is important.
218
+ - **Use Cases**: Color-based object detection, image segmentation, color tracking.
219
+ - **CMYK (Cyan, Magenta, Yellow, Black)**: Primarily used for printing, not commonly used in machine learning, but useful for preparing images for printers.
220
+ - **Use Cases**: Printing applications.
221
+ - **LAB (Lightness, A, B)**: Designed to be perceptually uniform, meaning that the perceptual difference between colors is consistent across the space.
222
+ - **Use Cases**: Color correction, image processing tasks requiring color consistency.
 
 
 
 
 
 
 
 
 
 
 
 
223
  """)
224
 
225
  # Navigation Button
 
253
 
254
  st.write("### What is JSON?")
255
  st.write("""
256
+ JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files, and data transfer applications.
257
  """)
258
 
259
  st.write("### Reading JSON Files")
 
272
  data = {
273
  "name": "Alice",
274
  "age": 25,
275
+ "skills": ["Python", "Machine Learning"]
 
276
  }
277
  with open('data.json', 'w') as file:
278
  json.dump(data, file, indent=4)
 
285
  - JSON supports both strings and numbers, and other types like arrays and booleans, making it versatile for various data types.
286
  """)
287
 
288
+ st.markdown('[Jupyter Notebook](https://huggingface.co/transformers/notebooks.html)')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
 
 
 
 
290
  if st.button("Back to Semi-Structured Data"):
291
  st.session_state.page = "semi_structured_data"
292
 
293
# ----------------- Main Execution -----------------
def main():
    """Render the page whose key is stored in ``st.session_state.page``.

    The navigation buttons in this file write a page key into the session
    state; this function reads that key and calls the matching page
    function. A key that matches none of the branches renders nothing.
    """
    page = st.session_state.page

    if page == "home":
        home_page()
    elif page == "data_collection":
        data_collection_page()
    elif page == "structured_data":
        structured_data_page()
    elif page == "excel":
        excel_page()
    elif page == "unstructured_data":
        unstructured_data_page()
    elif page == "semi_structured_data":
        semi_structured_data_page()
    elif page == "json":
        json_page()
    # The "Introduction to Image" button stores "introduction_to_image",
    # so that key has to be routed here or the image page can never be
    # reached. The shorter "image" key is kept as an alias.
    elif page in ("introduction_to_image", "image"):
        image()


if __name__ == "__main__":
    main()