Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py
Browse files
pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py
CHANGED
@@ -130,12 +130,10 @@ print(excel_file.sheet_names)
|
|
130 |
|
131 |
st.markdown('[Jupyter Notebook](https://colab.research.google.com/drive/1Dv68m9hcRzXsLRlRit0uZc-8CB8U6VV3?usp=sharing)')
|
132 |
|
133 |
-
|
134 |
if st.button("Back to Structured Data"):
|
135 |
st.session_state.page = "structured_data"
|
136 |
|
137 |
# ----------------- Unstructured Data Page -----------------
|
138 |
-
|
139 |
def unstructured_data_page():
|
140 |
st.title(":blue[Unstructured Data]")
|
141 |
|
@@ -147,23 +145,34 @@ def unstructured_data_page():
|
|
147 |
- Social media posts
|
148 |
""")
|
149 |
|
150 |
-
|
151 |
-
|
152 |
# Button to Navigate to Introduction to Image
|
153 |
if st.button("Introduction to Image"):
|
154 |
st.session_state.page = "introduction_to_image"
|
155 |
|
156 |
def image():
|
157 |
-
st.header("🖼️
|
158 |
st.markdown("""
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
""")
|
168 |
|
169 |
st.code("""
|
@@ -171,57 +180,46 @@ from PIL import Image
|
|
171 |
import numpy as np
|
172 |
import matplotlib.pyplot as plt
|
173 |
|
174 |
-
Open an image file
|
175 |
image = Image.open('sample_image.jpg')
|
176 |
image.show()
|
177 |
|
178 |
-
Convert image to grayscale
|
179 |
gray_image = image.convert('L')
|
180 |
gray_image.show()
|
181 |
|
182 |
-
Resize the image
|
183 |
resized_image = image.resize((200, 200))
|
184 |
resized_image.show()
|
185 |
|
186 |
-
Rotate the image by 90 degrees
|
187 |
rotated_image = image.rotate(90)
|
188 |
rotated_image.show()
|
189 |
|
190 |
-
Convert the image to a NumPy array and display its shape
|
191 |
image_array = np.array(image)
|
192 |
print(image_array.shape)
|
193 |
-
|
|
|
194 |
plt.imshow(image)
|
195 |
plt.title("Original Image")
|
196 |
plt.axis('off')
|
197 |
plt.show()
|
198 |
""", language='python')
|
199 |
|
200 |
-
st.
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
Noise and Inconsistency: Data is often incomplete or noisy.
|
215 |
-
Storage Requirements: Large size and variability in data types.
|
216 |
-
Processing Time: Analyzing unstructured data is computationally expensive.
|
217 |
-
""")
|
218 |
-
|
219 |
-
st.markdown("### Solutions")
|
220 |
-
st.write("""
|
221 |
-
|
222 |
-
Data Cleaning: Preprocess data to remove noise.
|
223 |
-
Efficient Storage: Use NoSQL databases (e.g., MongoDB) or cloud storage.
|
224 |
-
Parallel Processing: Utilize frameworks like Apache Spark.
|
225 |
""")
|
226 |
|
227 |
# Navigation Button
|
@@ -255,7 +253,7 @@ def json_page():
|
|
255 |
|
256 |
st.write("### What is JSON?")
|
257 |
st.write("""
|
258 |
-
JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files,
|
259 |
""")
|
260 |
|
261 |
st.write("### Reading JSON Files")
|
@@ -274,8 +272,7 @@ import json
|
|
274 |
data = {
|
275 |
"name": "Alice",
|
276 |
"age": 25,
|
277 |
-
"skills
|
278 |
-
: ["Python", "Machine Learning"]
|
279 |
}
|
280 |
with open('data.json', 'w') as file:
|
281 |
json.dump(data, file, indent=4)
|
@@ -288,133 +285,31 @@ with open('data.json', 'w') as file:
|
|
288 |
- JSON supports both strings and numbers, and other types like arrays and booleans, making it versatile for various data types.
|
289 |
""")
|
290 |
|
291 |
-
st.markdown('[Jupyter Notebook](https://huggingface.co/
|
292 |
-
|
293 |
-
if st.button("Back to Semi-Structured Data"):
|
294 |
-
st.session_state.page = "semi_structured_data"
|
295 |
-
|
296 |
-
# ----------------- CSV Data Page -----------------
|
297 |
-
def csv_page():
|
298 |
-
st.title(":green[CSV Data Format]")
|
299 |
-
|
300 |
-
st.write("### What is CSV?")
|
301 |
-
st.write("""
|
302 |
-
CSV (Comma-Separated Values) files store tabular data in plain text, where each line is a data record and columns are separated by commas.
|
303 |
-
""")
|
304 |
-
|
305 |
-
st.write("### Reading CSV Files")
|
306 |
-
st.code("""
|
307 |
-
import pandas as pd
|
308 |
-
|
309 |
-
# Read a CSV file
|
310 |
-
df = pd.read_csv('data.csv')
|
311 |
-
print(df)
|
312 |
-
""", language='python')
|
313 |
-
|
314 |
-
st.write("### Error Handling for CSV Files")
|
315 |
-
st.code("""
|
316 |
-
import pandas as pd
|
317 |
-
|
318 |
-
try:
|
319 |
-
df = pd.read_csv('data.csv', encoding='utf-8', delimiter=',')
|
320 |
-
print("CSV File Loaded Successfully!")
|
321 |
-
print(df)
|
322 |
-
except FileNotFoundError:
|
323 |
-
print("Error: File not found. Please check the file path.")
|
324 |
-
except pd.errors.ParserError:
|
325 |
-
print("Error: The file is not a valid CSV format.")
|
326 |
-
except UnicodeDecodeError:
|
327 |
-
print("Error: Encoding issue. Try specifying a different encoding like 'latin1' or 'utf-8'.")
|
328 |
-
""", language='python')
|
329 |
-
|
330 |
-
st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/CSV_HANDLING_GUIDE.ipynb)')
|
331 |
-
|
332 |
-
if st.button("Back to Semi-Structured Data"):
|
333 |
-
st.session_state.page = "semi_structured_data"
|
334 |
-
|
335 |
-
# ----------------- XML Data Page -----------------
|
336 |
-
def xml_page():
|
337 |
-
st.title(":green[XML Data Format]")
|
338 |
-
|
339 |
-
st.write("### What is XML?")
|
340 |
-
st.write("""
|
341 |
-
XML (Extensible Markup Language) is a markup language used for storing and exchanging structured data. It uses a hierarchical structure with tags to define elements.
|
342 |
-
""")
|
343 |
-
|
344 |
-
st.write("### Reading XML Files")
|
345 |
-
st.code("""
|
346 |
-
import xml.etree.ElementTree as ET
|
347 |
-
|
348 |
-
# Load and parse an XML file
|
349 |
-
tree = ET.parse('data.xml')
|
350 |
-
root = tree.getroot()
|
351 |
-
|
352 |
-
# Access elements
|
353 |
-
for child in root:
|
354 |
-
print(child.tag, child.text)
|
355 |
-
""", language='python')
|
356 |
-
|
357 |
-
st.write("### Sample XML Data")
|
358 |
-
st.code("""
|
359 |
-
<company>
|
360 |
-
<employee>
|
361 |
-
<name>John Doe</name>
|
362 |
-
<role>Developer</role>
|
363 |
-
</employee>
|
364 |
-
<employee>
|
365 |
-
<name>Jane Smith</name>
|
366 |
-
<role>Manager</role>
|
367 |
-
</employee>
|
368 |
-
</company>
|
369 |
-
""", language='xml')
|
370 |
-
|
371 |
-
st.write("### Issues Encountered")
|
372 |
-
st.write("""
|
373 |
-
- **File not found**: The specified XML file path is incorrect.
|
374 |
-
- **Malformed XML**: The XML structure has syntax errors.
|
375 |
-
- **XPath Errors**: Incorrect XPath expressions when querying data.
|
376 |
-
""")
|
377 |
-
|
378 |
-
st.write("### Solutions to These Issues")
|
379 |
-
st.code("""
|
380 |
-
# Handle missing file
|
381 |
-
try:
|
382 |
-
tree = ET.parse('data.xml')
|
383 |
-
except FileNotFoundError:
|
384 |
-
print("File not found. Check the file path.")
|
385 |
-
|
386 |
-
# Validate XML structure
|
387 |
-
try:
|
388 |
-
root = ET.fromstring(xml_data)
|
389 |
-
except ET.ParseError:
|
390 |
-
print("Malformed XML.")
|
391 |
-
""", language='python')
|
392 |
|
393 |
-
st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/XML_FILE_HANDLING.ipynb)')
|
394 |
-
|
395 |
-
|
396 |
-
# Back to Semi-Structured Data
|
397 |
if st.button("Back to Semi-Structured Data"):
|
398 |
st.session_state.page = "semi_structured_data"
|
399 |
|
400 |
-
# Main
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
|
|
|
|
|
130 |
|
131 |
st.markdown('[Jupyter Notebook](https://colab.research.google.com/drive/1Dv68m9hcRzXsLRlRit0uZc-8CB8U6VV3?usp=sharing)')
|
132 |
|
|
|
133 |
if st.button("Back to Structured Data"):
|
134 |
st.session_state.page = "structured_data"
|
135 |
|
136 |
# ----------------- Unstructured Data Page -----------------
|
|
|
137 |
def unstructured_data_page():
|
138 |
st.title(":blue[Unstructured Data]")
|
139 |
|
|
|
145 |
- Social media posts
|
146 |
""")
|
147 |
|
|
|
|
|
148 |
# Button to Navigate to Introduction to Image
|
149 |
if st.button("Introduction to Image"):
|
150 |
st.session_state.page = "introduction_to_image"
|
151 |
|
152 |
def image():
|
153 |
+
st.header("🖼️ What is Image")
|
154 |
st.markdown("""
|
155 |
+
An image is a two-dimensional visual representation of objects, people, scenes, or concepts. It can be captured using devices like cameras, scanners, or created digitally. Images are composed of individual units called pixels, which contain information about brightness and color.
|
156 |
+
|
157 |
+
Types of Images:
|
158 |
+
- **Raster Images (Bitmap)**: Composed of a grid of pixels. Common formats include:
|
159 |
+
- JPEG
|
160 |
+
- PNG
|
161 |
+
- GIF
|
162 |
+
- **Vector Images**: Defined by mathematical equations and geometric shapes like lines and curves. Common format:
|
163 |
+
- SVG (Scalable Vector Graphics)
|
164 |
+
- **3D Images**: Represent objects or scenes in three dimensions, often used for rendering and modeling.
|
165 |
+
|
166 |
+
Image Representation:
|
167 |
+
- **Grayscale Image**: Each pixel has a single intensity value, typically ranging from 0 (black) to 255 (white), representing different shades of gray.
|
168 |
+
- **Color Image**: Usually represented in the RGB color space, where each pixel consists of three values indicating the intensity of Red, Green, and Blue.
|
169 |
+
|
170 |
+
Applications of Images:
|
171 |
+
- **Photography & Visual Media**: Capturing moments and storytelling.
|
172 |
+
- **Medical Imaging**: Diagnosing conditions using X-rays, MRIs, etc.
|
173 |
+
- **Machine Learning & AI**: Tasks like image classification, object detection, and facial recognition.
|
174 |
+
- **Remote Sensing**: Analyzing geographic and environmental data using satellite imagery.
|
175 |
+
- **Graphic Design & Art**: Creating creative visual content for marketing and design.
|
176 |
""")
|
177 |
|
178 |
st.code("""
|
|
|
180 |
import numpy as np
|
181 |
import matplotlib.pyplot as plt
|
182 |
|
183 |
+
# Open an image file
|
184 |
image = Image.open('sample_image.jpg')
|
185 |
image.show()
|
186 |
|
187 |
+
# Convert image to grayscale
|
188 |
gray_image = image.convert('L')
|
189 |
gray_image.show()
|
190 |
|
191 |
+
# Resize the image
|
192 |
resized_image = image.resize((200, 200))
|
193 |
resized_image.show()
|
194 |
|
195 |
+
# Rotate the image by 90 degrees
|
196 |
rotated_image = image.rotate(90)
|
197 |
rotated_image.show()
|
198 |
|
199 |
+
# Convert the image to a NumPy array and display its shape
|
200 |
image_array = np.array(image)
|
201 |
print(image_array.shape)
|
202 |
+
|
203 |
+
# Display the image array as a plot
|
204 |
plt.imshow(image)
|
205 |
plt.title("Original Image")
|
206 |
plt.axis('off')
|
207 |
plt.show()
|
208 |
""", language='python')
|
209 |
|
210 |
+
st.header("""
|
211 |
+
Color Spaces in Machine Learning
|
212 |
+
A color space is a mathematical model for representing colors. In machine learning, different color spaces can be used for preprocessing and analyzing image data, depending on the task.
|
213 |
+
|
214 |
+
Common Color Spaces:
|
215 |
+
- **RGB (Red, Green, Blue)**: The most common color space for digital images. Each pixel is represented by a combination of three values corresponding to the red, green, and blue channels.
|
216 |
+
- **Use Cases**: Image classification, general-purpose image analysis.
|
217 |
+
- **HSV (Hue, Saturation, Value)**: Separates color information (hue) from intensity (value), making it useful for tasks where distinguishing between color variations and intensity is important.
|
218 |
+
- **Use Cases**: Color-based object detection, image segmentation, color tracking.
|
219 |
+
- **CMYK (Cyan, Magenta, Yellow, Black)**: Primarily used for printing, not commonly used in machine learning, but useful for preparing images for printers.
|
220 |
+
- **Use Cases**: Printing applications.
|
221 |
+
- **LAB (Lightness, A, B)**: Designed to be perceptually uniform, meaning that the perceptual difference between colors is consistent across the space.
|
222 |
+
- **Use Cases**: Color correction, image processing tasks requiring color consistency.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
""")
|
224 |
|
225 |
# Navigation Button
|
|
|
253 |
|
254 |
st.write("### What is JSON?")
|
255 |
st.write("""
|
256 |
+
JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files, and data transfer applications.
|
257 |
""")
|
258 |
|
259 |
st.write("### Reading JSON Files")
|
|
|
272 |
data = {
|
273 |
"name": "Alice",
|
274 |
"age": 25,
|
275 |
+
"skills": ["Python", "Machine Learning"]
|
|
|
276 |
}
|
277 |
with open('data.json', 'w') as file:
|
278 |
json.dump(data, file, indent=4)
|
|
|
285 |
- JSON supports both strings and numbers, and other types like arrays and booleans, making it versatile for various data types.
|
286 |
""")
|
287 |
|
288 |
+
st.markdown('[Jupyter Notebook](https://huggingface.co/transformers/notebooks.html)')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
289 |
|
|
|
|
|
|
|
|
|
290 |
if st.button("Back to Semi-Structured Data"):
|
291 |
st.session_state.page = "semi_structured_data"
|
292 |
|
293 |
+
# ----------------- Main Execution -----------------
|
294 |
+
def main():
    """Render the page currently selected in ``st.session_state.page``.

    Reads the page key from Streamlit session state and calls the matching
    page function. A key that matches none of the branches renders nothing.
    """
    page = st.session_state.page

    if page == "home":
        home_page()
    elif page == "data_collection":
        data_collection_page()
    elif page == "structured_data":
        structured_data_page()
    elif page == "excel":
        excel_page()
    elif page == "unstructured_data":
        unstructured_data_page()
    elif page == "semi_structured_data":
        semi_structured_data_page()
    elif page == "json":
        json_page()
    elif page in ("image", "introduction_to_image"):
        # The "Introduction to Image" button in unstructured_data_page()
        # stores "introduction_to_image"; matching only "image" left that
        # button leading to an empty page. "image" is kept for compatibility.
        image()


if __name__ == "__main__":
    main()
|