ronakreddy18 committed on
Commit
df988c2
·
verified ·
1 Parent(s): e3d17a1

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

Browse files
Files changed (1) hide show
  1. pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py +143 -143
pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py CHANGED
@@ -78,17 +78,11 @@ def structured_data_page():
78
  st.markdown("""
79
  Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
80
  """)
81
- st.markdown("### Examples: Excel files, CSV files, JSON files")
82
 
83
  if st.button(":green[📊 Excel]"):
84
  st.session_state.page = "excel"
85
 
86
- if st.button(":green[📄 CSV]"):
87
- st.session_state.page = "csv"
88
-
89
- if st.button(":green[🗃️ JSON]"):
90
- st.session_state.page = "json"
91
-
92
  if st.button("Back to Data Collection"):
93
  st.session_state.page = "data_collection"
94
 
@@ -137,95 +131,6 @@ print(excel_file.sheet_names)
137
  if st.button("Back to Structured Data"):
138
  st.session_state.page = "structured_data"
139
 
140
- # ----------------- CSV Data Page -----------------
141
- def csv_page():
142
- st.title(":green[CSV Data Format]")
143
-
144
- st.write("### What is CSV?")
145
- st.write("""
146
- CSV (Comma-Separated Values) files store tabular data in plain text, where each line is a data record and columns are separated by commas.
147
- """)
148
-
149
- st.write("### Reading CSV Files")
150
- st.code("""
151
- import pandas as pd
152
-
153
- # Read a CSV file
154
- df = pd.read_csv('data.csv')
155
- print(df)
156
- """, language='python')
157
-
158
- st.write("### Error Handling for CSV Files")
159
- st.code("""
160
- import pandas as pd
161
-
162
- try:
163
- df = pd.read_csv('data.csv', encoding='utf-8', delimiter=',')
164
- print("CSV File Loaded Successfully!")
165
- print(df)
166
- except FileNotFoundError:
167
- print("Error: File not found. Please check the file path.")
168
- except pd.errors.ParserError:
169
- print("Error: The file is not a valid CSV format.")
170
- except UnicodeDecodeError:
171
- print("Error: Encoding issue. Try specifying a different encoding like 'latin1' or 'utf-8'.")
172
- """, language='python')
173
-
174
- st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/CSV_HANDLING_GUIDE.ipynb)')
175
-
176
- if st.button("Back to Structured Data"):
177
- st.session_state.page = "structured_data"
178
-
179
-
180
-
181
-
182
-
183
- # ----------------- JSON Data Page -----------------
184
-
185
- def json_page():
186
- st.title(":green[JSON Data Format]")
187
-
188
- st.write("### What is JSON?")
189
- st.write("""
190
- JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files, and data transfer applications.
191
- """)
192
-
193
- st.write("### Reading JSON Files")
194
- st.code("""
195
- import json
196
- # Read a JSON file
197
- with open('data.json', 'r') as file:
198
- data = json.load(file)
199
- print(data)
200
- """, language='python')
201
-
202
- st.write("### Writing JSON Files")
203
- st.code("""
204
- import json
205
- # Write data to JSON file
206
- data = {
207
- "name": "Alice",
208
- "age": 25,
209
- "skills": ["Python", "Machine Learning"]
210
- }
211
- with open('data.json', 'w') as file:
212
- json.dump(data, file, indent=4)
213
- """, language='python')
214
-
215
- st.markdown("### Tips for Handling JSON Files")
216
- st.write("""
217
- - JSON files can be nested, so you might need to navigate through dictionaries and lists.
218
- - If the structure is complex, you can use libraries like `json_normalize()` in pandas to flatten the JSON into a more tabular format for easier analysis.
219
- - JSON supports both strings and numbers, and other types like arrays and booleans, making it versatile for various data types.
220
- """)
221
-
222
- st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/json_file__handling.ipynb)')
223
-
224
- if st.button("Back to Structured Data"):
225
- st.session_state.page = "structured_data"
226
-
227
-
228
-
229
  # ----------------- Unstructured Data Page -----------------
230
  def unstructured_data_page():
231
  st.title(":blue[Unstructured Data]")
@@ -333,78 +238,171 @@ plt.show()
333
 
334
  # ----------------- Semi-Structured Data Page -----------------
335
  def semi_structured_data_page():
336
- st.title(":blue[Semi-Structured Data]")
337
-
338
  st.markdown("""
339
- **Semi-structured data** does not conform strictly to a tabular structure but contains tags or markers to separate elements. Examples include:
340
- - JSON (JavaScript Object Notation) files
341
- - XML (Extensible Markup Language) files
342
- - YAML (Yet Another Markup Language)
343
  """)
344
 
345
- st.header("🔹 JSON Data")
346
- st.markdown("""
347
- JSON is a popular format for storing and exchanging data.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  """)
 
 
349
  st.code("""
350
- # Sample JSON data
351
- data = '''
352
- {
 
 
 
 
 
 
 
 
 
353
  "name": "Alice",
354
  "age": 25,
355
  "skills": ["Python", "Machine Learning"]
356
  }
357
- '''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
359
- # Parse JSON
360
- parsed_data = json.loads(data)
361
- print(parsed_data['name']) # Output: Alice
362
  """, language='python')
363
 
364
- st.header("🔹 Reading JSON Files")
365
  st.code("""
366
- # Reading a JSON file
367
- with open('data.json', 'r') as file:
368
- data = json.load(file)
369
- print(data)
 
 
 
 
 
 
 
 
370
  """, language='python')
371
 
372
- st.header("🔹 XML Data")
373
- st.markdown("""
374
- XML is a markup language that defines a set of rules for encoding documents.
 
 
 
 
 
 
 
 
 
375
  """)
 
 
376
  st.code("""
377
  import xml.etree.ElementTree as ET
378
 
379
- # Sample XML data
380
- xml_data = '''
381
- <person>
382
- <name>Bob</name>
383
- <age>30</age>
384
- <city>New York</city>
385
- </person>
386
- '''
387
-
388
- # Parse XML
389
- root = ET.fromstring(xml_data)
390
- print(root.find('name').text) # Output: Bob
391
  """, language='python')
392
 
393
- st.markdown("### Challenges with Semi-Structured Data")
394
- st.write("""
395
- - **Complex Parsing**: Requires specialized parsers.
396
- - **Nested Data**: Can be deeply nested, making it harder to process.
397
- """)
 
 
 
 
 
 
 
 
398
 
399
- st.markdown("### Solutions")
400
  st.write("""
401
- - **Libraries**: Use libraries like json, xml.etree.ElementTree, and yaml for parsing.
402
- - **Validation**: Validate data formats to avoid parsing errors.
 
403
  """)
404
 
405
- # Back to Data Collection
406
- if st.button("Back to Data Collection"):
407
- st.session_state.page = "data_collection"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
 
409
  # Main control to call appropriate page
410
  if st.session_state.page == "home":
@@ -423,3 +421,5 @@ elif st.session_state.page == "unstructured_data":
423
  unstructured_data_page()
424
  elif st.session_state.page == "semi_structured_data":
425
  semi_structured_data_page()
 
 
 
78
  st.markdown("""
79
  Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
80
  """)
81
+ st.markdown("### Examples: Excel files")
82
 
83
  if st.button(":green[📊 Excel]"):
84
  st.session_state.page = "excel"
85
 
 
 
 
 
 
 
86
  if st.button("Back to Data Collection"):
87
  st.session_state.page = "data_collection"
88
 
 
131
  if st.button("Back to Structured Data"):
132
  st.session_state.page = "structured_data"
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  # ----------------- Unstructured Data Page -----------------
135
  def unstructured_data_page():
136
  st.title(":blue[Unstructured Data]")
 
238
 
239
  # ----------------- Semi-Structured Data Page -----------------
240
  def semi_structured_data_page():
241
+ st.title(":orange[Semi-Structured Data]")
 
242
  st.markdown("""
243
+ Semi-structured data does not follow the rigid structure of relational databases but still has some organizational properties. Examples include:
244
+ - JSON files
245
+ - XML files
 
246
  """)
247
 
248
+ if st.button(":green[💾 JSON]"):
249
+ st.session_state.page = "json"
250
+
251
+ if st.button(":green[📄 CSV]"):
252
+ st.session_state.page = "csv"
253
+
254
+ if st.button(":green[📄 XML]"):
255
+ st.session_state.page = "xml"
256
+
257
+ if st.button("Back to Data Collection"):
258
+ st.session_state.page = "data_collection"
259
+
260
+ # ----------------- JSON Data Page -----------------
261
+ def json_page():
262
+ st.title(":green[JSON Data Format]")
263
+
264
+ st.write("### What is JSON?")
265
+ st.write("""
266
+ JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files, and data transfer applications.
267
  """)
268
+
269
+ st.write("### Reading JSON Files")
270
  st.code("""
271
+ import json
272
+ # Read a JSON file
273
+ with open('data.json', 'r') as file:
274
+ data = json.load(file)
275
+ print(data)
276
+ """, language='python')
277
+
278
+ st.write("### Writing JSON Files")
279
+ st.code("""
280
+ import json
281
+ # Write data to JSON file
282
+ data = {
283
  "name": "Alice",
284
  "age": 25,
285
  "skills": ["Python", "Machine Learning"]
286
  }
287
+ with open('data.json', 'w') as file:
288
+ json.dump(data, file, indent=4)
289
+ """, language='python')
290
+
291
+ st.markdown("### Tips for Handling JSON Files")
292
+ st.write("""
293
+ - JSON files can be nested, so you might need to navigate through dictionaries and lists.
294
+ - If the structure is complex, you can use libraries like json_normalize() in pandas to flatten the JSON into a more tabular format for easier analysis.
295
+ - JSON supports both strings and numbers, and other types like arrays and booleans, making it versatile for various data types.
296
+ """)
297
+
298
+ st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/json_file__handling.ipynb)')
299
+
300
+ if st.button("Back to Semi-Structured Data"):
301
+ st.session_state.page = "semi_structured_data"
302
+
303
+ # ----------------- CSV Data Page -----------------
304
+ def csv_page():
305
+ st.title(":green[CSV Data Format]")
306
+
307
+ st.write("### What is CSV?")
308
+ st.write("""
309
+ CSV (Comma-Separated Values) files store tabular data in plain text, where each line is a data record and columns are separated by commas.
310
+ """)
311
+
312
+ st.write("### Reading CSV Files")
313
+ st.code("""
314
+ import pandas as pd
315
 
316
+ # Read a CSV file
317
+ df = pd.read_csv('data.csv')
318
+ print(df)
319
  """, language='python')
320
 
321
+ st.write("### Error Handling for CSV Files")
322
  st.code("""
323
+ import pandas as pd
324
+
325
+ try:
326
+ df = pd.read_csv('data.csv', encoding='utf-8', delimiter=',')
327
+ print("CSV File Loaded Successfully!")
328
+ print(df)
329
+ except FileNotFoundError:
330
+ print("Error: File not found. Please check the file path.")
331
+ except pd.errors.ParserError:
332
+ print("Error: The file is not a valid CSV format.")
333
+ except UnicodeDecodeError:
334
+ print("Error: Encoding issue. Try specifying a different encoding like 'latin1' or 'utf-8'.")
335
  """, language='python')
336
 
337
+ st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/CSV_HANDLING_GUIDE.ipynb)')
338
+
339
+ if st.button("Back to Semi-Structured Data"):
340
+ st.session_state.page = "semi_structured_data"
341
+
342
+ # ----------------- XML Data Page -----------------
343
+ def xml_page():
344
+ st.title(":green[XML Data Format]")
345
+
346
+ st.write("### What is XML?")
347
+ st.write("""
348
+ XML (Extensible Markup Language) is a markup language used for storing and exchanging structured data. It uses a hierarchical structure with tags to define elements.
349
  """)
350
+
351
+ st.write("### Reading XML Files")
352
  st.code("""
353
  import xml.etree.ElementTree as ET
354
 
355
+ # Load and parse an XML file
356
+ tree = ET.parse('data.xml')
357
+ root = tree.getroot()
358
+
359
+ # Access elements
360
+ for child in root:
361
+ print(child.tag, child.text)
 
 
 
 
 
362
  """, language='python')
363
 
364
+ st.write("### Sample XML Data")
365
+ st.code("""
366
+ <company>
367
+ <employee>
368
+ <name>John Doe</name>
369
+ <role>Developer</role>
370
+ </employee>
371
+ <employee>
372
+ <name>Jane Smith</name>
373
+ <role>Manager</role>
374
+ </employee>
375
+ </company>
376
+ """, language='xml')
377
 
378
+ st.write("### Issues Encountered")
379
  st.write("""
380
+ - **File not found**: The specified XML file path is incorrect.
381
+ - **Malformed XML**: The XML structure has syntax errors.
382
+ - **XPath Errors**: Incorrect XPath expressions when querying data.
383
  """)
384
 
385
+ st.write("### Solutions to These Issues")
386
+ st.code("""
387
+ # Handle missing file
388
+ try:
389
+ tree = ET.parse('data.xml')
390
+ except FileNotFoundError:
391
+ print("File not found. Check the file path.")
392
+
393
+ # Validate XML structure
394
+ try:
395
+ root = ET.fromstring(xml_data)
396
+ except ET.ParseError:
397
+ print("Malformed XML.")
398
+ """, language='python')
399
+
400
+ st.markdown('[Jupyter Notebook](https://colab.research.google.com/drive/1Dv68m9hcRzXsLRlRit0uZc-8CB8U6VV3?usp=sharing)')
401
+
402
+
403
+ # Back to Semi-Structured Data
404
+ if st.button("Back to Semi-Structured Data"):
405
+ st.session_state.page = "semi_structured_data"
406
 
407
  # Main control to call appropriate page
408
  if st.session_state.page == "home":
 
421
  unstructured_data_page()
422
  elif st.session_state.page == "semi_structured_data":
423
  semi_structured_data_page()
424
+ elif st.session_state.page == "xml":
425
+ xml_page()