Spaces:

ronakreddy18
/

Zerotoheroinmachinelearning

Sleeping

App Files Files Community

ronakreddy18 committed on Dec 13, 2024

Commit

df988c2

verified ·

1 Parent(s): e3d17a1

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

Browse files

Files changed (1) hide show

pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py +143 -143

pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py CHANGED Viewed

@@ -78,17 +78,11 @@ def structured_data_page():
     st.markdown("""
     Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
     """)
-    st.markdown("### Examples: Excel files, CSV files, JSON files")
     if st.button(":green[📊 Excel]"):
         st.session_state.page = "excel"
-    if st.button(":green[📄 CSV]"):
-        st.session_state.page = "csv"
-    if st.button(":green[🗃️ JSON]"):
-        st.session_state.page = "json"
     if st.button("Back to Data Collection"):
         st.session_state.page = "data_collection"
@@ -137,95 +131,6 @@ print(excel_file.sheet_names)
     if st.button("Back to Structured Data"):
         st.session_state.page = "structured_data"
-# ----------------- CSV Data Page -----------------
-def csv_page():
-    st.title(":green[CSV Data Format]")
-    st.write("### What is CSV?")
-    st.write("""
-    CSV (Comma-Separated Values) files store tabular data in plain text, where each line is a data record and columns are separated by commas.
-    """)
-    st.write("### Reading CSV Files")
-    st.code("""
-import pandas as pd
-# Read a CSV file
-df = pd.read_csv('data.csv')
-print(df)
-    """, language='python')
-    st.write("### Error Handling for CSV Files")
-    st.code("""
-import pandas as pd
-try:
-    df = pd.read_csv('data.csv', encoding='utf-8', delimiter=',')
-    print("CSV File Loaded Successfully!")
-    print(df)
-except FileNotFoundError:
-    print("Error: File not found. Please check the file path.")
-except pd.errors.ParserError:
-    print("Error: The file is not a valid CSV format.")
-except UnicodeDecodeError:
-    print("Error: Encoding issue. Try specifying a different encoding like 'latin1' or 'utf-8'.")
-    """, language='python')
-    st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/CSV_HANDLING_GUIDE.ipynb)')
-    if st.button("Back to Structured Data"):
-        st.session_state.page = "structured_data"
-# ----------------- JSON Data Page -----------------
-def json_page():
-    st.title(":green[JSON Data Format]")
-    st.write("### What is JSON?")
-    st.write("""
-    JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files, and data transfer applications.
-    """)
-    st.write("### Reading JSON Files")
-    st.code("""
-import json
-# Read a JSON file
-with open('data.json', 'r') as file:
-    data = json.load(file)
-    print(data)
-    """, language='python')
-    st.write("### Writing JSON Files")
-    st.code("""
-import json
-# Write data to JSON file
-data = {
-    "name": "Alice",
-    "age": 25,
-    "skills": ["Python", "Machine Learning"]
-}
-with open('data.json', 'w') as file:
-    json.dump(data, file, indent=4)
-    """, language='python')
-    st.markdown("### Tips for Handling JSON Files")
-    st.write("""
-    - JSON files can be nested, so you might need to navigate through dictionaries and lists.
-    - If the structure is complex, you can use libraries like `json_normalize()` in pandas to flatten the JSON into a more tabular format for easier analysis.
-    - JSON supports both strings and numbers, and other types like arrays and booleans, making it versatile for various data types.
-    """)
-    st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/json_file__handling.ipynb)')
-    if st.button("Back to Structured Data"):
-        st.session_state.page = "structured_data"
 # ----------------- Unstructured Data Page -----------------
 def unstructured_data_page():
     st.title(":blue[Unstructured Data]")
@@ -333,78 +238,171 @@ plt.show()
 # ----------------- Semi-Structured Data Page -----------------
 def semi_structured_data_page():
-    st.title(":blue[Semi-Structured Data]")
     st.markdown("""
-    **Semi-structured data** does not conform strictly to a tabular structure but contains tags or markers to separate elements. Examples include:
-    - JSON (JavaScript Object Notation) files
-    - XML (Extensible Markup Language) files
-    - YAML (Yet Another Markup Language)
     """)
-    st.header("🔹 JSON Data")
-    st.markdown("""
-    JSON is a popular format for storing and exchanging data.
     """)
     st.code("""
-# Sample JSON data
-data = '''
-{
     "name": "Alice",
     "age": 25,
     "skills": ["Python", "Machine Learning"]
 }
-'''
-# Parse JSON
-parsed_data = json.loads(data)
-print(parsed_data['name'])  # Output: Alice
     """, language='python')
-    st.header("🔹 Reading JSON Files")
     st.code("""
-# Reading a JSON file
-with open('data.json', 'r') as file:
-    data = json.load(file)
-    print(data)
     """, language='python')
-    st.header("🔹 XML Data")
-    st.markdown("""
-    XML is a markup language that defines a set of rules for encoding documents.
     """)
     st.code("""
 import xml.etree.ElementTree as ET
-# Sample XML data
-xml_data = '''
-<person>
-    <name>Bob</name>
-    <age>30</age>
-    <city>New York</city>
-</person>
-'''
-# Parse XML
-root = ET.fromstring(xml_data)
-print(root.find('name').text)  # Output: Bob
     """, language='python')
-    st.markdown("### Challenges with Semi-Structured Data")
-    st.write("""
-    - **Complex Parsing**: Requires specialized parsers.
-    - **Nested Data**: Can be deeply nested, making it harder to process.
-    """)
-    st.markdown("### Solutions")
     st.write("""
-    - **Libraries**: Use libraries like json, xml.etree.ElementTree, and yaml for parsing.
-    - **Validation**: Validate data formats to avoid parsing errors.
     """)
-    # Back to Data Collection
-    if st.button("Back to Data Collection"):
-        st.session_state.page = "data_collection"
 # Main control to call appropriate page
 if st.session_state.page == "home":
@@ -423,3 +421,5 @@ elif st.session_state.page == "unstructured_data":
     unstructured_data_page()
 elif st.session_state.page == "semi_structured_data":
     semi_structured_data_page()

     st.markdown("""
     Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
     """)
+    st.markdown("### Examples: Excel files")
     if st.button(":green[📊 Excel]"):
         st.session_state.page = "excel"
     if st.button("Back to Data Collection"):
         st.session_state.page = "data_collection"
     if st.button("Back to Structured Data"):
         st.session_state.page = "structured_data"
 # ----------------- Unstructured Data Page -----------------
 def unstructured_data_page():
     st.title(":blue[Unstructured Data]")
 # ----------------- Semi-Structured Data Page -----------------
 def semi_structured_data_page():
+    st.title(":orange[Semi-Structured Data]")
     st.markdown("""
+    Semi-structured data does not follow the rigid structure of relational databases but still has some organizational properties. Examples include:
+    - JSON files
+    - XML files
     """)
+    if st.button(":green[💾 JSON]"):
+        st.session_state.page = "json"
+    if st.button(":green[📄 CSV]"):
+        st.session_state.page = "csv"
+    if st.button(":green[📄 XML]"):
+        st.session_state.page = "xml"
+    if st.button("Back to Data Collection"):
+        st.session_state.page = "data_collection"
+# ----------------- JSON Data Page -----------------
+def json_page():
+    st.title(":green[JSON Data Format]")
+    st.write("### What is JSON?")
+    st.write("""
+    JSON (JavaScript Object Notation) is a lightweight data-interchange format that's easy for humans to read and write, and easy for machines to parse and generate. JSON is often used in APIs, configuration files, and data transfer applications.
     """)
+    st.write("### Reading JSON Files")
     st.code("""
+import json
+# Read a JSON file
+with open('data.json', 'r') as file:
+    data = json.load(file)
+    print(data)
+    """, language='python')
+    st.write("### Writing JSON Files")
+    st.code("""
+import json
+# Write data to JSON file
+data = {
     "name": "Alice",
     "age": 25,
     "skills": ["Python", "Machine Learning"]
 }
+with open('data.json', 'w') as file:
+    json.dump(data, file, indent=4)
+    """, language='python')
+    st.markdown("### Tips for Handling JSON Files")
+    st.write("""
+    - JSON files can be nested, so you might need to navigate through dictionaries and lists.
+    - If the structure is complex, you can use libraries like json_normalize() in pandas to flatten the JSON into a more tabular format for easier analysis.
+    - JSON supports both strings and numbers, and other types like arrays and booleans, making it versatile for various data types.
+    """)
+    st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/json_file__handling.ipynb)')
+    if st.button("Back to Semi-Structured Data"):
+        st.session_state.page = "semi_structured_data"
+# ----------------- CSV Data Page -----------------
+def csv_page():
+    st.title(":green[CSV Data Format]")
+    st.write("### What is CSV?")
+    st.write("""
+    CSV (Comma-Separated Values) files store tabular data in plain text, where each line is a data record and columns are separated by commas.
+    """)
+    st.write("### Reading CSV Files")
+    st.code("""
+import pandas as pd
+# Read a CSV file
+df = pd.read_csv('data.csv')
+print(df)
     """, language='python')
+    st.write("### Error Handling for CSV Files")
     st.code("""
+import pandas as pd
+try:
+    df = pd.read_csv('data.csv', encoding='utf-8', delimiter=',')
+    print("CSV File Loaded Successfully!")
+    print(df)
+except FileNotFoundError:
+    print("Error: File not found. Please check the file path.")
+except pd.errors.ParserError:
+    print("Error: The file is not a valid CSV format.")
+except UnicodeDecodeError:
+    print("Error: Encoding issue. Try specifying a different encoding like 'latin1' or 'utf-8'.")
     """, language='python')
+    st.markdown('[Jupyter Notebook](https://huggingface.co/spaces/ronakreddy18/Zerotoheroinmachinelearning/blob/main/pages/CSV_HANDLING_GUIDE.ipynb)')
+    if st.button("Back to Semi-Structured Data"):
+        st.session_state.page = "semi_structured_data"
+# ----------------- XML Data Page -----------------
+def xml_page():
+    st.title(":green[XML Data Format]")
+    st.write("### What is XML?")
+    st.write("""
+    XML (Extensible Markup Language) is a markup language used for storing and exchanging structured data. It uses a hierarchical structure with tags to define elements.
     """)
+    st.write("### Reading XML Files")
     st.code("""
 import xml.etree.ElementTree as ET
+# Load and parse an XML file
+tree = ET.parse('data.xml')
+root = tree.getroot()
+# Access elements
+for child in root:
+    print(child.tag, child.text)
     """, language='python')
+    st.write("### Sample XML Data")
+    st.code("""
+<company>
+    <employee>
+        <name>John Doe</name>
+        <role>Developer</role>
+    </employee>
+    <employee>
+        <name>Jane Smith</name>
+        <role>Manager</role>
+    </employee>
+</company>
+    """, language='xml')
+    st.write("### Issues Encountered")
     st.write("""
+    - **File not found**: The specified XML file path is incorrect.
+    - **Malformed XML**: The XML structure has syntax errors.
+    - **XPath Errors**: Incorrect XPath expressions when querying data.
     """)
+    st.write("### Solutions to These Issues")
+    st.code("""
+# Handle missing file
+try:
+    tree = ET.parse('data.xml')
+except FileNotFoundError:
+    print("File not found. Check the file path.")
+# Validate XML structure
+try:
+    root = ET.fromstring(xml_data)
+except ET.ParseError:
+    print("Malformed XML.")
+    """, language='python')
+    st.markdown('[Jupyter Notebook](https://colab.research.google.com/drive/1Dv68m9hcRzXsLRlRit0uZc-8CB8U6VV3?usp=sharing)')
+    # Back to Semi-Structured Data
+    if st.button("Back to Semi-Structured Data"):
+        st.session_state.page = "semi_structured_data"
 # Main control to call appropriate page
 if st.session_state.page == "home":
     unstructured_data_page()
 elif st.session_state.page == "semi_structured_data":
     semi_structured_data_page()
+elif st.session_state.page == "xml":
+    xml_page()