Spaces:

ronakreddy18
/

Zerotoheroinmachinelearning

Sleeping

App Files Files Community

ronakreddy18 committed on Dec 12, 2024

Commit

4914bcc

verified ·

1 Parent(s): 49fc73f

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

Browse files

Files changed (1) hide show

pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py +208 -7

pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py CHANGED Viewed

@@ -72,6 +72,7 @@ def data_collection_page():
     if st.button("Back to Home"):
         st.session_state.page = "home"
 # ----------------- Structured Data Page -----------------
 def structured_data_page():
     st.title(":blue[Structured Data]")
@@ -80,13 +81,13 @@ def structured_data_page():
     """)
     st.markdown("### Examples: Excel files, CSV files, JSON files")
-    if st.button(":green[📊 Excel]"):
         st.session_state.page = "excel"
-    if st.button(":green[📄 CSV]"):
         st.session_state.page = "csv"
-    if st.button(":green[🔹 JSON]"):
         st.session_state.page = "json"
     if st.button("Back to Data Collection"):
@@ -152,6 +153,22 @@ df = pd.read_csv('data.csv')
 print(df)
     """, language='python')
     st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_csv_guide_link")
     if st.button("Back to Structured Data"):
@@ -178,7 +195,188 @@ with open('data.json', 'r') as file:
     st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_json_guide_link")
     if st.button("Back to Structured Data"):
-        st.session_state.page = "structured_data"
 # ----------------- Router -----------------
 def router():
@@ -192,8 +390,11 @@ def router():
         excel_page()
     elif st.session_state.page == "csv":
         csv_page()
-    elif st.session_state.page == "json":
-        json_page()
 # Run the router function
-router()

     if st.button("Back to Home"):
         st.session_state.page = "home"
 # ----------------- Structured Data Page -----------------
 def structured_data_page():
     st.title(":blue[Structured Data]")
     """)
     st.markdown("### Examples: Excel files, CSV files, JSON files")
+    if st.button(":green[📊 Excel]"):
         st.session_state.page = "excel"
+    if st.button(":green[📄 CSV]"):
         st.session_state.page = "csv"
+    if st.button(":green[🔹 JSON]"):
         st.session_state.page = "json"
     if st.button("Back to Data Collection"):
 print(df)
     """, language='python')
+    st.write("### Error Handling for CSV Files")
+    st.code("""
+import pandas as pd
+try:
+    df = pd.read_csv('data.csv', encoding='utf-8', delimiter=',')
+    print("CSV File Loaded Successfully!")
+    print(df)
+except FileNotFoundError:
+    print("Error: File not found. Please check the file path.")
+except pd.errors.ParserError:
+    print("Error: The file is not a valid CSV format.")
+except UnicodeDecodeError:
+    print("Error: Encoding issue. Try specifying a different encoding like 'latin1' or 'utf-8'.")
+    """, language='python')
     st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_csv_guide_link")
     if st.button("Back to Structured Data"):
     st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_json_guide_link")
     if st.button("Back to Structured Data"):
+        st.session_state.page = "structured_data"
+# ----------------- Unstructured Data Page -----------------
+def unstructured_data_page():
+    st.title(":blue[Unstructured Data]")
+    st.markdown("""
+    **Unstructured data** does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
+    Examples include:
+    - Text documents (e.g., .txt, .docx)
+    - Images (e.g., .jpg, .png)
+    - Videos (e.g., .mp4, .avi)
+    - Audio files (e.g., .mp3, .wav)
+    - Social media posts
+    """)
+    st.header("📄 Handling Text Data")
+    st.markdown("""
+    Text data can be analyzed using Natural Language Processing (NLP) techniques.
+    """)
+    st.code("""
+# Reading text data
+with open('sample.txt', 'r') as file:
+    text = file.read()
+    print(text)
+# Basic text processing using NLTK
+import nltk
+from nltk.tokenize import word_tokenize
+nltk.download('punkt')
+tokens = word_tokenize(text)
+print(tokens)
+    """, language='python')
+    st.header("🖼️ Handling Image Data")
+    st.markdown("""
+    Image data can be processed using libraries like OpenCV and PIL (Pillow).
+    """)
+    st.code("""
+from PIL import Image
+# Open an image file
+image = Image.open('sample_image.jpg')
+image.show()
+# Convert image to grayscale
+gray_image = image.convert('L')
+gray_image.show()
+    """, language='python')
+    st.header("🎥 Handling Video Data")
+    st.markdown("""
+    Videos can be processed frame by frame using OpenCV.
+    """)
+    st.code("""
+import cv2
+# Capture video
+video = cv2.VideoCapture('sample_video.mp4')
+while video.isOpened():
+    ret, frame = video.read()
+    if not ret:
+        break
+    cv2.imshow('Frame', frame)
+    if cv2.waitKey(25) & 0xFF == ord('q'):
+        break
+video.release()
+cv2.destroyAllWindows()
+    """, language='python')
+    st.header("🔊 Handling Audio Data")
+    st.markdown("""
+    Audio data can be handled using libraries like librosa.
+    """)
+    st.code("""
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+# Load audio file
+y, sr = librosa.load('sample_audio.mp3')
+librosa.display.waveshow(y, sr=sr)
+plt.title('Waveform')
+plt.show()
+    """, language='python')
+    st.markdown("### Challenges with Unstructured Data")
+    st.write("""
+    - **Noise and Inconsistency**: Data is often incomplete or noisy.
+    - **Storage Requirements**: Large size and variability in data types.
+    - **Processing Time**: Analyzing unstructured data is computationally expensive.
+    """)
+    st.markdown("### Solutions")
+    st.write("""
+    - **Data Cleaning**: Preprocess data to remove noise.
+    - **Efficient Storage**: Use NoSQL databases (e.g., MongoDB) or cloud storage.
+    - **Parallel Processing**: Utilize frameworks like Apache Spark.
+    """)
+    # Back to Data Collection
+    if st.button("Back to Data Collection"):
+        st.session_state.page = "data_collection"
+# ----------------- Semi-Structured Data Page -----------------
+def semi_structured_data_page():
+    st.title(":blue[Semi-Structured Data]")
+    st.markdown("""
+    **Semi-structured data** does not conform strictly to a tabular structure but contains tags or markers to separate elements. Examples include:
+    - JSON (JavaScript Object Notation) files
+    - XML (Extensible Markup Language) files
+    - YAML (Yet Another Markup Language)
+    """)
+    st.header("🔹 JSON Data")
+    st.markdown("""
+    JSON is a popular format for storing and exchanging data.
+    """)
+    st.code("""
+# Sample JSON data
+data = '''
+{
+    "name": "Alice",
+    "age": 25,
+    "skills": ["Python", "Machine Learning"]
+}
+'''
+# Parse JSON
+parsed_data = json.loads(data)
+print(parsed_data['name'])  # Output: Alice
+    """, language='python')
+    st.header("🔹 Reading JSON Files")
+    st.code("""
+# Reading a JSON file
+with open('data.json', 'r') as file:
+    data = json.load(file)
+    print(data)
+    """, language='python')
+    st.header("🔹 XML Data")
+    st.markdown("""
+    XML is a markup language that defines a set of rules for encoding documents.
+    """)
+    st.code("""
+import xml.etree.ElementTree as ET
+# Sample XML data
+xml_data = '''
+<person>
+    <name>Bob</name>
+    <age>30</age>
+    <city>New York</city>
+</person>
+'''
+# Parse XML
+root = ET.fromstring(xml_data)
+print(root.find('name').text)  # Output: Bob
+    """, language='python')
+    st.markdown("### Challenges with Semi-Structured Data")
+    st.write("""
+    - **Complex Parsing**: Requires specialized parsers.
+    - **Nested Data**: Can be deeply nested, making it harder to process.
+    """)
+    st.markdown("### Solutions")
+    st.write("""
+    - **Libraries**: Use libraries like json, xml.etree.ElementTree, and yaml for parsing.
+    - **Validation**: Validate data formats to avoid parsing errors.
+    """)
+    # Back to Data Collection
+    if st.button("Back to Data Collection"):
+        st.session_state.page = "data_collection"
 # ----------------- Router -----------------
 def router():
         excel_page()
     elif st.session_state.page == "csv":
         csv_page()
+    elif st.session_state.page == "unstructured_data":
+        unstructured_data_page()
+    elif st.session_state.page == "semi_structured_data":
+        semi_structured_data_page()
 # Run the router function
+if __name__ == "__main__":
+    router()