Spaces:

ronakreddy18
/

Zerotoheroinmachinelearning

Sleeping

App Files Files Community

ronakreddy18 committed on Dec 11, 2024

Commit

0ad3418

verified ·

1 Parent(s): 66b2688

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

Browse files

Files changed (1) hide show

pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py +179 -24

pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py CHANGED Viewed

@@ -1,6 +1,17 @@
 import streamlit as st
-# Page navigation state
 if 'page' not in st.session_state:
     st.session_state.page = "home"  # Default page is "home"
@@ -9,11 +20,10 @@ def home_page():
     st.title(":green[Lifecycle of a Machine Learning Project]")
     st.markdown("Click on a stage to learn more about it.")
-    # Button for Data Collection (Redirects to 'data_collection' page)
-    if st.button(":orange[📊 Data Collection]"):
         st.session_state.page = "data_collection"
-    # Buttons for other stages with brief explanations
     if st.button(":blue[🌟 Problem Statement]"):
         st.markdown("### Problem Statement\nIdentify the problem you want to solve and set clear objectives and success criteria.")
@@ -47,19 +57,15 @@ def data_collection_page():
     st.markdown("### Data Collection\nThis page discusses the process of Data Collection.")
     st.markdown("Types of Data: **Structured**, **Unstructured**, **Semi-Structured**")
-    # Button for Structured Data
     if st.button(":blue[🌟 Structured Data]"):
         st.session_state.page = "structured_data"
-    # Button for Unstructured Data
     if st.button(":blue[📷 Unstructured Data]"):
         st.session_state.page = "unstructured_data"
-    # Button for Semi-Structured Data
     if st.button(":blue[🗃️ Semi-Structured Data]"):
         st.session_state.page = "semi_structured_data"
-    # Back to Home button
     if st.button("Back to Home"):
         st.session_state.page = "home"
@@ -71,11 +77,9 @@ def structured_data_page():
     """)
     st.markdown("### Examples: Excel files, CSV files")
-    # Button for Excel Details
     if st.button(":green[📊 Excel]"):
         st.session_state.page = "excel"
-    # Back to Data Collection
     if st.button("Back to Data Collection"):
         st.session_state.page = "data_collection"
@@ -83,11 +87,9 @@ def structured_data_page():
 def excel_page():
     st.title(":green[Excel Data Format]")
-    # 4a. What it is
     st.write("### What is Excel?")
-    st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: `.xls`, `.xlsx`.")
-    # 4b. How to read Excel files
     st.write("### How to Read Excel Files")
     st.code("""
 import pandas as pd
@@ -97,15 +99,13 @@ df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
 print(df)
     """, language='python')
-    # 4c. Issues encountered
     st.write("### Issues Encountered")
     st.write("""
 - **File not found**: Incorrect file path.
 - **Sheet name error**: Specified sheet doesn't exist.
-- **Missing libraries**: `openpyxl` or `xlrd` might be missing.
 """)
-    # 4d. Solutions
     st.write("### Solutions to These Issues")
     st.code("""
 # Install required libraries
@@ -122,7 +122,7 @@ excel_file = pd.ExcelFile('data.xlsx')
 print(excel_file.sheet_names)
     """, language='python')
-    # Download Button for Jupyter Notebook
     with open("excel_handling_guide.ipynb", "rb") as file:
         st.download_button(
             label="Download Jupyter Notebook",
@@ -131,31 +131,188 @@ print(excel_file.sheet_names)
             mime="application/octet-stream"
         )
-    # Back to Structured Data
     if st.button("Back to Structured Data"):
         st.session_state.page = "structured_data"
 # ----------------- Unstructured Data Page -----------------
 def unstructured_data_page():
     st.title(":blue[Unstructured Data]")
     st.markdown("""
-    Unstructured data does not have a predefined format. Examples include text documents, images, videos, and audio files.
     """)
     # Back to Data Collection
     if st.button("Back to Data Collection"):
-        st.session_state.page = "data_collection"
 # ----------------- Semi-Structured Data Page -----------------
 def semi_structured_data_page():
     st.title(":blue[Semi-Structured Data]")
     st.markdown("""
-    Semi-structured data has some organizational properties but doesn't fit into strict tables. Examples: JSON, XML files.
     """)
     # Back to Data Collection
     if st.button("Back to Data Collection"):
-        st.session_state.page = "data_collection"
 # ----------------- Router -----------------
 def router():
@@ -175,5 +332,3 @@ def router():
 # Run the router function
 if __name__ == "__main__":
     router()

 import streamlit as st
+# Inject custom CSS to style the buttons
+st.markdown("""
+    <style>
+    .stButton>button {
+        background-color: #4CAF50;
+        color: white;
+        width: 100%;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+# Initialize page navigation state
 if 'page' not in st.session_state:
     st.session_state.page = "home"  # Default page is "home"
     st.title(":green[Lifecycle of a Machine Learning Project]")
     st.markdown("Click on a stage to learn more about it.")
+    # Buttons for various stages of the ML project lifecycle
+    if st.button(":blue[📊 Data Collection]"):
         st.session_state.page = "data_collection"
     if st.button(":blue[🌟 Problem Statement]"):
         st.markdown("### Problem Statement\nIdentify the problem you want to solve and set clear objectives and success criteria.")
     st.markdown("### Data Collection\nThis page discusses the process of Data Collection.")
     st.markdown("Types of Data: **Structured**, **Unstructured**, **Semi-Structured**")
     if st.button(":blue[🌟 Structured Data]"):
         st.session_state.page = "structured_data"
     if st.button(":blue[📷 Unstructured Data]"):
         st.session_state.page = "unstructured_data"
     if st.button(":blue[🗃️ Semi-Structured Data]"):
         st.session_state.page = "semi_structured_data"
     if st.button("Back to Home"):
         st.session_state.page = "home"
     """)
     st.markdown("### Examples: Excel files, CSV files")
     if st.button(":green[📊 Excel]"):
         st.session_state.page = "excel"
     if st.button("Back to Data Collection"):
         st.session_state.page = "data_collection"
 def excel_page():
     st.title(":green[Excel Data Format]")
     st.write("### What is Excel?")
+    st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: .xls, .xlsx.")
     st.write("### How to Read Excel Files")
     st.code("""
 import pandas as pd
 print(df)
     """, language='python')
     st.write("### Issues Encountered")
     st.write("""
 - **File not found**: Incorrect file path.
 - **Sheet name error**: Specified sheet doesn't exist.
+- **Missing libraries**: openpyxl or xlrd might be missing.
 """)
     st.write("### Solutions to These Issues")
     st.code("""
 # Install required libraries
 print(excel_file.sheet_names)
     """, language='python')
+    # Download button for a sample Jupyter notebook
     with open("excel_handling_guide.ipynb", "rb") as file:
         st.download_button(
             label="Download Jupyter Notebook",
             mime="application/octet-stream"
         )
     if st.button("Back to Structured Data"):
         st.session_state.page = "structured_data"
 # ----------------- Unstructured Data Page -----------------
 def unstructured_data_page():
     st.title(":blue[Unstructured Data]")
+    st.markdown("""
+    **Unstructured data** does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
+    Examples include:
+    - Text documents (e.g., .txt, .docx)
+    - Images (e.g., .jpg, .png)
+    - Videos (e.g., .mp4, .avi)
+    - Audio files (e.g., .mp3, .wav)
+    - Social media posts
+    """)
+    st.header("📄 Handling Text Data")
     st.markdown("""
+    Text data can be analyzed using Natural Language Processing (NLP) techniques.
+    """)
+    st.code("""
+# Reading text data
+with open('sample.txt', 'r') as file:
+    text = file.read()
+    print(text)
+# Basic text processing using NLTK
+import nltk
+from nltk.tokenize import word_tokenize
+nltk.download('punkt')
+nltk.download('punkt_tab')  # tokenizer data required by newer NLTK releases (3.9+)
+tokens = word_tokenize(text)
+print(tokens)
+    """, language='python')
+    st.header("🖼️ Handling Image Data")
+    st.markdown("""
+    Image data can be processed using libraries like OpenCV and PIL (Pillow).
+    """)
+    st.code("""
+from PIL import Image
+# Open an image file
+image = Image.open('sample_image.jpg')
+image.show()
+# Convert image to grayscale
+gray_image = image.convert('L')
+gray_image.show()
+    """, language='python')
+    st.header("🎥 Handling Video Data")
+    st.markdown("""
+    Videos can be processed frame by frame using OpenCV.
+    """)
+    st.code("""
+import cv2
+# Capture video
+video = cv2.VideoCapture('sample_video.mp4')
+while video.isOpened():
+    ret, frame = video.read()
+    if not ret:
+        break
+    cv2.imshow('Frame', frame)
+    if cv2.waitKey(25) & 0xFF == ord('q'):
+        break
+video.release()
+cv2.destroyAllWindows()
+    """, language='python')
+    st.header("🔊 Handling Audio Data")
+    st.markdown("""
+    Audio data can be handled using libraries like librosa.
+    """)
+    st.code("""
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+# Load audio file
+y, sr = librosa.load('sample_audio.mp3')
+librosa.display.waveshow(y, sr=sr)
+plt.title('Waveform')
+plt.show()
+    """, language='python')
+    st.markdown("### Challenges with Unstructured Data")
+    st.write("""
+    - **Noise and Inconsistency**: Data is often incomplete or noisy.
+    - **Storage Requirements**: Large size and variability in data types.
+    - **Processing Time**: Analyzing unstructured data is computationally expensive.
+    """)
+    st.markdown("### Solutions")
+    st.write("""
+    - **Data Cleaning**: Preprocess data to remove noise.
+    - **Efficient Storage**: Use NoSQL databases (e.g., MongoDB) or cloud storage.
+    - **Parallel Processing**: Utilize frameworks like Apache Spark.
     """)
     # Back to Data Collection
     if st.button("Back to Data Collection"):
+        st.session_state.page = "data_collection"
 # ----------------- Semi-Structured Data Page -----------------
 def semi_structured_data_page():
     st.title(":blue[Semi-Structured Data]")
+    st.markdown("""
+    **Semi-structured data** does not conform strictly to a tabular structure but contains tags or markers to separate elements. Examples include:
+    - JSON (JavaScript Object Notation) files
+    - XML (Extensible Markup Language) files
+    - YAML (YAML Ain't Markup Language) files
+    """)
+    st.header("🔹 JSON Data")
+    st.markdown("""
+    JSON is a popular format for storing and exchanging data.
+    """)
+    st.code("""
+import json
+# Sample JSON data
+data = '''
+{
+    "name": "Alice",
+    "age": 25,
+    "skills": ["Python", "Machine Learning"]
+}
+'''
+# Parse JSON
+parsed_data = json.loads(data)
+print(parsed_data['name'])  # Output: Alice
+    """, language='python')
+    st.header("🔹 Reading JSON Files")
+    st.code("""
+import json
+# Reading a JSON file
+with open('data.json', 'r') as file:
+    data = json.load(file)
+    print(data)
+    """, language='python')
+    st.header("🔹 XML Data")
     st.markdown("""
+    XML is a markup language that defines a set of rules for encoding documents.
+    """)
+    st.code("""
+import xml.etree.ElementTree as ET
+# Sample XML data
+xml_data = '''
+<person>
+    <name>Bob</name>
+    <age>30</age>
+    <city>New York</city>
+</person>
+'''
+# Parse XML
+root = ET.fromstring(xml_data)
+print(root.find('name').text)  # Output: Bob
+    """, language='python')
+    st.markdown("### Challenges with Semi-Structured Data")
+    st.write("""
+    - **Complex Parsing**: Requires specialized parsers.
+    - **Nested Data**: Can be deeply nested, making it harder to process.
+    """)
+    st.markdown("### Solutions")
+    st.write("""
+    - **Libraries**: Use libraries like json, xml.etree.ElementTree, and yaml for parsing.
+    - **Validation**: Validate data formats to avoid parsing errors.
     """)
     # Back to Data Collection
     if st.button("Back to Data Collection"):
+        st.session_state.page = "data_collection"
 # ----------------- Router -----------------
 def router():
 # Run the router function
 if __name__ == "__main__":
     router()