Spaces:

ronakreddy18
/

Zerotoheroinmachinelearning

Sleeping

App Files Files Community

Zerotoheroinmachinelearning / pages /LIFE_CYCLE_OF_MACHINE_LEARNING.py

ronakreddy18

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

4914bcc verified 11 months ago

raw

history blame

12.4 kB

	import streamlit as st
	import pandas as pd
	import json
	import xml.etree.ElementTree as ET

	# Global button styling: green background, white text, full container width.
	_BUTTON_CSS = """
	<style>
	.stButton>button {
	background-color: #4CAF50;
	color: white;
	width: 100%;
	}
	</style>
	"""
	st.markdown(_BUTTON_CSS, unsafe_allow_html=True)

	# Seed the navigation state once per session; "home" is the landing page.
	if "page" not in st.session_state:
	    st.session_state.page = "home"

	# ----------------- Home Page -----------------
	def home_page():
	    """Render the landing page listing the stages of an ML project.

	    The "Data Collection" button navigates to its own page. Every other
	    stage button renders a short description right after the button when
	    it is clicked.
	    """
	    st.title(":green[Lifecycle of a Machine Learning Project]")
	    st.markdown("Click on a stage to learn more about it.")

	    # Data Collection is the only stage with a dedicated page.
	    if st.button(":blue[📊 Data Collection]"):
	        st.session_state.page = "data_collection"
	        # The router has already dispatched to this page for the current
	        # run, so rerun now; otherwise the new page only appears after a
	        # second interaction.
	        st.rerun()

	    # (button label, markdown shown when the button is clicked)
	    inline_stages = (
	        (
	            ":blue[🌟 Problem Statement]",
	            "### Problem Statement\nIdentify the problem you want to solve and set clear objectives and success criteria.",
	        ),
	        (
	            ":blue[🛠️ Simple EDA]",
	            "### Simple EDA\nPerform exploratory data analysis to understand data distributions and relationships.",
	        ),
	        (
	            ":blue[Data Pre-Processing]",
	            "### Data Pre-Processing\nConvert raw data into cleaned data.",
	        ),
	        (
	            ":blue[📈 Exploratory Data Analysis (EDA)]",
	            "### Exploratory Data Analysis (EDA)\nVisualize and analyze the data to understand its distributions and relationships.",
	        ),
	        (
	            ":blue[🏋️ Feature Engineering]",
	            "### Feature Engineering\nCreate new features from existing data.",
	        ),
	        (
	            ":blue[🤖 Model Training]",
	            "### Model Training\nTrain the model using the training data and optimize its parameters.",
	        ),
	        (
	            ":blue[🔧 Model Testing]",
	            "### Model Testing\nAssess the model's performance using various metrics and cross-validation techniques.",
	        ),
	        (
	            ":blue[🚀 Model Deployment]",
	            "### Model Deployment\nIntegrate the trained model into a production environment and monitor its performance.",
	        ),
	        (
	            ":blue[📝 Monitoring]",
	            "### Monitoring\nPeriodically retrain the model with new data and update features as needed.",
	        ),
	    )
	    for label, description in inline_stages:
	        if st.button(label):
	            st.markdown(description)

	# ----------------- Data Collection Page -----------------
	def data_collection_page():
	    """Render the Data Collection page and its navigation buttons.

	    Offers one button per data type (structured, unstructured,
	    semi-structured) plus a button back to the home page.
	    """
	    st.title(":red[Data Collection]")
	    st.markdown("### Data Collection\nThis page discusses the process of Data Collection.")
	    st.markdown("Types of Data: Structured, Unstructured, Semi-Structured")

	    # (button label, value stored in st.session_state.page)
	    destinations = (
	        (":blue[🌟 Structured Data]", "structured_data"),
	        (":blue[📷 Unstructured Data]", "unstructured_data"),
	        (":blue[🗃️ Semi-Structured Data]", "semi_structured_data"),
	        ("Back to Home", "home"),
	    )
	    for label, target in destinations:
	        if st.button(label):
	            st.session_state.page = target
	            # Rerun so the router picks up the new page right away instead
	            # of on the next interaction.
	            st.rerun()


	# ----------------- Structured Data Page -----------------
	def structured_data_page():
	    """Render the Structured Data overview with links to each file format.

	    Offers buttons for the Excel, CSV and JSON pages and a button back to
	    the Data Collection page.
	    """
	    st.title(":blue[Structured Data]")
	    st.markdown("""
	Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
	""")
	    st.markdown("### Examples: Excel files, CSV files, JSON files")

	    # The emoji are written literally. The former "\ud83d\udcca"-style
	    # escapes produce two lone surrogate code points in a Python str, not
	    # the intended emoji, and cannot be encoded as UTF-8.
	    destinations = (
	        (":green[📊 Excel]", "excel"),
	        (":green[📄 CSV]", "csv"),
	        (":green[🔹 JSON]", "json"),
	        ("Back to Data Collection", "data_collection"),
	    )
	    for label, target in destinations:
	        if st.button(label):
	            st.session_state.page = target
	            # Rerun so the router picks up the new page right away instead
	            # of on the next interaction.
	            st.rerun()

	# ----------------- Excel Data Page -----------------
	def excel_page():
	    """Render the Excel tutorial page.

	    Shows what Excel files are, how to read them with pandas, common
	    errors and their fixes, a link to a Colab notebook, and a button back
	    to the Structured Data page.
	    """
	    st.title(":green[Excel Data Format]")

	    st.write("### What is Excel?")
	    st.write("Excel is a spreadsheet tool for storing data in tabular format with rows and columns. Common file extensions: .xls, .xlsx.")

	    st.write("### How to Read Excel Files")
	    st.code("""
	import pandas as pd

	# Read an Excel file
	df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
	print(df)
	""", language='python')

	    st.write("### Issues Encountered")
	    st.write("""
	- File not found: Incorrect file path.
	- Sheet name error: Specified sheet doesn't exist.
	- Missing libraries: openpyxl or xlrd might be missing.
	""")

	    st.write("### Solutions to These Issues")
	    # The try/except bodies are indented so the displayed sample is valid
	    # Python that can be copied as-is.
	    st.code("""
	# Install required libraries
	# pip install openpyxl xlrd

	# Handle missing file
	try:
	    df = pd.read_excel('data.xlsx', sheet_name='Sheet1')
	except FileNotFoundError:
	    print("File not found. Check the file path.")

	# List available sheet names
	excel_file = pd.ExcelFile('data.xlsx')
	print(excel_file.sheet_names)
	""", language='python')

	    st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/1ZTKWTknL-4IQ9QbAfcyKzIP-_lNxmz2P?usp=sharing")

	    if st.button("Back to Structured Data"):
	        st.session_state.page = "structured_data"
	        # Rerun so the router picks up the new page right away instead of
	        # on the next interaction.
	        st.rerun()

	# ----------------- CSV Data Page -----------------
	def csv_page():
	    """Render the CSV tutorial page.

	    Shows what CSV files are, how to read them with pandas, an
	    error-handling example, a notebook link, and a button back to the
	    Structured Data page.
	    """
	    st.title(":green[CSV Data Format]")

	    st.write("### What is CSV?")
	    st.write("CSV (Comma-Separated Values) files store tabular data in plain text, where each line is a data record and columns are separated by commas.")

	    st.write("### How to Read CSV Files")
	    st.code("""
	import pandas as pd

	# Read a CSV file
	df = pd.read_csv('data.csv')
	print(df)
	""", language='python')

	    st.write("### Error Handling for CSV Files")
	    # The try/except bodies are indented so the displayed sample is valid
	    # Python that can be copied as-is.
	    st.code("""
	import pandas as pd

	try:
	    df = pd.read_csv('data.csv', encoding='utf-8', delimiter=',')
	    print("CSV File Loaded Successfully!")
	    print(df)
	except FileNotFoundError:
	    print("Error: File not found. Please check the file path.")
	except pd.errors.ParserError:
	    print("Error: The file is not a valid CSV format.")
	except UnicodeDecodeError:
	    print("Error: Encoding issue. Try specifying a different encoding like 'latin1' or 'utf-8'.")
	""", language='python')

	    # TODO: the URL below is a placeholder ("your_csv_guide_link");
	    # replace it with the real notebook link.
	    st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_csv_guide_link")

	    if st.button("Back to Structured Data"):
	        st.session_state.page = "structured_data"
	        # Rerun so the router picks up the new page right away instead of
	        # on the next interaction.
	        st.rerun()

	# ----------------- JSON Data Page -----------------
	def json_page():
	    """Render the JSON tutorial page.

	    Shows what JSON is, how to read a JSON file with the ``json`` module,
	    a notebook link, and a button back to the Structured Data page.
	    """
	    st.title(":green[JSON Data Format]")

	    st.write("### What is JSON?")
	    st.write("""
	JSON (JavaScript Object Notation) is a lightweight data-interchange format.
	""")

	    # The ``with`` body is indented so the displayed sample is valid Python.
	    st.code("""
	import json

	# Read a JSON file
	with open('data.json', 'r') as file:
	    data = json.load(file)
	print(data)
	""", language='python')

	    # TODO: the URL below is a placeholder ("your_json_guide_link");
	    # replace it with the real notebook link.
	    st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_json_guide_link")

	    if st.button("Back to Structured Data"):
	        # The target was an unterminated string literal ("structured);
	        # it is completed to the page name used by the other back buttons.
	        st.session_state.page = "structured_data"
	        # Rerun so the router picks up the new page right away instead of
	        # on the next interaction.
	        st.rerun()


	# ----------------- Unstructured Data Page -----------------
	def unstructured_data_page():
	    """Render the Unstructured Data tutorial page.

	    Covers text, image, video and audio handling with one code sample
	    each, then lists challenges and solutions, and ends with a button back
	    to the Data Collection page.
	    """
	    st.title(":blue[Unstructured Data]")

	    st.markdown("""
	Unstructured data does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
	Examples include:
	- Text documents (e.g., .txt, .docx)
	- Images (e.g., .jpg, .png)
	- Videos (e.g., .mp4, .avi)
	- Audio files (e.g., .mp3, .wav)
	- Social media posts
	""")

	    st.header("📄 Handling Text Data")
	    st.markdown("""
	Text data can be analyzed using Natural Language Processing (NLP) techniques.
	""")
	    # Nested statements in the samples below are indented so the displayed
	    # code is valid Python.
	    st.code("""
	# Reading text data
	with open('sample.txt', 'r') as file:
	    text = file.read()
	print(text)

	# Basic text processing using NLTK
	import nltk
	from nltk.tokenize import word_tokenize

	nltk.download('punkt')
	tokens = word_tokenize(text)
	print(tokens)
	""", language='python')

	    st.header("🖼️ Handling Image Data")
	    st.markdown("""
	Image data can be processed using libraries like OpenCV and PIL (Pillow).
	""")
	    st.code("""
	from PIL import Image

	# Open an image file
	image = Image.open('sample_image.jpg')
	image.show()

	# Convert image to grayscale
	gray_image = image.convert('L')
	gray_image.show()
	""", language='python')

	    st.header("🎥 Handling Video Data")
	    st.markdown("""
	Videos can be processed frame by frame using OpenCV.
	""")
	    st.code("""
	import cv2

	# Capture video
	video = cv2.VideoCapture('sample_video.mp4')

	while video.isOpened():
	    ret, frame = video.read()
	    if not ret:
	        break
	    cv2.imshow('Frame', frame)
	    if cv2.waitKey(25) & 0xFF == ord('q'):
	        break

	video.release()
	cv2.destroyAllWindows()
	""", language='python')

	    st.header("🔊 Handling Audio Data")
	    st.markdown("""
	Audio data can be handled using libraries like librosa.
	""")
	    st.code("""
	import librosa
	import librosa.display
	import matplotlib.pyplot as plt

	# Load audio file
	y, sr = librosa.load('sample_audio.mp3')
	librosa.display.waveshow(y, sr=sr)
	plt.title('Waveform')
	plt.show()
	""", language='python')

	    st.markdown("### Challenges with Unstructured Data")
	    st.write("""
	- Noise and Inconsistency: Data is often incomplete or noisy.
	- Storage Requirements: Large size and variability in data types.
	- Processing Time: Analyzing unstructured data is computationally expensive.
	""")

	    st.markdown("### Solutions")
	    st.write("""
	- Data Cleaning: Preprocess data to remove noise.
	- Efficient Storage: Use NoSQL databases (e.g., MongoDB) or cloud storage.
	- Parallel Processing: Utilize frameworks like Apache Spark.
	""")

	    # Back to Data Collection
	    if st.button("Back to Data Collection"):
	        st.session_state.page = "data_collection"
	        # Rerun so the router picks up the new page right away instead of
	        # on the next interaction.
	        st.rerun()

	# ----------------- Semi-Structured Data Page -----------------
	def semi_structured_data_page():
	    """Render the Semi-Structured Data tutorial page.

	    Covers JSON and XML with code samples, lists challenges and solutions,
	    and ends with a button back to the Data Collection page.
	    """
	    st.title(":blue[Semi-Structured Data]")

	    st.markdown("""
	Semi-structured data does not conform strictly to a tabular structure but contains tags or markers to separate elements. Examples include:
	- JSON (JavaScript Object Notation) files
	- XML (Extensible Markup Language) files
	- YAML (Yet Another Markup Language)
	""")

	    st.header("🔹 JSON Data")
	    st.markdown("""
	JSON is a popular format for storing and exchanging data.
	""")
	    # The sample imports json itself so it runs when copied on its own,
	    # as the XML sample below already does for its own module.
	    st.code("""
	import json

	# Sample JSON data
	data = '''
	{
	"name": "Alice",
	"age": 25,
	"skills": ["Python", "Machine Learning"]
	}
	'''

	# Parse JSON
	parsed_data = json.loads(data)
	print(parsed_data['name']) # Output: Alice
	""", language='python')

	    st.header("🔹 Reading JSON Files")
	    # The ``with`` body is indented so the displayed sample is valid Python.
	    st.code("""
	# Reading a JSON file
	with open('data.json', 'r') as file:
	    data = json.load(file)
	print(data)
	""", language='python')

	    st.header("🔹 XML Data")
	    st.markdown("""
	XML is a markup language that defines a set of rules for encoding documents.
	""")
	    st.code("""
	import xml.etree.ElementTree as ET

	# Sample XML data
	xml_data = '''
	<person>
	<name>Bob</name>
	<age>30</age>
	<city>New York</city>
	</person>
	'''

	# Parse XML
	root = ET.fromstring(xml_data)
	print(root.find('name').text) # Output: Bob
	""", language='python')

	    st.markdown("### Challenges with Semi-Structured Data")
	    st.write("""
	- Complex Parsing: Requires specialized parsers.
	- Nested Data: Can be deeply nested, making it harder to process.
	""")

	    st.markdown("### Solutions")
	    st.write("""
	- Libraries: Use libraries like json, xml.etree.ElementTree, and yaml for parsing.
	- Validation: Validate data formats to avoid parsing errors.
	""")

	    # Back to Data Collection
	    if st.button("Back to Data Collection"):
	        st.session_state.page = "data_collection"
	        # Rerun so the router picks up the new page right away instead of
	        # on the next interaction.
	        st.rerun()

	# ----------------- Router -----------------
	def router():
	    """Render the page function registered for ``st.session_state.page``.

	    An unrecognised page name is reset to "home", so the app never renders
	    a blank screen.
	    """
	    pages = {
	        "home": home_page,
	        "data_collection": data_collection_page,
	        "structured_data": structured_data_page,
	        "excel": excel_page,
	        "csv": csv_page,
	        # structured_data_page navigates to "json", so it needs a route;
	        # without one that button leads to an empty page.
	        "json": json_page,
	        "unstructured_data": unstructured_data_page,
	        "semi_structured_data": semi_structured_data_page,
	    }

	    current = st.session_state.page
	    if current not in pages:
	        # Unknown state: recover by going back to the landing page.
	        current = "home"
	        st.session_state.page = current

	    pages[current]()

	# Dispatch to the active page whenever this file is executed as a script.
	if __name__ == "__main__":
	    router()