ronakreddy18 committed on
Commit
49fc73f
·
verified ·
1 Parent(s): 10e71ae

Update pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py

Browse files
pages/LIFE_CYCLE_OF_MACHINE_LEARNING.py CHANGED
@@ -78,11 +78,17 @@ def structured_data_page():
78
  st.markdown("""
79
  Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
80
  """)
81
- st.markdown("### Examples: Excel files, CSV files")
82
 
83
  if st.button(":green[📊 Excel]"):
84
  st.session_state.page = "excel"
85
 
 
 
 
 
 
 
86
  if st.button("Back to Data Collection"):
87
  st.session_state.page = "data_collection"
88
 
@@ -125,204 +131,54 @@ excel_file = pd.ExcelFile('data.xlsx')
125
  print(excel_file.sheet_names)
126
  """, language='python')
127
 
128
- # Download button for a sample Jupyter notebook
129
- # with open("excel_handling_guide.ipynb", "rb") as file:
130
- # st.download_button(
131
- # label="Download Jupyter Notebook",
132
- # data = file,
133
- # file_name="excel_handling_guide.ipynb",
134
- # mime="application/octet-stream")
135
-
136
-
137
- #test
138
- # with open("excel_handling_guide.ipynb", "rb") as file:
139
- # st.download_button("Download Jupyter Notebook",file)
140
-
141
- #test-2
142
- st.link_button("Jupyter Notebook","https://colab.research.google.com/drive/1ZTKWTknL-4IQ9QbAfcyKzIP-_lNxmz2P?usp=sharing")
143
 
144
  if st.button("Back to Structured Data"):
145
  st.session_state.page = "structured_data"
146
 
147
- # ----------------- Unstructured Data Page -----------------
148
- def unstructured_data_page():
149
- st.title(":blue[Unstructured Data]")
150
-
151
- st.markdown("""
152
- **Unstructured data** does not have a predefined format. It consists of various data types like text, images, videos, and audio files.
153
- Examples include:
154
- - Text documents (e.g., .txt, .docx)
155
- - Images (e.g., .jpg, .png)
156
- - Videos (e.g., .mp4, .avi)
157
- - Audio files (e.g., .mp3, .wav)
158
- - Social media posts
159
- """)
160
 
161
- st.header("📄 Handling Text Data")
162
- st.markdown("""
163
- Text data can be analyzed using Natural Language Processing (NLP) techniques.
164
- """)
165
- st.code("""
166
- # Reading text data
167
- with open('sample.txt', 'r') as file:
168
- text = file.read()
169
- print(text)
170
-
171
- # Basic text processing using NLTK
172
- import nltk
173
- from nltk.tokenize import word_tokenize
174
-
175
- nltk.download('punkt')
176
- tokens = word_tokenize(text)
177
- print(tokens)
178
- """, language='python')
179
 
180
- st.header("🖼️ Handling Image Data")
181
- st.markdown("""
182
- Image data can be processed using libraries like OpenCV and PIL (Pillow).
183
- """)
184
  st.code("""
185
- from PIL import Image
186
-
187
- # Open an image file
188
- image = Image.open('sample_image.jpg')
189
- image.show()
190
 
191
- # Convert image to grayscale
192
- gray_image = image.convert('L')
193
- gray_image.show()
194
  """, language='python')
195
 
196
- st.header("🎥 Handling Video Data")
197
- st.markdown("""
198
- Videos can be processed frame by frame using OpenCV.
199
- """)
200
- st.code("""
201
- import cv2
202
-
203
- # Capture video
204
- video = cv2.VideoCapture('sample_video.mp4')
205
-
206
- while video.isOpened():
207
- ret, frame = video.read()
208
- if not ret:
209
- break
210
- cv2.imshow('Frame', frame)
211
- if cv2.waitKey(25) & 0xFF == ord('q'):
212
- break
213
-
214
- video.release()
215
- cv2.destroyAllWindows()
216
- """, language='python')
217
 
218
- st.header("🔊 Handling Audio Data")
219
- st.markdown("""
220
- Audio data can be handled using libraries like librosa.
221
- """)
222
- st.code("""
223
- import librosa
224
- import librosa.display
225
- import matplotlib.pyplot as plt
226
-
227
- # Load audio file
228
- y, sr = librosa.load('sample_audio.mp3')
229
- librosa.display.waveshow(y, sr=sr)
230
- plt.title('Waveform')
231
- plt.show()
232
- """, language='python')
233
 
234
- st.markdown("### Challenges with Unstructured Data")
235
- st.write("""
236
- - **Noise and Inconsistency**: Data is often incomplete or noisy.
237
- - **Storage Requirements**: Large size and variability in data types.
238
- - **Processing Time**: Analyzing unstructured data is computationally expensive.
239
- """)
240
 
241
- st.markdown("### Solutions")
242
  st.write("""
243
- - **Data Cleaning**: Preprocess data to remove noise.
244
- - **Efficient Storage**: Use NoSQL databases (e.g., MongoDB) or cloud storage.
245
- - **Parallel Processing**: Utilize frameworks like Apache Spark.
246
- """)
247
-
248
- # Back to Data Collection
249
- if st.button("Back to Data Collection"):
250
- st.session_state.page = "data_collection"
251
-
252
- # ----------------- Semi-Structured Data Page -----------------
253
- def semi_structured_data_page():
254
- st.title(":blue[Semi-Structured Data]")
255
-
256
- st.markdown("""
257
- **Semi-structured data** does not conform strictly to a tabular structure but contains tags or markers to separate elements. Examples include:
258
- - JSON (JavaScript Object Notation) files
259
- - XML (Extensible Markup Language) files
260
- - YAML (Yet Another Markup Language)
261
  """)
262
 
263
- st.header("🔹 JSON Data")
264
- st.markdown("""
265
- JSON is a popular format for storing and exchanging data.
266
- """)
267
  st.code("""
268
- # Sample JSON data
269
- data = '''
270
- {
271
- "name": "Alice",
272
- "age": 25,
273
- "skills": ["Python", "Machine Learning"]
274
- }
275
- '''
276
-
277
- # Parse JSON
278
- parsed_data = json.loads(data)
279
- print(parsed_data['name']) # Output: Alice
280
- """, language='python')
281
 
282
- st.header("🔹 Reading JSON Files")
283
- st.code("""
284
- # Reading a JSON file
285
  with open('data.json', 'r') as file:
286
  data = json.load(file)
287
  print(data)
288
  """, language='python')
289
 
290
- st.header("🔹 XML Data")
291
- st.markdown("""
292
- XML is a markup language that defines a set of rules for encoding documents.
293
- """)
294
- st.code("""
295
- import xml.etree.ElementTree as ET
296
-
297
- # Sample XML data
298
- xml_data = '''
299
- <person>
300
- <name>Bob</name>
301
- <age>30</age>
302
- <city>New York</city>
303
- </person>
304
- '''
305
-
306
- # Parse XML
307
- root = ET.fromstring(xml_data)
308
- print(root.find('name').text) # Output: Bob
309
- """, language='python')
310
-
311
- st.markdown("### Challenges with Semi-Structured Data")
312
- st.write("""
313
- - **Complex Parsing**: Requires specialized parsers.
314
- - **Nested Data**: Can be deeply nested, making it harder to process.
315
- """)
316
 
317
- st.markdown("### Solutions")
318
- st.write("""
319
- - **Libraries**: Use libraries like json, xml.etree.ElementTree, and yaml for parsing.
320
- - **Validation**: Validate data formats to avoid parsing errors.
321
- """)
322
-
323
- # Back to Data Collection
324
- if st.button("Back to Data Collection"):
325
- st.session_state.page = "data_collection"
326
 
327
  # ----------------- Router -----------------
328
  def router():
@@ -334,11 +190,10 @@ def router():
334
  structured_data_page()
335
  elif st.session_state.page == "excel":
336
  excel_page()
337
- elif st.session_state.page == "unstructured_data":
338
- unstructured_data_page()
339
- elif st.session_state.page == "semi_structured_data":
340
- semi_structured_data_page()
341
-
342
- # Run te router function
343
- if __name__ == "__main__":
344
- router()
 
78
  st.markdown("""
79
  Structured data is highly organized and typically stored in tables like spreadsheets or databases. It is easy to search and analyze.
80
  """)
81
+ st.markdown("### Examples: Excel files, CSV files, JSON files")
82
 
83
  if st.button(":green[📊 Excel]"):
84
  st.session_state.page = "excel"
85
 
86
+ if st.button(":green[📄 CSV]"):
87
+ st.session_state.page = "csv"
88
+
89
+ if st.button(":green[🔹 JSON]"):
90
+ st.session_state.page = "json"
91
+
92
  if st.button("Back to Data Collection"):
93
  st.session_state.page = "data_collection"
94
 
 
131
  print(excel_file.sheet_names)
132
  """, language='python')
133
 
134
+ st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/1ZTKWTknL-4IQ9QbAfcyKzIP-_lNxmz2P?usp=sharing")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  if st.button("Back to Structured Data"):
137
  st.session_state.page = "structured_data"
138
 
139
+ # ----------------- CSV Data Page -----------------
140
+ def csv_page():
141
+ st.title(":green[CSV Data Format]")
 
 
 
 
 
 
 
 
 
 
142
 
143
+ st.write("### What is CSV?")
144
+ st.write("CSV (Comma-Separated Values) files store tabular data in plain text, where each line is a data record and columns are separated by commas.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
+ st.write("### How to Read CSV Files")
 
 
 
147
  st.code("""
148
+ import pandas as pd
 
 
 
 
149
 
150
+ # Read a CSV file
151
+ df = pd.read_csv('data.csv')
152
+ print(df)
153
  """, language='python')
154
 
155
+ st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_csv_guide_link")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
+ if st.button("Back to Structured Data"):
158
+ st.session_state.page = "structured_data"
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
+ # ----------------- JSON Data Page -----------------
161
+ def json_page():
162
+ st.title(":green[JSON Data Format]")
 
 
 
163
 
164
+ st.write("### What is JSON?")
165
  st.write("""
166
+ JSON (JavaScript Object Notation) is a lightweight data-interchange format.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  """)
168
 
 
 
 
 
169
  st.code("""
170
+ import json
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
+ # Read a JSON file
 
 
173
  with open('data.json', 'r') as file:
174
  data = json.load(file)
175
  print(data)
176
  """, language='python')
177
 
178
+ st.link_button("Jupyter Notebook", "https://colab.research.google.com/drive/your_json_guide_link")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
+ if st.button("Back to Structured Data"):
181
+ st.session_state.page = "structured_data"
 
 
 
 
 
 
 
182
 
183
  # ----------------- Router -----------------
184
  def router():
 
190
  structured_data_page()
191
  elif st.session_state.page == "excel":
192
  excel_page()
193
+ elif st.session_state.page == "csv":
194
+ csv_page()
195
+ elif st.session_state.page == "json":
196
+ json_page()
197
+
198
+ # Run the router function
199
+ router()