diff --git "a/Data Analitics/Week 3/Lab2-1.ipynb" "b/Data Analitics/Week 3/Lab2-1.ipynb" deleted file mode 100644--- "a/Data Analitics/Week 3/Lab2-1.ipynb" +++ /dev/null @@ -1,11863 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from IPython.display import display, HTML\n", - "display(HTML(\"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
5603Moran, Mr. JamesmaleNaN003308778.4583NaNQ
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S
7803Palsson, Master. Gosta Leonardmale2.03134990921.0750NaNS
\n", - "
" - ], - "text/plain": [ - " PassengerId Survived Pclass \\\n", - "0 1 0 3 \n", - "1 2 1 1 \n", - "2 3 1 3 \n", - "3 4 1 1 \n", - "4 5 0 3 \n", - "5 6 0 3 \n", - "6 7 0 1 \n", - "7 8 0 3 \n", - "\n", - " Name Sex Age SibSp \\\n", - "0 Braund, Mr. Owen Harris male 22.0 1 \n", - "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", - "2 Heikkinen, Miss. Laina female 26.0 0 \n", - "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", - "4 Allen, Mr. William Henry male 35.0 0 \n", - "5 Moran, Mr. James male NaN 0 \n", - "6 McCarthy, Mr. Timothy J male 54.0 0 \n", - "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", - "\n", - " Parch Ticket Fare Cabin Embarked \n", - "0 0 A/5 21171 7.2500 NaN S \n", - "1 0 PC 17599 71.2833 C85 C \n", - "2 0 STON/O2. 3101282 7.9250 NaN S \n", - "3 0 113803 53.1000 C123 S \n", - "4 0 373450 8.0500 NaN S \n", - "5 0 330877 8.4583 NaN Q \n", - "6 0 17463 51.8625 E46 S \n", - "7 1 349909 21.0750 NaN S " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "#Change this next command to the location of train.csv on your Computer\n", - "df = pd.read_csv(r\"C:\\\\Users\\Rafael\\\\Documents\\\\DataScience\\\\Data Analitics\\\\Week 3\\\\train.csv\")\n", - "#df = pd.read_csv(\"C:\\Studies\\TU257\\DataAnalytics\\Week2\\train.csv\")\n", - "df.head(8)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SurvivedPclassSexAgeSibSpParchTicketCabinEmbarked
003male22.010A/5 21171NaNS
111female38.010PC 17599C85C
213female26.000STON/O2. 3101282NaNS
311female35.010113803C123S
403male35.000373450NaNS
503maleNaN00330877NaNQ
601male54.00017463E46S
703male2.031349909NaNS
\n", - "
" - ], - "text/plain": [ - " Survived Pclass Sex Age SibSp Parch Ticket Cabin \\\n", - "0 0 3 male 22.0 1 0 A/5 21171 NaN \n", - "1 1 1 female 38.0 1 0 PC 17599 C85 \n", - "2 1 3 female 26.0 0 0 STON/O2. 3101282 NaN \n", - "3 1 1 female 35.0 1 0 113803 C123 \n", - "4 0 3 male 35.0 0 0 373450 NaN \n", - "5 0 3 male NaN 0 0 330877 NaN \n", - "6 0 1 male 54.0 0 0 17463 E46 \n", - "7 0 3 male 2.0 3 1 349909 NaN \n", - "\n", - " Embarked \n", - "0 S \n", - "1 C \n", - "2 S \n", - "3 S \n", - "4 S \n", - "5 Q \n", - "6 S \n", - "7 S " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 = df.iloc[:,[1,2,4,5,6,7,8,10,11]]\n", - "df2.head(8)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SurvivedPclassAgeSibSpParch
count891.000000891.000000714.000000891.000000891.000000
mean0.3838382.30864229.6991180.5230080.381594
std0.4865920.83607114.5264971.1027430.806057
min0.0000001.0000000.4200000.0000000.000000
25%0.0000002.00000020.1250000.0000000.000000
50%0.0000003.00000028.0000000.0000000.000000
75%1.0000003.00000038.0000001.0000000.000000
max1.0000003.00000080.0000008.0000006.000000
\n", - "
" - ], - "text/plain": [ - " Survived Pclass Age SibSp Parch\n", - "count 891.000000 891.000000 714.000000 891.000000 891.000000\n", - "mean 0.383838 2.308642 29.699118 0.523008 0.381594\n", - "std 0.486592 0.836071 14.526497 1.102743 0.806057\n", - "min 0.000000 1.000000 0.420000 0.000000 0.000000\n", - "25% 0.000000 2.000000 20.125000 0.000000 0.000000\n", - "50% 0.000000 3.000000 28.000000 0.000000 0.000000\n", - "75% 1.000000 3.000000 38.000000 1.000000 0.000000\n", - "max 1.000000 3.000000 80.000000 8.000000 6.000000" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countmeanstdmin25%50%75%max
Survived891.00.3838380.4865920.000.0000.01.01.0
Pclass891.02.3086420.8360711.002.0003.03.03.0
Age714.029.69911814.5264970.4220.12528.038.080.0
SibSp891.00.5230081.1027430.000.0000.01.08.0
Parch891.00.3815940.8060570.000.0000.00.06.0
\n", - "
" - ], - "text/plain": [ - " count mean std min 25% 50% 75% max\n", - "Survived 891.0 0.383838 0.486592 0.00 0.000 0.0 1.0 1.0\n", - "Pclass 891.0 2.308642 0.836071 1.00 2.000 3.0 3.0 3.0\n", - "Age 714.0 29.699118 14.526497 0.42 20.125 28.0 38.0 80.0\n", - "SibSp 891.0 0.523008 1.102743 0.00 0.000 0.0 1.0 8.0\n", - "Parch 891.0 0.381594 0.806057 0.00 0.000 0.0 0.0 6.0" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2.describe().transpose()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1eb1cf8f15ca4379be34b731268993a4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Summarize dataset: 0%| | 0/5 [00:00" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#Make sure to install 'ydata_profiling' library before running the following\n", - "#see Lab Notes\n", - "\n", - "from ydata_profiling import ProfileReport\n", - "\n", - "profile = ProfileReport(df2, title=\"Profiling Report\")\n", - "profile" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "#Can you save the Data Profile Report to a file?\n", - "#Check the package Github site for examples (link to this is in the Lab Notes)\n", - "# https://github.com/ydataai/ydata-profiling\n", - "# Scroll to the bottom of the main GitHub page for examples of saving the report\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "#Enter the code here\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": 
{}, - "source": [ - "### See many more examples of using this library/package for analysing datasets on the GitHub page (https://github.com/ydataai/ydata-profiling). Scroll to the bottom of the main page to find the links." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}