{ "cells": [ { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from IPython.display import display, HTML\n", "display(HTML(\"\"))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
5603Moran, Mr. JamesmaleNaN003308778.4583NaNQ
6701McCarthy, Mr. Timothy Jmale54.0001746351.8625E46S
7803Palsson, Master. Gosta Leonardmale2.03134990921.0750NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "5 6 0 3 \n", "6 7 0 1 \n", "7 8 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "5 Moran, Mr. James male NaN 0 \n", "6 McCarthy, Mr. Timothy J male 54.0 0 \n", "7 Palsson, Master. Gosta Leonard male 2.0 3 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S \n", "5 0 330877 8.4583 NaN Q \n", "6 0 17463 51.8625 E46 S \n", "7 1 349909 21.0750 NaN S " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "\n", "import pandas as pd\n", "\n", "# Location of train.csv: set the TITANIC_TRAIN_CSV environment variable, or edit the\n", "# fallback path below to match your computer. Keep Windows paths in a raw string with\n", "# single backslashes so that no backslash is read as an escape sequence.\n", "TRAIN_CSV_PATH = os.environ.get(\n", "    \"TITANIC_TRAIN_CSV\",\n", "    r\"C:\\Users\\Rafael\\Documents\\DataScience\\Data Analitics\\Week 3\\train.csv\",\n", ")\n", "\n", "# Load the full training set; the later cells read it through the name `df`.\n", "df = pd.read_csv(TRAIN_CSV_PATH)\n", "df.head(8)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassSexAgeSibSpParchTicketCabinEmbarked
003male22.010A/5 21171NaNS
111female38.010PC 17599C85C
213female26.000STON/O2. 3101282NaNS
311female35.010113803C123S
403male35.000373450NaNS
503maleNaN00330877NaNQ
601male54.00017463E46S
703male2.031349909NaNS
\n", "
" ], "text/plain": [ " Survived Pclass Sex Age SibSp Parch Ticket Cabin \\\n", "0 0 3 male 22.0 1 0 A/5 21171 NaN \n", "1 1 1 female 38.0 1 0 PC 17599 C85 \n", "2 1 3 female 26.0 0 0 STON/O2. 3101282 NaN \n", "3 1 1 female 35.0 1 0 113803 C123 \n", "4 0 3 male 35.0 0 0 373450 NaN \n", "5 0 3 male NaN 0 0 330877 NaN \n", "6 0 1 male 54.0 0 0 17463 E46 \n", "7 0 3 male 2.0 3 1 349909 NaN \n", "\n", " Embarked \n", "0 S \n", "1 C \n", "2 S \n", "3 S \n", "4 S \n", "5 Q \n", "6 S \n", "7 S " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Select the analysis columns by name (PassengerId, Name and Fare are left out) so the\n", "# selection does not depend on the column order of the CSV; a missing column raises a\n", "# KeyError instead of silently picking a different one.\n", "ANALYSIS_COLUMNS = [\n", "    \"Survived\", \"Pclass\", \"Sex\", \"Age\", \"SibSp\",\n", "    \"Parch\", \"Ticket\", \"Cabin\", \"Embarked\",\n", "]\n", "df2 = df.loc[:, ANALYSIS_COLUMNS]\n", "df2.head(8)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SurvivedPclassAgeSibSpParch
count891.000000891.000000714.000000891.000000891.000000
mean0.3838382.30864229.6991180.5230080.381594
std0.4865920.83607114.5264971.1027430.806057
min0.0000001.0000000.4200000.0000000.000000
25%0.0000002.00000020.1250000.0000000.000000
50%0.0000003.00000028.0000000.0000000.000000
75%1.0000003.00000038.0000001.0000000.000000
max1.0000003.00000080.0000008.0000006.000000
\n", "
" ], "text/plain": [ " Survived Pclass Age SibSp Parch\n", "count 891.000000 891.000000 714.000000 891.000000 891.000000\n", "mean 0.383838 2.308642 29.699118 0.523008 0.381594\n", "std 0.486592 0.836071 14.526497 1.102743 0.806057\n", "min 0.000000 1.000000 0.420000 0.000000 0.000000\n", "25% 0.000000 2.000000 20.125000 0.000000 0.000000\n", "50% 0.000000 3.000000 28.000000 0.000000 0.000000\n", "75% 1.000000 3.000000 38.000000 1.000000 0.000000\n", "max 1.000000 3.000000 80.000000 8.000000 6.000000" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Descriptive statistics (count, mean, std, min, quartiles, max) of df2; with the\n", "# default arguments only the numeric columns appear in the result.\n", "numeric_summary = df2.describe()\n", "numeric_summary" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmeanstdmin25%50%75%max
Survived891.00.3838380.4865920.000.0000.01.01.0
Pclass891.02.3086420.8360711.002.0003.03.03.0
Age714.029.69911814.5264970.4220.12528.038.080.0
SibSp891.00.5230081.1027430.000.0000.01.08.0
Parch891.00.3815940.8060570.000.0000.00.06.0
\n", "
" ], "text/plain": [ " count mean std min 25% 50% 75% max\n", "Survived 891.0 0.383838 0.486592 0.00 0.000 0.0 1.0 1.0\n", "Pclass 891.0 2.308642 0.836071 1.00 2.000 3.0 3.0 3.0\n", "Age 714.0 29.699118 14.526497 0.42 20.125 28.0 38.0 80.0\n", "SibSp 891.0 0.523008 1.102743 0.00 0.000 0.0 1.0 8.0\n", "Parch 891.0 0.381594 0.806057 0.00 0.000 0.0 0.0 6.0" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same summary transposed: one row per column of df2, one column per statistic.\n", "df2.describe().T" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1eb1cf8f15ca4379be34b731268993a4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Summarize dataset: 0%| | 0/5 [00:00" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Requires the 'ydata_profiling' package to be installed beforehand (see the Lab Notes).\n", "from ydata_profiling import ProfileReport\n", "\n", "# Build the profiling report for df2; `profile` is left as the last expression of the\n", "# cell so that the notebook displays it.\n", "report_title = \"Profiling Report\"\n", "profile = ProfileReport(df2, title=report_title)\n", "profile" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Exercise: can you save the Data Profile Report to a file?\n", "# Check the package's GitHub site for examples (the link is also in the Lab Notes):\n", "#   https://github.com/ydataai/ydata-profiling\n", "# Scroll to the bottom of the main GitHub page for examples of saving the report.\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# Enter the code here\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### See lots more examples of using this library/package for analysing datasets on the Github page. 
Scroll to the bottom of the main page to find the links." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.9" } }, "nbformat": 4, "nbformat_minor": 4 }