Spaces:
Running
Running
File size: 3,870 Bytes
c1c5806 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>.container { width:85% !important; }</style>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from IPython.display import HTML, display\n",
"\n",
"# Inject a CSS rule that sets elements with class 'container' to 85% width.\n",
"container_css = \"<style>.container { width:85% !important; }</style>\"\n",
"display(HTML(container_css))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Change the path below to the location of train.csv on your computer.\n",
"# Keep the r\"...\" raw-string prefix for Windows paths: in a normal string literal\n",
"# the \"\\U\" in \"C:\\Users\" is parsed as a unicode escape and raises a SyntaxError.\n",
"# NOTE: this assumes train.csv is in the same folder as the notebook - adjust if not.\n",
"df = pd.read_csv(r\"C:\\Users\\Rafael\\Documents\\DataScience\\Data Analitics\\Week 3\\train.csv\")\n",
"#df = pd.read_csv(r\"C:\\Studies\\TU257\\DataAnalytics\\Week2\\train.csv\")\n",
"df.head(8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Positional indexes of the columns of df to keep.\n",
"keep_positions = [1, 2, 4, 5, 6, 7, 8, 10, 11]\n",
"df2 = df.iloc[:, keep_positions]\n",
"\n",
"# Show the first 8 rows of the selection.\n",
"df2.head(8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df2.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df2.describe().transpose()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Make sure the 'ydata-profiling' package (imported as 'ydata_profiling') is installed before running this cell.\n",
"# See the Lab Notes.\n",
"\n",
"from ydata_profiling import ProfileReport\n",
"\n",
"profile = ProfileReport(df2, title=\"Profiling Report\")\n",
"profile"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Can you save the Data Profile Report to a file?\n",
"# Check the package's GitHub site for examples (the link is in the Lab Notes):\n",
"# https://github.com/ydataai/ydata-profiling\n",
"# Scroll to the bottom of the main GitHub page for examples of saving the report.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Enter the code here\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### See many more examples of using this library/package to analyse datasets on its GitHub page. Scroll to the bottom of the main page for the links."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
|