Spaces:

Rafa1986
/

Data-Analytics-Class

Sleeping

File size: 3,870 Bytes

c1c5806

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>.container { width:85% !important; }</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import display, HTML\n",
    "display(HTML(\"<style>.container { width:85% !important; }</style>\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "SyntaxError",
     "evalue": "(unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \\UXXXXXXXX escape (403486649.py, line 4)",
     "output_type": "error",
     "traceback": [
      "\u001b[1;36m  Cell \u001b[1;32mIn[1], line 4\u001b[1;36m\u001b[0m\n\u001b[1;33m    df = pd.read_csv(\"C:\\Users\\Rafael\\Documents\\DataScience\\Data Analitics\\Week 3\\TU257-Lab2-1-Automated-Data-Profiling.ipynb\")\u001b[0m\n\u001b[1;37m                     ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \\UXXXXXXXX escape\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "#Change this next command to the location of train.csv on your Computer\n",
    "df = pd.read_csv(\"C:\\Users\\Rafael\\Documents\\DataScience\\Data Analitics\\Week 3\\TU257-Lab2-1-Automated-Data-Profiling.ipynb\")\n",
    "#df = pd.read_csv(\"C:\\Studies\\TU257\\DataAnalytics\\Week2\\train.csv\")\n",
    "df.head(8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = df.iloc[:,[1,2,4,5,6,7,8,10,11]]\n",
    "df2.head(8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.describe().transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Make sure to install 'ydata_profiling' library before running the following\n",
    "#see Lab Notes\n",
    "\n",
    "from ydata_profiling import ProfileReport\n",
    "\n",
    "profile = ProfileReport(df2, title=\"Profiling Report\")\n",
    "profile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Can you save the Data Profile Report to a file?\n",
    "#Check the package Github site for examples (link to this is in the Lab Notes)\n",
    "#  https://github.com/ydataai/ydata-profiling\n",
    "# Scroll to the bottom of the main GitHub page for examples of saving the report\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Enter the code here\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### See lots more examples of using this library/package for analysing datasets on the Github page. Scroll to bottom of main page to get the links"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}