harshiv
/

prediction

Tabular Classification

Model card Files and versions Community

harshiv committed on Apr 15, 2023

Commit

2b2aa8f

·

1 Parent(s): fb33d3b

Upload Untitled2.ipynb

Files changed (1) hide show

Untitled2.ipynb +104 -0

Untitled2.ipynb ADDED Viewed

	@@ -0,0 +1,104 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2a0f61a3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.8417508417508418\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from sklearn.compose import ColumnTransformer\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.impute import SimpleImputer\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
+    "\n",
+    "# Load the CSV data\n",
+    "data = pd.read_csv('dataset.csv')\n",
+    "\n",
+    "# Split the data into features and labels\n",
+    "X = data.drop('PlacedOrNot', axis=1)\n",
+    "y = data['PlacedOrNot']\n",
+    "\n",
+    "# Encode categorical features\n",
+    "categorical_features = ['HistoryOfBacklogs']\n",
+    "for feature in categorical_features:\n",
+    "    encoder = LabelEncoder()\n",
+    "    X[feature] = encoder.fit_transform(X[feature])\n",
+    "\n",
+    "# Split the data into training and testing sets\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
+    "\n",
+    "# Create the pipeline\n",
+    "numerical_features = ['Internships', 'CGPA']\n",
+    "numerical_transformer = StandardScaler()\n",
+    "categorical_features = [ 'HistoryOfBacklogs']\n",
+    "categorical_transformer = SimpleImputer(strategy='most_frequent')\n",
+    "preprocessor = ColumnTransformer(\n",
+    "    transformers=[\n",
+    "        ('num', numerical_transformer, numerical_features),\n",
+    "        ('cat', categorical_transformer, categorical_features)\n",
+    "    ])\n",
+    "\n",
+    "pipeline = Pipeline([\n",
+    "    ('preprocessor', preprocessor),\n",
+    "    ('classifier', RandomForestClassifier(random_state=42))\n",
+    "])\n",
+    "\n",
+    "# Train the model\n",
+    "pipeline.fit(X_train, y_train)\n",
+    "\n",
+    "# Evaluate the model\n",
+    "accuracy = pipeline.score(X_test, y_test)\n",
+    "print('Accuracy:', accuracy)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8e941b77",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4a2788a3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}