harshiv
/

prediction

Tabular Classification

Model card Files Files and versions Community

harshiv committed on Apr 15, 2023

Commit

e3a4d46

·

1 Parent(s): d079851

Delete predict

Files changed (1) hide show

predict +0 -46

predict DELETED Viewed

@@ -1,46 +0,0 @@
-import pandas as pd
-from sklearn.compose import ColumnTransformer
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.impute import SimpleImputer
-from sklearn.model_selection import train_test_split
-from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import LabelEncoder, StandardScaler
-# Load the dataset from 'dataset.csv' (the path is relative to the current working directory)
-data = pd.read_csv('dataset.csv')
-# Split the data into features (every column except 'PlacedOrNot') and the 'PlacedOrNot' label column
-X = data.drop('PlacedOrNot', axis=1)
-y = data['PlacedOrNot']
-# Label-encode 'HistoryOfBacklogs' in place. NOTE(review): the encoder is fitted on the full data, before the train/test split below
-categorical_features = [ 'HistoryOfBacklogs']
-for feature in categorical_features:
-    encoder = LabelEncoder()
-    X[feature] = encoder.fit_transform(X[feature])
-# Split the data into training and testing sets: 20% of the rows are held out for testing, with a fixed random_state of 42
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-# Create the pipeline: standard-scale 'Internships' and 'CGPA', impute 'HistoryOfBacklogs' with its most frequent value, then fit a random forest. NOTE(review): categorical_features is re-assigned below to the same list as above; columns of X in neither feature list are presumably dropped by ColumnTransformer's default remainder setting - confirm
-numerical_features = ['Internships', 'CGPA']
-numerical_transformer = StandardScaler()
-categorical_features = ['HistoryOfBacklogs']
-categorical_transformer = SimpleImputer(strategy='most_frequent')
-preprocessor = ColumnTransformer(
-    transformers=[
-        ('num', numerical_transformer, numerical_features),
-        ('cat', categorical_transformer, categorical_features)
-    ])
-pipeline = Pipeline([
-    ('preprocessor', preprocessor),
-    ('classifier', RandomForestClassifier(random_state=42))
-])
-# Train the model: fit the preprocessing steps and the classifier on the training split only
-pipeline.fit(X_train, y_train)
-# Evaluate the model: score the fitted pipeline on the held-out test split and print the result labelled as accuracy
-accuracy = pipeline.score(X_test, y_test)
-print('Accuracy:', accuracy)