harshiv committed on
Commit
e3a4d46
·
1 Parent(s): d079851

Delete predict

Browse files
Files changed (1) hide show
  1. predict +0 -46
predict DELETED
@@ -1,46 +0,0 @@
"""Train and evaluate a random-forest classifier for the ``PlacedOrNot`` label.

The script reads ``dataset.csv``, integer-encodes the categorical column,
holds out 20% of the rows for testing, fits a preprocessing + random-forest
pipeline on the remainder, and prints the accuracy on the held-out rows.
"""
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Column groups handled by the preprocessing step.
numerical_features = ['Internships', 'CGPA']
categorical_features = ['HistoryOfBacklogs']

# Load the CSV data and separate the label column from the feature columns.
data = pd.read_csv('dataset.csv')
y = data['PlacedOrNot']
X = data.drop(columns=['PlacedOrNot'])

# Replace each categorical column with integer codes (one encoder per column).
for feature in categorical_features:
    encoder = LabelEncoder()
    X[feature] = encoder.fit_transform(X[feature])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Preprocessing: scale the numerical columns, fill gaps in the categorical
# column with its most frequent value.
# NOTE(review): only the columns listed above are routed to a transformer;
# with ColumnTransformer's default ``remainder='drop'`` any other feature
# columns in the CSV never reach the classifier -- confirm this is intended.
numerical_transformer = StandardScaler()
categorical_transformer = SimpleImputer(strategy='most_frequent')
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Full model: preprocessing followed by a seeded random forest.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier(random_state=42)),
    ]
)

# Train the model
pipeline.fit(X_train, y_train)

# Evaluate the model on the held-out rows
accuracy = pipeline.score(X_test, y_test)
print('Accuracy:', accuracy)