shanmukakomal committed on
Commit
69561ea
·
1 Parent(s): 7c8b249
Files changed (2) hide show
  1. app.py +11 -35
  2. college predictor.py +0 -85
app.py CHANGED
@@ -6,6 +6,7 @@ from sklearn.preprocessing import LabelEncoder
6
  from sklearn.ensemble import RandomForestClassifier
7
  import joblib
8
 
 
9
  # Load and preprocess data
10
  def load_and_preprocess_data(filename):
11
  df = pd.read_csv(filename)
@@ -17,53 +18,28 @@ def load_and_preprocess_data(filename):
17
  label_encoders[col] = le
18
 
19
  X = df[["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]]
20
- y_college = df["College Name"]
21
- y_branch = df["Branch"]
22
 
23
- return X, y_college, y_branch, label_encoders, df
24
 
25
  filename = "AP_EAMCET_Engineering_10000 (1).csv"
26
- X, y_college, y_branch, label_encoders, df = load_and_preprocess_data(filename)
27
 
28
- # Train model and evaluate metrics
29
- def train_model(X, y, target_name):
30
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
31
  model = RandomForestClassifier(n_estimators=100, random_state=42)
32
  model.fit(X_train, y_train)
33
-
34
- # Predictions
35
- y_pred = model.predict(X_test)
36
-
37
- # Evaluate model metrics
38
- accuracy = accuracy_score(y_test, y_pred)
39
- precision = precision_score(y_test, y_pred, average='weighted', zero_division=1)
40
- recall = recall_score(y_test, y_pred, average='weighted', zero_division=1)
41
- f1 = f1_score(y_test, y_pred, average='weighted', zero_division=1)
42
- conf_matrix = confusion_matrix(y_test, y_pred)
43
-
44
- print(f"{target_name} Model Metrics:")
45
- print(f"Accuracy: {accuracy:.4f}")
46
- print(f"Precision: {precision:.4f}")
47
- print(f"Recall: {recall:.4f}")
48
- print(f"F1 Score: {f1:.4f}")
49
- print(f"Confusion Matrix:\n{conf_matrix}\n")
50
-
51
  return model
52
 
53
- # Train separate models
54
- college_model = train_model(X, y_college, "College Name")
55
- branch_model = train_model(X, y_branch, "Branch")
56
 
57
- # Save models and encoders
58
- joblib.dump(college_model, "college_model.pkl")
59
- joblib.dump(branch_model, "branch_model.pkl")
60
  joblib.dump(label_encoders, "label_encoders.pkl")
61
 
62
  # Prediction function
63
  def predict_colleges(category, gender, rank, region):
64
- # Load models and label encoders
65
- college_model = joblib.load("college_model.pkl")
66
- branch_model = joblib.load("branch_model.pkl")
67
  label_encoders = joblib.load("label_encoders.pkl")
68
 
69
  # Transform input values using label encoders
@@ -74,7 +50,7 @@ def predict_colleges(category, gender, rank, region):
74
  except ValueError:
75
  return "Invalid input values. Please select valid options."
76
 
77
- # Filter dataset based on criteria
78
  filtered_df = df[
79
  (df["Category"] == category_enc) &
80
  (df["Gender"] == gender_enc) &
@@ -107,4 +83,4 @@ demo = gr.Interface(
107
  description="Enter your details to predict all possible colleges and branches based on your rank."
108
  )
109
 
110
- demo.launch()
 
6
  from sklearn.ensemble import RandomForestClassifier
7
  import joblib
8
 
9
+ #
10
  # Load and preprocess data
11
  def load_and_preprocess_data(filename):
12
  df = pd.read_csv(filename)
 
18
  label_encoders[col] = le
19
 
20
  X = df[["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]]
21
+ y_college_branch = df[["College Name", "Branch"]]
 
22
 
23
+ return X, y_college_branch, label_encoders, df
24
 
25
  filename = "AP_EAMCET_Engineering_10000 (1).csv"
26
+ X, y_college_branch, label_encoders, df = load_and_preprocess_data(filename)
27
 
28
+ # Train model
29
+ def train_model(X, y):
30
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
31
  model = RandomForestClassifier(n_estimators=100, random_state=42)
32
  model.fit(X_train, y_train)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  return model
34
 
35
+ college_branch_model = train_model(X, y_college_branch)
 
 
36
 
37
+ joblib.dump(college_branch_model, "college_branch_model.pkl")
 
 
38
  joblib.dump(label_encoders, "label_encoders.pkl")
39
 
40
  # Prediction function
41
  def predict_colleges(category, gender, rank, region):
42
+ # Load label encoders
 
 
43
  label_encoders = joblib.load("label_encoders.pkl")
44
 
45
  # Transform input values using label encoders
 
50
  except ValueError:
51
  return "Invalid input values. Please select valid options."
52
 
53
+ # Filter the dataset based on encoded values
54
  filtered_df = df[
55
  (df["Category"] == category_enc) &
56
  (df["Gender"] == gender_enc) &
 
83
  description="Enter your details to predict all possible colleges and branches based on your rank."
84
  )
85
 
86
+ demo.launch()
college predictor.py DELETED
@@ -1,85 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- import gradio as gr
4
- from sklearn.model_selection import train_test_split
5
- from sklearn.preprocessing import LabelEncoder
6
- from sklearn.ensemble import RandomForestClassifier
7
- import joblib
8
-
# Load and preprocess data
def load_and_preprocess_data(filename):
    """Read the admissions CSV and label-encode its categorical columns.

    Args:
        filename: Path of the CSV file to read with ``pandas.read_csv``.

    Returns:
        A 4-tuple ``(X, y_college_branch, label_encoders, df)``:
        the feature columns, the two target columns, a dict mapping each
        encoded column name to its fitted ``LabelEncoder``, and the full
        frame with the categorical columns replaced by integer codes.
    """
    categorical_columns = ("College Name", "Category", "Gender", "Branch", "Region")
    feature_columns = ["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]
    target_columns = ["College Name", "Branch"]

    df = pd.read_csv(filename)

    # One encoder per categorical column, kept so that predictions can later
    # be translated back to the original labels.
    label_encoders = {name: LabelEncoder() for name in categorical_columns}
    for name, encoder in label_encoders.items():
        df[name] = encoder.fit_transform(df[name])

    return df[feature_columns], df[target_columns], label_encoders, df
23
-
# Dataset read once at import time; the encoded frame `df` and the fitted
# `label_encoders` are reused by the code further down in this module.
filename = "AP_EAMCET_Engineering_10000 (1).csv"
(X, y_college_branch, label_encoders, df) = load_and_preprocess_data(filename)
26
-
# Train model
def train_model(X, y, *, test_size=0.2, random_state=42, n_estimators=100):
    """Fit a random-forest classifier on the training portion of ``(X, y)``.

    Args:
        X: Feature table passed to ``train_test_split``.
        y: Target column(s) aligned with ``X``.
        test_size: Fraction of rows held out of training (default 0.2).
        random_state: Seed used for both the split and the forest, so that
            repeated runs are reproducible (default 42).
        n_estimators: Number of trees in the forest (default 100).

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # The held-out partition is intentionally discarded: this function only
    # trains, it does not evaluate.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)
    return model
33
-
# Train the combined college/branch model on the encoded dataset.
college_branch_model = train_model(X, y_college_branch)

# Persist the fitted model and the encoders next to the script.
joblib.dump(value=college_branch_model, filename="college_branch_model.pkl")
joblib.dump(value=label_encoders, filename="label_encoders.pkl")
38
-
# Prediction function
def predict_colleges(category, gender, rank, region):
    """List the college/branch pairs whose rank window contains ``rank``.

    The lookup filters the module-level encoded frame ``df``; it does not
    query a trained model.

    Args:
        category: Category label as it appears in the dataset.
        gender: Gender label as it appears in the dataset.
        rank: Candidate rank; compared against "Opening Rank" and
            "Closing Rank" (inclusive on both ends).
        region: Region label as it appears in the dataset.

    Returns:
        A string: the de-duplicated "College Name"/"Branch" table rendered
        without its index, or a human-readable message when the input is
        invalid or nothing matches.
    """
    invalid_input = "Invalid input values. Please select valid options."

    # NOTE(review): an empty numeric field presumably arrives as None; the
    # rank comparisons below would raise on it, so reject it up front.
    if rank is None:
        return invalid_input

    # Load label encoders
    label_encoders = joblib.load("label_encoders.pkl")

    # Transform input values using label encoders; a label the encoder has
    # never seen raises ValueError.
    try:
        category_enc = label_encoders["Category"].transform([category])[0]
        gender_enc = label_encoders["Gender"].transform([gender])[0]
        region_enc = label_encoders["Region"].transform([region])[0]
    except ValueError:
        return invalid_input

    # Filter the dataset based on encoded values
    matches = (
        (df["Category"] == category_enc)
        & (df["Gender"] == gender_enc)
        & (df["Opening Rank"] <= rank)
        & (df["Closing Rank"] >= rank)
        & (df["Region"] == region_enc)
    )
    # Copy the selection so the column assignments below write to an
    # independent frame rather than to a slice of the shared `df`.
    filtered_df = df[matches].copy()

    if filtered_df.empty:
        return "No matching colleges found."

    # Decode college names and branches
    filtered_df["College Name"] = label_encoders["College Name"].inverse_transform(
        filtered_df["College Name"].values
    )
    filtered_df["Branch"] = label_encoders["Branch"].inverse_transform(
        filtered_df["Branch"].values
    )

    return filtered_df[["College Name", "Branch"]].drop_duplicates().to_string(index=False)
70
-
# Gradio Interface
# Four inputs, in the same order as the parameters of `predict_colleges`;
# the result is shown as plain text.
demo = gr.Interface(
    title="AP EAMCET College Predictor",
    description="Enter your details to predict all possible colleges and branches based on your rank.",
    fn=predict_colleges,
    inputs=[
        gr.Dropdown(label="Category", choices=["OC", "BC", "SC", "ST"]),
        gr.Radio(label="Gender", choices=["Male", "Female"]),
        gr.Number(label="Rank"),
        gr.Dropdown(label="Region", choices=["AU", "SV"]),
    ],
    outputs="text",
)

# Start the web UI.
demo.launch()