# Repository page header captured along with the file (not part of the program):
# shanmukakomal | update | 7c8b249 | raw | history blame | 3.99 kB
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import joblib
# Load and preprocess data
def load_and_preprocess_data(filename):
    """Read the admissions CSV and integer-encode its categorical columns.

    Args:
        filename: Path of the CSV file handed to ``pd.read_csv``.

    Returns:
        A tuple ``(X, y_college, y_branch, label_encoders, df)``:
        ``X`` holds the five feature columns, ``y_college`` and ``y_branch``
        are the encoded "College Name" and "Branch" columns,
        ``label_encoders`` maps each encoded column name to the
        ``LabelEncoder`` fitted on it, and ``df`` is the whole encoded frame.
    """
    categorical_columns = ("College Name", "Category", "Gender", "Branch", "Region")
    feature_columns = ["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]

    df = pd.read_csv(filename)

    # Keep one fitted encoder per column so the integer codes can be mapped
    # back to the original labels later.
    label_encoders = {}
    for column in categorical_columns:
        encoder = LabelEncoder()
        df[column] = encoder.fit_transform(df[column])
        label_encoders[column] = encoder

    features = df[feature_columns]
    college_target = df["College Name"]
    branch_target = df["Branch"]
    return features, college_target, branch_target, label_encoders, df
# Dataset file read at start-up; the name is passed verbatim to the loader.
filename = "AP_EAMCET_Engineering_10000 (1).csv"
(
    X,
    y_college,
    y_branch,
    label_encoders,
    df,
) = load_and_preprocess_data(filename)
# Train model and evaluate metrics
def train_model(X, y, target_name):
    """Fit a random-forest classifier for one target and print hold-out metrics.

    The data is split 80/20 with a fixed seed, a 100-tree
    ``RandomForestClassifier`` is fitted on the training part, and accuracy,
    weighted precision/recall/F1 and the confusion matrix of the test part
    are printed to stdout.

    Args:
        X: Feature table passed to ``train_test_split``.
        y: Target labels aligned with ``X``.
        target_name: Text used in the heading of the printed metrics report.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # The metric helpers are not imported at module level; without this import
    # the evaluation below fails with NameError on the first call.
    from sklearn.metrics import (
        accuracy_score,
        confusion_matrix,
        f1_score,
        precision_score,
        recall_score,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predictions on the held-out 20%
    y_pred = model.predict(X_test)

    # Evaluate model metrics. zero_division=1 is the value substituted when a
    # metric's denominator is zero (see the scikit-learn metrics reference).
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=1)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=1)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=1)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print(f"{target_name} Model Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Confusion Matrix:\n{conf_matrix}\n")
    return model
# Train one classifier per prediction target (each call prints its metrics).
college_model = train_model(X, y_college, "College Name")
branch_model = train_model(X, y_branch, "Branch")

# Persist both models and the fitted encoders in the working directory.
for _artifact, _path in (
    (college_model, "college_model.pkl"),
    (branch_model, "branch_model.pkl"),
    (label_encoders, "label_encoders.pkl"),
):
    joblib.dump(_artifact, _path)
# Prediction function
def predict_colleges(category, gender, rank, region):
    """List the college/branch pairs in the dataset that match the inputs.

    The lookup is a filter over the module-level encoded ``df``: a row matches
    when its category, gender and region equal the encoded inputs and ``rank``
    lies between the row's "Opening Rank" and "Closing Rank" (inclusive).

    The module-level ``label_encoders`` are used directly because they are the
    encoders ``df`` was encoded with; the pickled models are not needed for
    this lookup and are therefore not loaded.

    Args:
        category: Category label as it appears in the dataset.
        gender: Gender label as it appears in the dataset.
        rank: Numeric rank of the candidate.
        region: Region label as it appears in the dataset.

    Returns:
        A plain-text table of unique "College Name" / "Branch" pairs, or a
        message string when the inputs are invalid or nothing matches.
    """
    invalid_message = "Invalid input values. Please select valid options."

    # A form field left unset arrives as None; reject it up front instead of
    # failing inside the encoder or the rank comparison.
    if category is None or gender is None or rank is None or region is None:
        return invalid_message

    # Transform input values using label encoders; labels the encoders were
    # not fitted on raise ValueError.
    try:
        category_enc = label_encoders["Category"].transform([category])[0]
        gender_enc = label_encoders["Gender"].transform([gender])[0]
        region_enc = label_encoders["Region"].transform([region])[0]
    except ValueError:
        return invalid_message

    # Filter dataset based on criteria.
    # NOTE(review): a rank below a row's "Opening Rank" is excluded here;
    # confirm this window is intended rather than only the closing-rank bound.
    matches = (
        (df["Category"] == category_enc)
        & (df["Gender"] == gender_enc)
        & (df["Opening Rank"] <= rank)
        & (df["Closing Rank"] >= rank)
        & (df["Region"] == region_enc)
    )
    if not matches.any():
        return "No matching colleges found."

    # Work on an explicit copy so decoding never writes through a slice of df.
    selected = df.loc[matches, ["College Name", "Branch"]].copy()

    # Decode college names and branches back to their original labels.
    selected["College Name"] = label_encoders["College Name"].inverse_transform(
        selected["College Name"].values
    )
    selected["Branch"] = label_encoders["Branch"].inverse_transform(
        selected["Branch"].values
    )
    return selected.drop_duplicates().to_string(index=False)
# Gradio Interface
# Input widgets, listed in the positional order predict_colleges expects.
_category_input = gr.Dropdown(choices=["OC", "BC", "SC", "ST"], label="Category")
_gender_input = gr.Radio(choices=["Male", "Female"], label="Gender")
_rank_input = gr.Number(label="Rank")
_region_input = gr.Dropdown(choices=["AU", "SV"], label="Region")

demo = gr.Interface(
    fn=predict_colleges,
    inputs=[_category_input, _gender_input, _rank_input, _region_input],
    outputs="text",
    title="AP EAMCET College Predictor",
    description="Enter your details to predict all possible colleges and branches based on your rank.",
)

# Start the web UI.
demo.launch()