# Spaces: Running
import pandas as pd | |
import numpy as np | |
import gradio as gr | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.ensemble import RandomForestClassifier | |
import joblib | |
# Load and preprocess data
def load_and_preprocess_data(filename):
    """Read the CSV at *filename* and label-encode its categorical columns.

    A separate ``LabelEncoder`` is fitted for each of "College Name",
    "Category", "Gender", "Branch" and "Region"; the column in the loaded
    frame is overwritten with the encoder's output.

    Args:
        filename: Path of the CSV file handed to ``pd.read_csv``.

    Returns:
        A 5-tuple ``(X, y_college, y_branch, label_encoders, df)``:
        ``X`` holds the feature columns ("Category", "Gender",
        "Opening Rank", "Closing Rank", "Region"), ``y_college`` and
        ``y_branch`` are the encoded "College Name" and "Branch" columns,
        ``label_encoders`` maps column name to its fitted encoder, and
        ``df`` is the full frame after encoding.
    """
    categorical_columns = ("College Name", "Category", "Gender", "Branch", "Region")
    feature_columns = ["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]

    df = pd.read_csv(filename)

    # Keep every fitted encoder so encoded values can be mapped back to labels.
    label_encoders = {}
    for column in categorical_columns:
        encoder = LabelEncoder()
        df[column] = encoder.fit_transform(df[column])
        label_encoders[column] = encoder

    return df[feature_columns], df["College Name"], df["Branch"], label_encoders, df
# Dataset file read at start-up; the path is relative to the working directory.
filename = "AP_EAMCET_Engineering_10000 (1).csv"
(
    X,
    y_college,
    y_branch,
    label_encoders,
    df,
) = load_and_preprocess_data(filename)
# Train model and evaluate metrics
def train_model(X, y, target_name):
    """Fit a random-forest classifier for one target and print hold-out metrics.

    The data is split 80/20 with a fixed seed, a 100-tree
    ``RandomForestClassifier`` is fitted on the training part, and accuracy,
    weighted precision/recall/F1 and the confusion matrix for the test part
    are printed to stdout.

    Args:
        X: Feature table passed to ``train_test_split``.
        y: Target labels aligned with ``X``.
        target_name: Human-readable target name used in the printed header.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # The metric helpers are not among the module's top-level imports; without
    # this import every call below would fail with NameError.
    from sklearn.metrics import (
        accuracy_score,
        confusion_matrix,
        f1_score,
        precision_score,
        recall_score,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # Weighted averaging accounts for class imbalance; zero_division=1 is the
    # value passed for classes where a score would otherwise be undefined.
    weighted = {"average": "weighted", "zero_division": 1}
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, **weighted)
    recall = recall_score(y_test, y_pred, **weighted)
    f1 = f1_score(y_test, y_pred, **weighted)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print(f"{target_name} Model Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Confusion Matrix:\n{conf_matrix}\n")

    return model
# Train separate models
# One classifier per prediction target, both trained on the same features.
college_model = train_model(X, y_college, "College Name")
branch_model = train_model(X, y_branch, "Branch")

# Persist the fitted models and the label encoders next to the script.
joblib.dump(value=college_model, filename="college_model.pkl")
joblib.dump(value=branch_model, filename="branch_model.pkl")
joblib.dump(value=label_encoders, filename="label_encoders.pkl")
# Prediction function
def predict_colleges(category, gender, rank, region):
    """List every college/branch whose rank window contains *rank*.

    The inputs are encoded with the label encoders stored in
    ``label_encoders.pkl`` and matched against the module-level ``df``: a row
    qualifies when its encoded "Category", "Gender" and "Region" equal the
    encoded inputs and ``Opening Rank <= rank <= Closing Rank``.

    Args:
        category: Category label chosen in the UI.
        gender: Gender label chosen in the UI.
        rank: Candidate rank to look up.
        region: Region label chosen in the UI.

    Returns:
        A plain-text table of the distinct "College Name"/"Branch" pairs that
        match, or a short message when an input is missing or unknown or when
        no row matches.
    """
    invalid_message = "Invalid input values. Please select valid options."

    # NOTE(review): an input left empty in the Gradio form presumably arrives
    # as None - confirm for the installed version. Reject it up front so that
    # neither the encoders nor the rank comparison see None.
    if category is None or gender is None or region is None or rank is None:
        return invalid_message

    # Only the encoders are needed here; the lookup below never uses the
    # trained models, so they are no longer unpickled on every request.
    label_encoders = joblib.load("label_encoders.pkl")

    try:
        category_enc = label_encoders["Category"].transform([category])[0]
        gender_enc = label_encoders["Gender"].transform([gender])[0]
        region_enc = label_encoders["Region"].transform([region])[0]
    except ValueError:
        return invalid_message

    matches = df[
        (df["Category"] == category_enc)
        & (df["Gender"] == gender_enc)
        & (df["Opening Rank"] <= rank)
        & (df["Closing Rank"] >= rank)
        & (df["Region"] == region_enc)
    ]
    if matches.empty:
        return "No matching colleges found."

    # Build a new frame with the decoded labels instead of assigning into the
    # filtered slice of ``df`` (chained assignment / SettingWithCopyWarning).
    decoded = pd.DataFrame(
        {
            "College Name": label_encoders["College Name"].inverse_transform(
                matches["College Name"].values
            ),
            "Branch": label_encoders["Branch"].inverse_transform(
                matches["Branch"].values
            ),
        }
    )
    return decoded.drop_duplicates().to_string(index=False)
# Gradio Interface
# Web form: the four inputs are passed positionally to predict_colleges, so
# their order must match that function's parameters.
demo = gr.Interface(
    title="AP EAMCET College Predictor",
    description="Enter your details to predict all possible colleges and branches based on your rank.",
    fn=predict_colleges,
    inputs=[
        gr.Dropdown(choices=["OC", "BC", "SC", "ST"], label="Category"),
        gr.Radio(choices=["Male", "Female"], label="Gender"),
        gr.Number(label="Rank"),
        gr.Dropdown(choices=["AU", "SV"], label="Region"),
    ],
    outputs="text",
)

# Start the Gradio server.
demo.launch()