Spaces:
Sleeping
Sleeping
File size: 3,293 Bytes
6ce9799 e37e0cd 6ce9799 69561ea 6ce9799 69561ea 6ce9799 7c8b249 69561ea 6ce9799 69561ea 6ce9799 69561ea 6ce9799 69561ea 6ce9799 49718ed e37e0cd eef8fb4 69561ea 6ce9799 69561ea 6ce9799 13b4574 6ce9799 49718ed 6ce9799 13b4574 49718ed 6ce9799 49718ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import joblib
import re
# Load and preprocess data
def load_and_preprocess_data(filename):
    """Read the dataset CSV and label-encode its categorical columns.

    Args:
        filename: Path of the CSV file, passed to ``pd.read_csv``.

    Returns:
        A 4-tuple ``(X, y_college_branch, label_encoders, df)``:
        the feature columns, the ("College Name", "Branch") target columns,
        a dict mapping each encoded column name to its fitted
        ``LabelEncoder``, and the full frame with those columns replaced
        by their integer codes.
    """
    categorical_columns = ("College Name", "Category", "Gender", "Branch", "Region")
    feature_columns = ["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]
    target_columns = ["College Name", "Branch"]

    df = pd.read_csv(filename)

    # One independent encoder per column so each can be inverted later.
    label_encoders = {name: LabelEncoder() for name in categorical_columns}
    for name, encoder in label_encoders.items():
        df[name] = encoder.fit_transform(df[name])

    return df[feature_columns], df[target_columns], label_encoders, df
# CSV dataset to load; a relative path handed to pd.read_csv by the loader.
filename = "AP_EAMCET_Engineering_10000 (1).csv"
# Loaded once at import time. `df` (with label-encoded columns) is read by
# predict_colleges; `label_encoders` is dumped to disk further down the script.
X, y_college_branch, label_encoders, df = load_and_preprocess_data(filename)
# Train model
def train_model(X, y):
    """Fit a random-forest classifier on an 80% split of ``(X, y)``.

    Args:
        X: Feature table.
        y: Target table aligned with ``X``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # The split is seeded, so the same 80% of rows is selected on every run.
    # The held-out 20% is not used anywhere in this function.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    return forest.fit(X_train, y_train)
# Fit the classifier at import time, then persist both the model and the
# fitted label encoders with joblib (written to the working directory).
# NOTE(review): in the visible source the fitted model is only saved, never
# queried -- predict_colleges filters `df` directly. Confirm whether the model
# is consumed elsewhere.
college_branch_model = train_model(X, y_college_branch)
joblib.dump(college_branch_model, "college_branch_model.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")
# Prediction function
def predict_colleges(category, gender, rank, region):
    """Return the college/branch pairs whose rank window contains *rank*.

    Args:
        category: Category label as it appears in the dataset (e.g. "OC").
        gender: Gender label as it appears in the dataset.
        rank: Candidate rank; must be a non-negative whole number
            (``int``, or a ``float`` with no fractional part).
        region: Region label as it appears in the dataset (e.g. "AU").

    Returns:
        A DataFrame holding the de-duplicated "College Name" and "Branch"
        columns of every matching row, or a message string when the rank
        is invalid, a label is unknown to the encoders, or no row matches.

    NOTE(review): the message paths return plain strings while the
    interface's output component is a ``gr.Dataframe`` -- confirm the
    installed Gradio version renders them as intended.
    """
    invalid_rank = "Invalid Rank: Please enter a valid positive integer without symbols."

    # Reject None, strings and other non-numeric input up front.
    if not isinstance(rank, (int, float)):
        return invalid_rank
    # float.is_integer() is False for NaN, +/-inf and fractional values, so
    # this both enforces "whole number" and avoids int(nan)/int(inf) raising.
    if isinstance(rank, float) and not rank.is_integer():
        return invalid_rank
    if rank < 0:
        return invalid_rank

    # Use the in-memory encoders that produced the codes stored in `df`,
    # rather than re-reading label_encoders.pkl from disk on every call.
    try:
        category_enc = label_encoders["Category"].transform([category])[0]
        gender_enc = label_encoders["Gender"].transform([gender])[0]
        region_enc = label_encoders["Region"].transform([region])[0]
    except ValueError:
        return "Invalid input values. Please select valid options."

    # Rows for this profile whose [Opening Rank, Closing Rank] window
    # contains the requested rank.
    same_profile = (
        (df["Category"] == category_enc)
        & (df["Gender"] == gender_enc)
        & (df["Region"] == region_enc)
    )
    in_rank_window = (df["Opening Rank"] <= rank) & (df["Closing Rank"] >= rank)

    # Explicit copy: the decoded values are written to an independent frame,
    # never to a view of the shared module-level `df`.
    matches = df.loc[same_profile & in_rank_window, ["College Name", "Branch"]].copy()
    if matches.empty:
        return "No matching colleges found."

    # Decode college names and branches back to their original labels.
    for column in ("College Name", "Branch"):
        matches[column] = label_encoders[column].inverse_transform(matches[column].values)

    return matches.drop_duplicates()
# Gradio Interface
# Input widgets, listed in the same order as predict_colleges' parameters
# (category, gender, rank, region).
prediction_inputs = [
    gr.Dropdown(label="Category", choices=["OC", "BC", "SC", "ST"]),
    gr.Radio(label="Gender", choices=["Male", "Female"]),
    gr.Number(label="Rank", precision=0),  # Restrict to whole numbers only
    gr.Dropdown(label="Region", choices=["AU", "SV"]),
]

# Results are shown as a two-column table.
prediction_output = gr.Dataframe(headers=["College Name", "Branch"])

demo = gr.Interface(
    title="AP EAPCET College Predictor",
    description="Enter your details to predict all possible colleges and branches based on your rank.",
    fn=predict_colleges,
    inputs=prediction_inputs,
    outputs=prediction_output,
)

# Start the web app as soon as the script is executed.
demo.launch()
|