import pandas as pd
import numpy as np
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import joblib


# Load and preprocess data
def load_and_preprocess_data(filename):
    """Read the CSV at *filename* and label-encode its categorical columns.

    The columns "College Name", "Category", "Gender", "Branch" and "Region"
    are each replaced by integer codes from a freshly fitted ``LabelEncoder``.

    Returns:
        A 4-tuple ``(X, y_college_branch, label_encoders, df)`` where ``X``
        holds the feature columns, ``y_college_branch`` holds the encoded
        "College Name" and "Branch" columns, ``label_encoders`` maps each
        encoded column name to its fitted encoder, and ``df`` is the full
        encoded frame.
    """
    df = pd.read_csv(filename)
    label_encoders = {}
    for col in ["College Name", "Category", "Gender", "Branch", "Region"]:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le
    X = df[["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]]
    y_college_branch = df[["College Name", "Branch"]]
    return X, y_college_branch, label_encoders, df


filename = "AP_EAMCET_Engineering_10000 (1).csv"
X, y_college_branch, label_encoders, df = load_and_preprocess_data(filename)


# Train model
def train_model(X, y):
    """Fit a random forest on an 80% split of ``(X, y)`` and return it.

    The remaining 20% is split off but not used here; the split is kept so
    the fitted model is the same one the script has always produced.
    """
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    return model


college_branch_model = train_model(X, y_college_branch)
joblib.dump(college_branch_model, "college_branch_model.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")


# Prediction function
def predict_colleges(category, gender, rank, region):
    """Return the colleges and branches whose rank window contains *rank*.

    The lookup filters the encoded dataset directly: a row matches when its
    category, gender and region equal the inputs and
    ``Opening Rank <= rank <= Closing Rank``.

    Args:
        category: Category label as it appears in the dataset.
        gender: Gender label as it appears in the dataset.
        rank: The candidate's rank; ``None`` when the field is left empty.
        region: Region label as it appears in the dataset.

    Returns:
        A DataFrame with the decoded, de-duplicated "College Name" and
        "Branch" columns of all matching rows.

    Raises:
        gr.Error: If the rank is missing, an input label is unknown to the
            encoders, or no row matches. Messages are raised rather than
            returned because the output component is a ``gr.Dataframe``,
            which interprets a returned ``str`` as a path to a CSV file.
    """
    if rank is None:
        raise gr.Error("Please enter a rank.")

    # The module-level encoders are the very objects that were dumped to
    # "label_encoders.pkl", so re-reading the pickle on every request is
    # unnecessary.
    encoders = label_encoders

    # Transform input values using label encoders
    try:
        category_enc = encoders["Category"].transform([category])[0]
        gender_enc = encoders["Gender"].transform([gender])[0]
        region_enc = encoders["Region"].transform([region])[0]
    except ValueError:
        raise gr.Error(
            "Invalid input values. Please select valid options."
        ) from None

    # Filter the dataset based on encoded values
    mask = (
        (df["Category"] == category_enc)
        & (df["Gender"] == gender_enc)
        & (df["Opening Rank"] <= rank)
        & (df["Closing Rank"] >= rank)
        & (df["Region"] == region_enc)
    )
    # Copy so the decoded values are written to an independent frame rather
    # than to a slice of the shared module-level ``df``.
    matches = df.loc[mask, ["College Name", "Branch"]].copy()
    if matches.empty:
        raise gr.Error("No matching colleges found.")

    # Decode college names and branches
    for col in ("College Name", "Branch"):
        matches[col] = encoders[col].inverse_transform(matches[col].to_numpy())

    return matches.drop_duplicates()


# Gradio Interface
demo = gr.Interface(
    fn=predict_colleges,
    inputs=[
        gr.Dropdown(choices=["OC", "BC", "SC", "ST"], label="Category"),
        gr.Radio(choices=["Male", "Female"], label="Gender"),
        gr.Number(label="Rank"),
        gr.Dropdown(choices=["AU", "SV"], label="Region"),
    ],
    outputs=gr.Dataframe(headers=["College Name", "Branch"]),
    title="AP EAMCET College Predictor",
    description="Enter your details to predict all possible colleges and branches based on your rank.",
)

demo.launch()