import joblib
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


# Load and preprocess data
def load_and_preprocess_data(filename):
    """Read the CSV at *filename* and label-encode its categorical columns.

    The columns "College Name", "Category", "Gender", "Branch" and "Region"
    are replaced in place by integer codes; the fitted encoder for each
    column is kept so the codes can be mapped back to text later.

    Args:
        filename: Path of the CSV file to load.

    Returns:
        A tuple ``(X, y_college, y_branch, label_encoders, df)`` where ``X``
        holds the feature columns, ``y_college`` / ``y_branch`` are the
        encoded targets, ``label_encoders`` maps column name to its fitted
        ``LabelEncoder``, and ``df`` is the full frame with encoded columns.
    """
    df = pd.read_csv(filename)

    label_encoders = {}
    for col in ["College Name", "Category", "Gender", "Branch", "Region"]:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

    X = df[["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]]
    y_college = df["College Name"]
    y_branch = df["Branch"]
    return X, y_college, y_branch, label_encoders, df


filename = "/content/AP_EAMCET_Engineering_10000 (1).csv"
X, y_college, y_branch, label_encoders, df = load_and_preprocess_data(filename)


# Train model and evaluate metrics
def train_model(X, y, target_name):
    """Fit a random forest on an 80/20 split and print its test metrics.

    Args:
        X: Feature table.
        y: Target labels aligned with ``X``.
        target_name: Label used in the heading of the printed report.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluate model metrics. The metric functions come from sklearn.metrics,
    # which the original file never imported (NameError on first use).
    # zero_division=1 scores classes with no predicted/true samples as 1
    # instead of emitting a warning.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=1)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=1)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=1)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print(f"{target_name} Model Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Confusion Matrix:\n{conf_matrix}\n")

    return model


# Train separate models
college_model = train_model(X, y_college, "College Name")
branch_model = train_model(X, y_branch, "Branch")

# Save models and encoders
joblib.dump(college_model, "college_model.pkl")
joblib.dump(branch_model, "branch_model.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")


# Prediction function
def predict_colleges(category, gender, rank, region):
    """List every college/branch whose rank window contains *rank*.

    The lookup filters the already-encoded module-level ``df`` on category,
    gender and region, keeping rows where
    ``Opening Rank <= rank <= Closing Rank``.

    Args:
        category: Category label as shown in the UI (e.g. "OC").
        gender: Gender label as shown in the UI.
        rank: Candidate's rank.
        region: Region label as shown in the UI.

    Returns:
        A plain-text table of distinct (College Name, Branch) pairs, or a
        short message when the input is invalid or nothing matches.
    """
    # An unfilled form field is expected to arrive as None; comparing the
    # rank columns against None would raise TypeError, so reject it up front.
    if any(value is None for value in (category, gender, rank, region)):
        return "Invalid input values. Please select valid options."

    # Only the encoders are needed: the result comes from a table lookup,
    # so the pickled classifiers are not loaded here.
    label_encoders = joblib.load("label_encoders.pkl")

    # Transform input values using label encoders
    try:
        category_enc = label_encoders["Category"].transform([category])[0]
        gender_enc = label_encoders["Gender"].transform([gender])[0]
        region_enc = label_encoders["Region"].transform([region])[0]
    except ValueError:
        # transform() raises ValueError for labels not seen during fitting.
        return "Invalid input values. Please select valid options."

    # Filter dataset based on criteria
    filtered_df = df[
        (df["Category"] == category_enc)
        & (df["Gender"] == gender_enc)
        & (df["Opening Rank"] <= rank)
        & (df["Closing Rank"] >= rank)
        & (df["Region"] == region_enc)
    ]

    if filtered_df.empty:
        return "No matching colleges found."

    # Decode college names and branches on an explicit copy so the
    # assignment never targets a slice of the shared global frame.
    matches = filtered_df[["College Name", "Branch"]].copy()
    matches["College Name"] = label_encoders["College Name"].inverse_transform(
        matches["College Name"].values
    )
    matches["Branch"] = label_encoders["Branch"].inverse_transform(
        matches["Branch"].values
    )

    result = matches.drop_duplicates().to_string(index=False)
    return result


# Gradio Interface
demo = gr.Interface(
    fn=predict_colleges,
    inputs=[
        gr.Dropdown(choices=["OC", "BC", "SC", "ST"], label="Category"),
        gr.Radio(choices=["Male", "Female"], label="Gender"),
        gr.Number(label="Rank"),
        gr.Dropdown(choices=["AU", "SV"], label="Region")
    ],
    outputs="text",
    title="AP EAMCET College Predictor",
    description="Enter your details to predict all possible colleges and branches based on your rank."
)

demo.launch()