File size: 3,048 Bytes
6ce9799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69561ea
6ce9799
69561ea
6ce9799
7c8b249
69561ea
6ce9799
69561ea
 
6ce9799
 
 
 
 
69561ea
6ce9799
69561ea
6ce9799
 
 
 
69561ea
6ce9799
 
 
 
 
 
 
 
 
 
69561ea
6ce9799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13b4574
6ce9799
 
 
 
 
 
 
 
 
 
 
13b4574
6ce9799
 
 
 
69561ea
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import joblib

# Load and preprocess data
def load_and_preprocess_data(filename):
    """Read the CSV at *filename* and label-encode its categorical columns.

    The columns "College Name", "Category", "Gender", "Branch" and "Region"
    are each replaced in the frame by integer codes from their own
    ``LabelEncoder``; the fitted encoders are kept so the codes can be
    mapped back to the original labels later.

    Args:
        filename: Path (or any source accepted by ``pd.read_csv``) of the
            dataset.

    Returns:
        A 4-tuple ``(X, y_college_branch, label_encoders, df)``:
        the feature frame (Category, Gender, Opening Rank, Closing Rank,
        Region), the target frame (College Name, Branch), a dict mapping
        each encoded column name to its fitted encoder, and the full
        encoded frame.
    """
    encoded_columns = ["College Name", "Category", "Gender", "Branch", "Region"]
    feature_columns = ["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]
    target_columns = ["College Name", "Branch"]

    df = pd.read_csv(filename)

    # One encoder per column: each column has its own label vocabulary.
    label_encoders = {}
    for column_name in encoded_columns:
        encoder = LabelEncoder()
        df[column_name] = encoder.fit_transform(df[column_name])
        label_encoders[column_name] = encoder

    features = df[feature_columns]
    targets = df[target_columns]
    return features, targets, label_encoders, df

# Dataset path; being relative, it is resolved against the current working
# directory when the script starts.
filename = "AP_EAMCET_Engineering_10000 (1).csv"
# Executed at import time. The resulting module-level names are used further
# down: X and y_college_branch feed the training call, label_encoders is
# persisted with joblib, and df (the encoded frame) is what predict_colleges
# filters.
X, y_college_branch, label_encoders, df = load_and_preprocess_data(filename)

# Train model
def train_model(X, y):
    """Fit a random forest on an 80% training split of ``(X, y)``.

    The data is split 80/20 with a fixed seed; only the training portion is
    used and the held-out portion is discarded without evaluation.

    Args:
        X: Feature table.
        y: Target table aligned row-for-row with ``X``.

    Returns:
        The fitted ``RandomForestClassifier`` (100 trees, ``random_state=42``).
    """
    # The test halves of the split are intentionally ignored here.
    train_features, _, train_targets, _ = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    forest.fit(train_features, train_targets)
    return forest

# Train the (College Name, Branch) classifier at import time.
# NOTE(review): predict_colleges below filters df directly and never calls
# this model — confirm whether the model is consumed elsewhere.
college_branch_model = train_model(X, y_college_branch)

# Persist the fitted model and the label encoders into the current working
# directory (relative paths), overwriting any existing files of the same name.
joblib.dump(college_branch_model, "college_branch_model.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")

# Prediction function
def predict_colleges(category, gender, rank, region):
    """Return the colleges and branches whose rank window contains *rank*.

    Rows of the module-level encoded frame ``df`` are kept when their
    Category, Gender and Region match the inputs and
    ``Opening Rank <= rank <= Closing Rank``.

    Args:
        category: Category label as it appears in the dataset.
        gender: Gender label as it appears in the dataset.
        rank: Numeric rank of the candidate.
        region: Region label as it appears in the dataset.

    Returns:
        A ``pandas.DataFrame`` with the decoded, de-duplicated
        "College Name" and "Branch" columns of the matching rows.

    Raises:
        gr.Error: If an input is missing or not a label known to the
            encoders, or if no row matches. These cases used to return a
            plain string, but the output component is ``gr.Dataframe``,
            which treats a string as a CSV file path, so the message never
            reached the user. ``gr.Error`` shows the same text in the UI.
    """
    # An empty gr.Number field arrives as None; comparing it against the rank
    # columns would raise a TypeError further down.
    if rank is None:
        raise gr.Error("Invalid input values. Please select valid options.")

    # Use the module-level encoders that produced df's integer codes instead
    # of re-reading the pickle from disk on every request.
    try:
        category_enc = label_encoders["Category"].transform([category])[0]
        gender_enc = label_encoders["Gender"].transform([gender])[0]
        region_enc = label_encoders["Region"].transform([region])[0]
    except ValueError:
        # LabelEncoder.transform raises ValueError for labels it never saw.
        raise gr.Error("Invalid input values. Please select valid options.") from None

    matches_inputs = (
        (df["Category"] == category_enc)
        & (df["Gender"] == gender_enc)
        & (df["Opening Rank"] <= rank)
        & (df["Closing Rank"] >= rank)
        & (df["Region"] == region_enc)
    )

    # De-duplicate on the integer codes first (the encoding is one-to-one, so
    # the surviving rows are the same), and take an explicit copy so the
    # decoding below never writes into a view of the shared df.
    result = df.loc[matches_inputs, ["College Name", "Branch"]].drop_duplicates().copy()

    if result.empty:
        raise gr.Error("No matching colleges found.")

    # Map the integer codes back to the original labels.
    result["College Name"] = label_encoders["College Name"].inverse_transform(
        result["College Name"].values
    )
    result["Branch"] = label_encoders["Branch"].inverse_transform(
        result["Branch"].values
    )
    return result

# Gradio interface: four inputs are passed positionally to predict_colleges
# (category, gender, rank, region); its result is rendered as a table.
# NOTE(review): the choices below are hard-coded; they must match the labels
# present in the dataset or predict_colleges will reject them — confirm.
demo = gr.Interface(
    fn=predict_colleges,
    inputs=[
        gr.Dropdown(choices=["OC", "BC", "SC", "ST"], label="Category"),
        gr.Radio(choices=["Male", "Female"], label="Gender"),
        gr.Number(label="Rank"),
        gr.Dropdown(choices=["AU", "SV"], label="Region")
    ],
    outputs=gr.Dataframe(headers=["College Name", "Branch"]),
    title="AP EAMCET College Predictor",
    description="Enter your details to predict all possible colleges and branches based on your rank."
)

# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. to reuse predict_colleges) does not block on a
# running server.
if __name__ == "__main__":
    demo.launch()