File size: 3,293 Bytes
6ce9799
 
 
 
 
 
 
e37e0cd
6ce9799
 
 
 
 
 
 
 
 
 
 
 
69561ea
6ce9799
69561ea
6ce9799
7c8b249
69561ea
6ce9799
69561ea
 
6ce9799
 
 
 
 
69561ea
6ce9799
69561ea
6ce9799
 
 
 
49718ed
e37e0cd
eef8fb4
69561ea
6ce9799
 
 
 
 
 
 
 
 
 
69561ea
6ce9799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13b4574
6ce9799
 
 
 
 
 
 
 
49718ed
6ce9799
 
13b4574
49718ed
6ce9799
 
 
49718ed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import math
import re

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load and preprocess data
def load_and_preprocess_data(filename):
    """Read the admissions CSV and label-encode its categorical columns.

    Args:
        filename: Path of the CSV file handed to ``pd.read_csv``.

    Returns:
        A 4-tuple ``(X, y_college_branch, label_encoders, df)``: the feature
        columns, the ``College Name``/``Branch`` target columns, a dict that
        maps each encoded column name to its fitted ``LabelEncoder``, and the
        full frame with those columns replaced by their integer codes.
    """
    encoded_columns = ("College Name", "Category", "Gender", "Branch", "Region")
    feature_columns = ["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]
    target_columns = ["College Name", "Branch"]

    frame = pd.read_csv(filename)

    # One encoder per column, kept so integer codes can be mapped back to labels.
    encoders = {column: LabelEncoder() for column in encoded_columns}
    for column, encoder in encoders.items():
        frame[column] = encoder.fit_transform(frame[column])

    return frame[feature_columns], frame[target_columns], encoders, frame

# Load the dataset once at import time. The encoded frame (`df`) and the fitted
# encoders (`label_encoders`) stay available as module-level globals.
filename = "AP_EAMCET_Engineering_10000 (1).csv"
X, y_college_branch, label_encoders, df = load_and_preprocess_data(filename)

# Train model
def train_model(X, y):
    """Fit a random-forest classifier on the training split of the data.

    Args:
        X: Feature table.
        y: Target table aligned row-for-row with ``X``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # Only the training portion (80%) is used; the held-out 20% is discarded.
    train_features, _, train_targets, _ = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    return RandomForestClassifier(n_estimators=100, random_state=42).fit(
        train_features, train_targets
    )

# Fit the classifier on the encoded features, with the (College Name, Branch)
# columns as the targets.
college_branch_model = train_model(X, y_college_branch)

# Persist the fitted model and the label encoders to disk.
joblib.dump(college_branch_model, "college_branch_model.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")

# Prediction function
def predict_colleges(category, gender, rank, region):
    """List the college/branch pairs whose rank window contains ``rank``.

    Args:
        category: Category label, as known to the ``Category`` encoder.
        gender: Gender label, as known to the ``Gender`` encoder.
        rank: Candidate rank; must be a finite, non-negative ``int`` or
            ``float``.
        region: Region label, as known to the ``Region`` encoder.

    Returns:
        A DataFrame holding the distinct ``College Name``/``Branch`` pairs
        (decoded back to their original labels) of the rows in the
        module-level ``df`` that match the category, gender and region and
        whose ``Opening Rank``..``Closing Rank`` range includes ``rank``.
        A plain message string is returned instead when the rank is invalid,
        when a label is rejected by its encoder, or when no row matches.
    """
    # NOTE(review): the Gradio interface in this file routes this return value
    # to a gr.Dataframe output; confirm that the plain-string messages below
    # are displayed as intended there.

    # NaN and infinities are rejected explicitly: they are not negative, and
    # converting them with int() raises instead of producing a message.
    if (
        not isinstance(rank, (int, float))
        or (isinstance(rank, float) and not math.isfinite(rank))
        or rank < 0
    ):
        return "Invalid Rank: Please enter a valid positive integer without symbols."

    # Translate the selected labels into the integer codes stored in ``df``.
    # The module-level encoders fitted at load time are used directly, so no
    # file has to be read and unpickled on every request.
    try:
        category_enc = label_encoders["Category"].transform([category])[0]
        gender_enc = label_encoders["Gender"].transform([gender])[0]
        region_enc = label_encoders["Region"].transform([region])[0]
    except ValueError:
        return "Invalid input values. Please select valid options."

    same_profile = (
        (df["Category"] == category_enc)
        & (df["Gender"] == gender_enc)
        & (df["Region"] == region_enc)
    )
    in_rank_window = (df["Opening Rank"] <= rank) & (df["Closing Rank"] >= rank)

    # Explicit copy: the decoding below must write into an independent frame,
    # never into a slice of the shared ``df`` (avoids SettingWithCopyWarning).
    matches = df.loc[same_profile & in_rank_window, ["College Name", "Branch"]].copy()

    if matches.empty:
        return "No matching colleges found."

    # Map the integer codes back to the original college and branch labels.
    for column in ("College Name", "Branch"):
        matches[column] = label_encoders[column].inverse_transform(matches[column].values)

    return matches.drop_duplicates()

# Gradio Interface
# The four input components are listed in the same order as the parameters of
# predict_colleges: category, gender, rank, region.
# NOTE(review): the choice strings presumably have to match the labels present
# in the CSV for the label encoders to accept them — verify against the dataset.
demo = gr.Interface(
    fn=predict_colleges,
    inputs=[
        gr.Dropdown(choices=["OC", "BC", "SC", "ST"], label="Category"),
        gr.Radio(choices=["Male", "Female"], label="Gender"),
        gr.Number(precision=0, label="Rank"),  # Restrict to whole numbers only
        gr.Dropdown(choices=["AU", "SV"], label="Region")
    ],
    outputs=gr.Dataframe(headers=["College Name", "Branch"]),
    title="AP EAPCET College Predictor",
    description="Enter your details to predict all possible colleges and branches based on your rank."
)

# Start the web app; this runs whenever the module is executed or imported.
demo.launch()