# -*- coding: utf-8 -*-
"""Untitled32.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1fKN0jOoDSOaUCMAAoxNUnSUd-HPcRXNZ

Streamlit app: fits three classifiers on a loan-application table and
predicts "Accepted" / "Rejected" for the values entered in a form.
"""

from lightgbm import LGBMClassifier
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import streamlit as st

DATA_PATH = 'archive (8).zip'

# Columns kept from the raw table; rows with a missing value in any of
# them are dropped before encoding.
to_use_cols = [
    'as_of_year',
    'county_name',
    'applicant_sex_name',
    'action_taken_name',
    'loan_amount_000s',
    'applicant_income_000s',
    'state_name',
    'property_type_name',
    'loan_type_name',
]

# Groupings of the `action_taken_name` values. Only `succeeded` drives the
# target encoding; the other two are kept for reference.
succeeded = [
    'Loan originated',
    'Loan purchased by the institution',
]
failed = [
    'Application approved but not accepted',
    'Preapproval request denied by financial institution',
    'Application denied by financial institution',
    'Preapproval request approved but not accepted',
]
user_error = [
    'File closed for incompleteness',
    'Application withdrawn by applicant',
]

# Integer code for each loan type name.
mapped_type = {
    'Conventional': 0,
    'FHA-insured': 1,
    'VA-guaranteed': 2,
    'FSA/RHS-guaranteed': 3,
}

# Feature columns in the order the models are fitted on. Rows passed to
# `predict` must carry the same names in the same order.
feature_cols = [
    'county_code',
    'sex_encoded',
    'property_encoded',
    'loan_encoded',
    'applicant_income_000s',
    'loan_amount_000s',
]
target_col = 'action_taken_name'

# Streamlit re-executes this script on every widget interaction. Use
# `st.cache_resource` when the installed Streamlit provides it so the data
# load and model fitting run once per process; otherwise fall back to a
# pass-through decorator (the original, uncached behaviour).
_cache_resource = getattr(st, 'cache_resource', lambda func: func)


def mapped(x):
    """Return 1 when *x* is one of the ``succeeded`` outcomes, else 0."""
    return 1 if x in succeeded else 0


@_cache_resource
def _load_and_train():
    """Load the data, encode it and fit the three classifiers.

    Returns:
        A tuple ``(frame, county_codes, models)`` where ``frame`` is the
        cleaned and encoded DataFrame, ``county_codes`` maps each county
        name to its integer code, and ``models`` maps the model label shown
        in the UI to the fitted estimator.

    Raises:
        KeyError: if ``loan_type_name`` holds a value missing from
            ``mapped_type``.

    NOTE: when cached, the result is reused until the cache is cleared, so
    changes to the data file are not picked up automatically.
    """
    data = pd.read_csv(DATA_PATH)
    # Explicit copy: new columns are added below, and assigning into the
    # result of a selection/dropna chain triggers SettingWithCopyWarning.
    frame = data[to_use_cols].dropna().copy()

    frame['action_taken_name'] = frame['action_taken_name'].apply(mapped)
    # Plain indexing (not `.map`) so an unknown loan type fails loudly
    # instead of silently becoming NaN.
    frame['loan_encoded'] = frame['loan_type_name'].apply(
        lambda name: mapped_type[name]
    )
    frame['property_encoded'] = frame['property_type_name'].apply(
        lambda name: 1 if name == 'Manufactured housing' else 0
    )
    # Counties are numbered in order of first appearance.
    county_codes = {
        county: index
        for index, county in enumerate(frame['county_name'].unique())
    }
    frame['county_code'] = frame['county_name'].map(county_codes)
    frame['sex_encoded'] = frame['applicant_sex_name'].apply(
        lambda name: 1 if name == 'Male' else 0
    )

    # The held-out 30% is not used further; the split is kept so the models
    # are fitted on the same 70% subset as before.
    X_train, _X_test, y_train, _y_test = train_test_split(
        frame[feature_cols],
        frame[target_col],
        test_size=0.3,
        random_state=42,
    )

    models = {
        'RandomForest': RandomForestClassifier(n_estimators=200),
        'Tree': DecisionTreeClassifier(),
        'LGBM': LGBMClassifier(n_estimators=200),
    }
    for model in models.values():
        model.fit(X_train, y_train)

    return frame, county_codes, models


data_no_na, code, models = _load_and_train()


def mapping(**kwargs):
    """Encode raw form values the same way the training columns were encoded.

    Expects the keys ``county``, ``sex``, ``property_type``, ``loan``,
    ``income`` and ``loan_amount``; returns the same dict with those entries
    replaced by their numeric encodings.

    Raises:
        KeyError: if a required key is missing, or if ``county`` / ``loan``
            is not a known category.
    """
    kwargs['county'] = code[kwargs['county']]
    kwargs['sex'] = 1 if kwargs['sex'] == 'Male' else 0
    kwargs['property_type'] = (
        1 if kwargs['property_type'] == 'Manufactured housing' else 0
    )
    kwargs['loan'] = mapped_type[kwargs['loan']]
    kwargs['income'] = float(kwargs['income'])
    kwargs['loan_amount'] = float(kwargs['loan_amount'])
    return kwargs


st.title('Loan Approval for Washington House')
st.dataframe(data_no_na.head())

col1, col2 = st.columns([3, 1])

# Rendered in col2, i.e. outside the form container.
model_choice = col2.selectbox('Choose model', ['RandomForest', 'Tree', 'LGBM'])

with col1:
    st.text('Please, fill this form')
    with st.form("my_form"):
        county = st.selectbox('County', data_no_na.county_name.unique().tolist())
        sex = st.selectbox('Sex', ['Male', 'Female'])
        property_type = st.selectbox(
            'Property Type', data_no_na.property_type_name.unique().tolist()
        )
        loan = st.selectbox('Loan Type', data_no_na.loan_type_name.unique().tolist())
        income = st.number_input('Your Yearly income (in 000$)')
        loan_amount = st.number_input('Loan Amount')

        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")

if submitted:
    col2.info('Predicting')
    encoded = mapping(
        county=county,
        sex=sex,
        property_type=property_type,
        loan=loan,
        income=income,
        loan_amount=loan_amount,
    )
    # Build the row by column name instead of relying on dict value order,
    # and keep the feature names the models were fitted with.
    new_demand = pd.DataFrame(
        [
            {
                'county_code': encoded['county'],
                'sex_encoded': encoded['sex'],
                'property_encoded': encoded['property_type'],
                'loan_encoded': encoded['loan'],
                'applicant_income_000s': encoded['income'],
                'loan_amount_000s': encoded['loan_amount'],
            }
        ],
        columns=feature_cols,
    )
    result = models[model_choice].predict(new_demand)
    if result[0]:
        col2.success('Accepted')
    else:
        col2.error("Rejected")