from typing import Union

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
)
from xgboost.sklearn import XGBClassifier

from common.data import SplitDataset
from common.util import (
    create_cross_validation_df,
    cross_validation_scores,
    get_df_trueStatus_probabilityDefault_threshStatus_loanAmount,
)
from common.views import (
    cross_validation_graph,
)

def make_evaluation_view(
    model_name_short: str,
    model_name_generic: str,
):
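    """Build the Streamlit evaluation view for one model.

    `model_name_short` namespaces the widget keys so two views can live on
    the same page; `model_name_generic` is the display name used in the
    header.
    """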
    def view(
        clf_gbt_model: Union[XGBClassifier, LogisticRegression],
        split_dataset: SplitDataset,
        currency: str,
        prob_thresh_selected,
        predicted_default_status,
    ):
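        """Render the evaluation page for a fitted classifier.

        All metrics are computed on the test split of `split_dataset`,
        using the caller-supplied class predictions
        (`predicted_default_status`) and probability threshold
        (`prob_thresh_selected`).
        """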
        st.header(f"Model Evaluation - {model_name_generic}")

        st.subheader("Cross Validation")

        st.write("Shows how the model is likely to perform as new loans come in.")
        st.write(
            "If the evaluation metric improves on both the train and test "
            "sets as the model trains on each fold, performance is likely "
            "to be stable."
        )

        st.write("XGBoost cross validation test:")

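        # User-selected seed and evaluation metric, passed through to
        # create_cross_validation_df below.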
        stcol_seed, stcol_eval_metric = st.columns(2)

        with stcol_seed:
            cv_seed = int(
                st.number_input(
                    label="Random State Seed for Cross Validation:",
                    value=123235,
                    key=f"cv_seed_{model_name_short}",
                )
            )

        with stcol_eval_metric:
            eval_metric = st.selectbox(
                label="Select evaluation metric",
                options=[
                    "auc",
                    "aucpr",
                    "rmse",
                    "mae",
                    "logloss",
                    "error",
                    "merror",
                    "mlogloss",
                ],
                key=f"eval_metric_{model_name_short}",
            )

        stcol_trees, stcol_eval_nfold, stcol_earlystoppingrounds = st.columns(3)

        with stcol_trees:
            trees = int(
                st.number_input(
                    label="Number of trees",
                    value=5,
                    key=f"trees_{model_name_short}",
                )
            )

        with stcol_eval_nfold:
            nfolds = int(
                st.number_input(
                    label="Number of folds",
                    value=5,
                    key=f"nfolds_{model_name_short}",
                )
            )

        with stcol_earlystoppingrounds:
            early_stopping_rounds = int(
                st.number_input(
                    label="Early stopping rounds",
                    value=10,
                    key=f"early_stopping_rounds_{model_name_short}",
                )
            )
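        # Run cross-validation on the held-out test split and display the
        # resulting metrics table.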
        DTrain, cv_df = create_cross_validation_df(
            split_dataset.X_test,
            split_dataset.y_test,
            eval_metric,
            cv_seed,
            trees,
            nfolds,
            early_stopping_rounds,
        )

        st.write(cv_df)

        scoring_options = [
            "roc_auc",
            "accuracy",
            "precision",
            "recall",
            "f1",
            "jaccard",
        ]

        overfit_test = st.radio(
            label="Overfit test:",
            options=("No", "Yes"),
            key=f"overfit_test_{model_name_short}",
        )
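        # Optional overfit test: repeat cross-validation with a much larger
        # number of boosting iterations and plot the train/test curves.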
        if overfit_test == "Yes":
            st.write("Overfit test:")
            iterations = int(
                st.number_input(
                    label="Number of iterations",
                    value=500,
                    key=f"iterations_{model_name_short}",
                )
            )

            DTrain, cv_df_it = create_cross_validation_df(
                split_dataset.X_test,
                split_dataset.y_test,
                eval_metric,
                cv_seed,
                iterations,
                nfolds,
                iterations,
            )

            fig_it = cross_validation_graph(cv_df_it, eval_metric, iterations)
            st.pyplot(fig_it)

        st.write("Sklearn cross validation test:")
        stcol_scoringmetric, st_nfold = st.columns(2)

        with stcol_scoringmetric:
            score_metric = st.selectbox(
                label="Select score",
                options=scoring_options,
                key=f"stcol_scoringmetric_{model_name_short}",
            )

        with st_nfold:
            nfolds_score = int(
                st.number_input(
                    label="Number of folds",
                    value=5,
                    key=f"st_nfold_{model_name_short}",
                )
            )
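        # Score the fitted model with scikit-learn cross-validation using the
        # selected scoring metric and number of folds.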
        cv_scores = cross_validation_scores(
            clf_gbt_model,
            split_dataset.X_test,
            split_dataset.y_test,
            nfolds_score,
            score_metric,
            cv_seed,
        )

        stcol_vals, stcol_mean, st_std = st.columns(3)

        with stcol_vals:
            st.markdown(f"{score_metric} scores:")
            st.write(
                pd.DataFrame(
                    cv_scores,
                    columns=[score_metric],
                )
            )

        with stcol_mean:
            st.metric(
                label=f"Average {score_metric} score",
                value="{:.4f}".format(cv_scores.mean()),
                delta=None,
                delta_color="normal",
            )

        with st_std:
            st.metric(
                label=f"{score_metric} standard deviation (+/-)",
                value="{:.4f}".format(cv_scores.std()),
                delta=None,
                delta_color="normal",
            )
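        # Per-class precision, recall, and F1 computed from the thresholded
        # predictions on the test split.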
        st.subheader("Classification Report")

        target_names = ["Non-Default", "Default"]

        classification_report_dict = classification_report(
            split_dataset.y_test,
            predicted_default_status,
            target_names=target_names,
            output_dict=True,
        )

        (
            stcol_defaultpres,
            stcol_defaultrecall,
            stcol_defaultf1score,
            stcol_f1score,
        ) = st.columns(4)

        with stcol_defaultpres:
            st.metric(
                label="Default Precision",
                value="{:.0%}".format(
                    classification_report_dict["Default"]["precision"]
                ),
                delta=None,
                delta_color="normal",
            )

        with stcol_defaultrecall:
            st.metric(
                label="Default Recall",
                value="{:.0%}".format(
                    classification_report_dict["Default"]["recall"]
                ),
                delta=None,
                delta_color="normal",
            )

        with stcol_defaultf1score:
            st.metric(
                label="Default F1 Score",
                value="{:.2f}".format(
                    classification_report_dict["Default"]["f1-score"]
                ),
                delta=None,
                delta_color="normal",
            )

        with stcol_f1score:
            st.metric(
                label="Macro avg F1 Score (Model F1 Score):",
                value="{:.2f}".format(
                    classification_report_dict["macro avg"]["f1-score"]
                ),
                delta=None,
                delta_color="normal",
            )

        with st.expander("Classification Report Dictionary:"):
            st.write(classification_report_dict)

        st.markdown(
            f'Default precision: {"{:.0%}".format(classification_report_dict["Default"]["precision"])} of loans predicted as default were actually default.'
        )

        st.markdown(
            f'Default recall: {"{:.0%}".format(classification_report_dict["Default"]["recall"])} of true defaults were predicted correctly.'
        )

        f1_gap = 1 - classification_report_dict["Default"]["f1-score"]
        st.markdown(
            f'Default F1 score: {"{:.2f}".format(classification_report_dict["Default"]["f1-score"])} '
            f'is {"{:.2f}".format(f1_gap)} away from a perfect score of 1 (perfect precision and recall).'
        )

        st.markdown(
            f'Macro avg F1 score: {"{:.2f}".format(classification_report_dict["macro avg"]["f1-score"])} is the model\'s F1 score.'
        )
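        # Raw counts of true/false positives and negatives on the test split.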
        st.subheader("Confusion Matrix")

        # Compute the matrix once; ravel() yields (tn, fp, fn, tp) for the
        # binary case.
        confusion_matrix_array = confusion_matrix(
            split_dataset.y_test, predicted_default_status
        )
        tn, fp, fn, tp = confusion_matrix_array.ravel()

        with st.expander(
            "Confusion matrix (column name = classification model prediction, row name = true status, values = number of loans)"
        ):
            st.write(confusion_matrix_array)

        st.markdown(
            f'{tp}, {"{:.0%}".format(tp / len(predicted_default_status))} '
            "true positives (defaults correctly predicted as defaults)."
        )

        st.markdown(
            f'{fp}, {"{:.0%}".format(fp / len(predicted_default_status))} '
            "false positives (non-defaults incorrectly predicted as defaults)."
        )

        st.markdown(
            f'{fn}, {"{:.0%}".format(fn / len(predicted_default_status))} '
            "false negatives (defaults incorrectly predicted as non-defaults)."
        )

        st.markdown(
            f'{tn}, {"{:.0%}".format(tn / len(predicted_default_status))} '
            "true negatives (non-defaults correctly predicted as non-defaults)."
        )
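        # Bad rate: the share of accepted loans (predicted non-default) that
        # actually defaulted.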
        st.subheader("Bad Rate")

        df_trueStatus_probabilityDefault_threshStatus_loanAmount = (
            get_df_trueStatus_probabilityDefault_threshStatus_loanAmount(
                clf_gbt_model,
                split_dataset.X_test,
                split_dataset.y_test,
                prob_thresh_selected,
                "loan_amnt",
            )
        )

        with st.expander(
            "Loan Status, Probability of Default, & Loan Amount DataFrame"
        ):
            st.write(df_trueStatus_probabilityDefault_threshStatus_loanAmount)

        accepted_loans = df_trueStatus_probabilityDefault_threshStatus_loanAmount[
            df_trueStatus_probabilityDefault_threshStatus_loanAmount[
                "PREDICT_DEFAULT_STATUS"
            ]
            == 0
        ]

        bad_rate = (
            np.sum(accepted_loans["loan_status"])
            / accepted_loans["loan_status"].count()
        )

        with st.expander("Loan Amount Summary Statistics"):
            st.write(
                df_trueStatus_probabilityDefault_threshStatus_loanAmount[
                    "loan_amnt"
                ].describe()
            )

        avg_loan = np.mean(
            df_trueStatus_probabilityDefault_threshStatus_loanAmount["loan_amnt"]
        )
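        # Cross-tabulate true status against predicted status, scaling each
        # cell by the average loan amount to estimate monetary values.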
        crosstab_df = pd.crosstab(
            df_trueStatus_probabilityDefault_threshStatus_loanAmount["loan_status"],
            df_trueStatus_probabilityDefault_threshStatus_loanAmount[
                "PREDICT_DEFAULT_STATUS"
            ],
        ).apply(lambda x: x * avg_loan, axis=0)

        with st.expander(
            "Cross tabulation (column name = classification model prediction, row name = true status, values = number of loans * average loan value)"
        ):
            st.write(crosstab_df)

        st.write(
            f'Bad rate: {"{:.2%}".format(bad_rate)} of all the loans the model accepted (classified as non-default) from the test set were actually defaults.'
        )

        st.write(
            f'Estimated value of the bad-rate loans (accepted but actually defaulted) is {currency} {"{:,.2f}".format(crosstab_df.loc[1, 0])}.'
        )

        st.write(
            f'Total estimated value of all accepted loans (predicted non-default) is {currency} {"{:,.2f}".format(crosstab_df.loc[0, 0] + crosstab_df.loc[1, 0])}.'
        )

        st.write(
            f'Estimated value of loans incorrectly predicted as default is {currency} {"{:,.2f}".format(crosstab_df.loc[0, 1])}.'
        )

        st.write(
            f'Estimated value of loans correctly predicted as default is {currency} {"{:,.2f}".format(crosstab_df.loc[1, 1])}.'
        )

        return df_trueStatus_probabilityDefault_threshStatus_loanAmount

    return view


decision_tree_evaluation_view = make_evaluation_view("gbt", "Decision Tree")
logistic_evaluation_view = make_evaluation_view("lg", "Logistic Regression")
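
# Minimal usage sketch (hypothetical names: `gbt_model`, `dataset`, `thresh`,
# and `preds` stand in for objects created elsewhere in the app):
#
#     decision_tree_evaluation_view(gbt_model, dataset, "USD", thresh, preds)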