# Spaces: pkiage / credit_risk_modeling_demo
# File size: 12,795 Bytes
# 232e5e5

from typing import Union
import pandas as pd
import streamlit as st
import numpy as np
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
)
from sklearn.linear_model import LogisticRegression
from xgboost.sklearn import XGBClassifier
from common.data import SplitDataset
from common.util import (
    create_cross_validation_df,
    cross_validation_scores,
    get_df_trueStatus_probabilityDefault_threshStatus_loanAmount,
)
from common.views import (
    cross_validation_graph,
)


def make_evaluation_view(
    model_name_short: str,
    model_name_generic: str,
):
    """Build a Streamlit model-evaluation view for one model family.

    Args:
        model_name_short: Suffix appended to every widget ``key`` rendered by
            the view, which keeps the keys distinct between views built by
            this factory.
        model_name_generic: Human-readable model name shown in the header.

    Returns:
        The ``view`` callable, which renders the evaluation page.
    """

    def view(
        clf_gbt_model: Union[XGBClassifier, LogisticRegression],
        split_dataset: SplitDataset,
        currency: str,
        prob_thresh_selected,
        predicted_default_status,
    ):
        """Render cross validation, classification report, confusion matrix
        and bad-rate sections for a fitted model.

        Args:
            clf_gbt_model: Fitted classifier handed to the scoring helpers.
            split_dataset: Dataset whose ``X_test`` / ``y_test`` are used for
                every section of the page.
            currency: Currency label prefixed to the monetary estimates.
            prob_thresh_selected: Threshold forwarded to
                ``get_df_trueStatus_probabilityDefault_threshStatus_loanAmount``.
            predicted_default_status: Predicted labels compared against
                ``split_dataset.y_test``.
                NOTE(review): labels are assumed to be 0 (non-default) and
                1 (default), as the bad-rate filter below already relies on.

        Returns:
            The DataFrame returned by
            ``get_df_trueStatus_probabilityDefault_threshStatus_loanAmount``.
        """
        st.header(f"Model Evaluation - {model_name_generic}")

        # ------------------------------------------------------------------
        # Cross validation
        # ------------------------------------------------------------------
        st.subheader("Cross Validation")

        st.write("Shows how our model will perform as new loans come in.")
        st.write(
            "If evaluation metric for test and train set improve as models \
            train on each fold suggests performance will be stable."
        )

        st.write("XGBoost cross validation test:")

        stcol_seed, stcol_eval_metric = st.columns(2)

        with stcol_seed:
            cv_seed = int(
                st.number_input(
                    label="Random State Seed for Cross Validation:",
                    value=123235,
                    key=f"cv_seed_{model_name_short}",
                )
            )

        with stcol_eval_metric:
            eval_metric = st.selectbox(
                label="Select evaluation metric",
                options=[
                    "auc",
                    "aucpr",
                    "rmse",
                    "mae",
                    "logloss",
                    "error",
                    "merror",
                    "mlogloss",
                ],
                key=f"eval_metric_{model_name_short}",
            )

        stcol_trees, stcol_eval_nfold, stcol_earlystoppingrounds = st.columns(
            3
        )

        with stcol_trees:
            trees = int(
                st.number_input(
                    label="Number of trees",
                    value=5,
                    key=f"trees_{model_name_short}",
                )
            )

        with stcol_eval_nfold:
            nfolds = int(
                st.number_input(
                    label="Number of folds",
                    value=5,
                    key=f"nfolds_{model_name_short}",
                )
            )

        with stcol_earlystoppingrounds:
            early_stopping_rounds = int(
                st.number_input(
                    label="Early stopping rounds",
                    value=10,
                    key=f"early_stopping_rounds_{model_name_short}",
                )
            )

        # Only the results frame is displayed; the first returned value is
        # not needed here.
        _, cv_df = create_cross_validation_df(
            split_dataset.X_test,
            split_dataset.y_test,
            eval_metric,
            cv_seed,
            trees,
            nfolds,
            early_stopping_rounds,
        )

        st.write(cv_df)

        overfit_test = st.radio(
            label="Overfit test:",
            options=("No", "Yes"),
            key=f"overfit_test_{model_name_short}",
        )

        if overfit_test == "Yes":
            st.write("Overfit test:")
            iterations = int(
                st.number_input(
                    label="Number of folds (iterations)",
                    value=500,
                    key=f"iterations_{model_name_short}",
                )
            )

            # `iterations` fills both the "trees" and the "early stopping
            # rounds" positions of the helper.
            _, cv_df_it = create_cross_validation_df(
                split_dataset.X_test,
                split_dataset.y_test,
                eval_metric,
                cv_seed,
                iterations,
                nfolds,
                iterations,
            )

            fig_it = cross_validation_graph(cv_df_it, eval_metric, iterations)
            st.pyplot(fig_it)

        st.write("Sklearn cross validation test:")
        stcol_scoringmetric, st_nfold = st.columns(2)

        scoring_options = [
            "roc_auc",
            "accuracy",
            "precision",
            "recall",
            "f1",
            "jaccard",
        ]

        with stcol_scoringmetric:
            score_metric = st.selectbox(
                label="Select score",
                options=scoring_options,
                key=f"stcol_scoringmetric_{model_name_short}",
            )

        with st_nfold:
            nfolds_score = int(
                st.number_input(
                    label="Number of folds",
                    value=5,
                    key=f"st_nfold_{model_name_short}",
                )
            )

        cv_scores = cross_validation_scores(
            clf_gbt_model,
            split_dataset.X_test,
            split_dataset.y_test,
            nfolds_score,
            score_metric,
            cv_seed,
        )

        stcol_vals, stcol_mean, st_std = st.columns(3)

        with stcol_vals:
            st.markdown(f"{score_metric} scores:")
            st.write(
                pd.DataFrame(
                    cv_scores,
                    columns=[score_metric],
                )
            )

        with stcol_mean:
            st.metric(
                label=f"Average {score_metric} score ",
                value=f"{cv_scores.mean():.4f}",
                delta=None,
                delta_color="normal",
            )

        with st_std:
            st.metric(
                label=f"{score_metric} standard deviation (+/-)",
                value=f"{cv_scores.std():.4f}",
                delta=None,
                delta_color="normal",
            )

        # ------------------------------------------------------------------
        # Classification report
        # ------------------------------------------------------------------
        st.subheader("Classification Report")

        target_names = ["Non-Default", "Default"]

        classification_report_dict = classification_report(
            split_dataset.y_test,
            predicted_default_status,
            target_names=target_names,
            output_dict=True,
        )

        default_precision = classification_report_dict["Default"]["precision"]
        default_recall = classification_report_dict["Default"]["recall"]
        default_f1 = classification_report_dict["Default"]["f1-score"]
        macro_f1 = classification_report_dict["macro avg"]["f1-score"]

        (
            stcol_defaultpres,
            stcol_defaultrecall,
            stcol_defaultf1score,
            stcol_f1score,
        ) = st.columns(4)
        with stcol_defaultpres:
            st.metric(
                label="Default Precision",
                value=f"{default_precision:.0%}",
                delta=None,
                delta_color="normal",
            )

        with stcol_defaultrecall:
            st.metric(
                label="Default Recall",
                value=f"{default_recall:.0%}",
                delta=None,
                delta_color="normal",
            )

        with stcol_defaultf1score:
            st.metric(
                label="Default F1 Score",
                value=f"{default_f1:.2f}",
                delta=None,
                delta_color="normal",
            )

        with stcol_f1score:
            st.metric(
                label="Macro avg F1 Score (Model F1 Score):",
                value=f"{macro_f1:.2f}",
                delta=None,
                delta_color="normal",
            )

        with st.expander("Classification Report Dictionary:"):
            st.write(classification_report_dict)

        st.markdown(
            f"Default precision: {default_precision:.0%} of loans predicted as default were actually default."
        )

        st.markdown(
            f"Default recall: {default_recall:.0%} of true defaults predicted correctly."
        )

        f1_gap = 1 - classification_report_dict["Default"]["f1-score"]
        st.markdown(
            f'Default F1 score: {"{:.2f}".format(classification_report_dict["Default"]["f1-score"])}\
                is {"{:.2f}".format(f1_gap)} away from perfect precision and recall (no false positive rate).'
        )

        st.markdown(
            f"macro avg F1 score: {macro_f1:.2f} is the models F1 score."
        )

        # ------------------------------------------------------------------
        # Confusion matrix
        # ------------------------------------------------------------------
        st.subheader("Confusion Matrix")

        # Pin the label set so the matrix is always 2x2: without it, a
        # threshold that yields a single class produces a 1x1 matrix and the
        # four-way unpacking below raises ValueError.
        confusion_matrix_array = confusion_matrix(
            split_dataset.y_test, predicted_default_status, labels=[0, 1]
        )

        tn, fp, fn, tp = confusion_matrix_array.ravel()

        with st.expander(
            "Confusion matrix (column name = classification model prediction, row name = true status, values = number of loans"
        ):
            st.write(confusion_matrix_array)

        st.markdown(
            f'{tp} ,\
            {"{:.0%}".format(tp / len(predicted_default_status))} \
                true positives (defaults correctly predicted as defaults).'
        )

        st.markdown(
            f'{fp} ,\
            {"{:.0%}".format(fp / len(predicted_default_status))} \
                false positives (non-defaults incorrectly predicted as defaults).'
        )

        st.markdown(
            f'{fn} ,\
            {"{:.0%}".format(fn / len(predicted_default_status))} \
                false negatives (defaults incorrectly predicted as non-defaults).'
        )

        st.markdown(
            f'{tn} ,\
            {"{:.0%}".format(tn / len(predicted_default_status))} \
                true negatives (non-defaults correctly predicted as non-defaults).'
        )

        # ------------------------------------------------------------------
        # Bad rate
        # ------------------------------------------------------------------
        st.subheader("Bad Rate")

        loans_df = get_df_trueStatus_probabilityDefault_threshStatus_loanAmount(
            clf_gbt_model,
            split_dataset.X_test,
            split_dataset.y_test,
            prob_thresh_selected,
            "loan_amnt",
        )

        with st.expander(
            "Loan Status, Probability of Default, & Loan Amount DataFrame"
        ):
            st.write(loans_df)

        # Loans the model would accept, i.e. predicted as non-default.
        accepted_loans = loans_df[loans_df["PREDICT_DEFAULT_STATUS"] == 0]

        # Share of accepted loans that actually defaulted. Equivalent to
        # sum / count, but yields NaN without a 0/0 RuntimeWarning when no
        # loan is accepted.
        bad_rate = accepted_loans["loan_status"].mean()

        with st.expander("Loan Amount Summary Statistics"):
            st.write(loans_df["loan_amnt"].describe())

        avg_loan = loans_df["loan_amnt"].mean()

        # Rows = true status, columns = predicted status. Reindexing to the
        # full 0/1 grid guarantees every cell exists (as 0) even when a class
        # is absent; otherwise the cell lookups below raise KeyError.
        crosstab_df = (
            pd.crosstab(
                loans_df["loan_status"],  # row label
                loans_df["PREDICT_DEFAULT_STATUS"],  # column label
            ).reindex(index=[0, 1], columns=[0, 1], fill_value=0)
            * avg_loan
        )

        with st.expander(
            "Cross tabulation (column name = classification model prediction, row name = true status, values = number of loans * average loan value"
        ):
            st.write(crosstab_df)

        # .loc[true_status, predicted_status]
        value_nondefault_accepted = crosstab_df.loc[0, 0]
        value_default_accepted = crosstab_df.loc[1, 0]
        value_nondefault_rejected = crosstab_df.loc[0, 1]
        value_default_rejected = crosstab_df.loc[1, 1]

        st.write(
            f"Bad rate: {bad_rate:.2%} of all the loans the model accepted (classified as non-default) from the test set were actually defaults."
        )

        st.write(
            f"Estimated value of the bad rate is {currency} {value_default_accepted:,.2f}."
        )

        # NOTE(review): this sums the *predicted* non-default column (all
        # accepted loans), while the message says "actual non-default".
        # Confirm which was intended before changing either side.
        st.write(
            f"Total estimated value of actual non-default loans is {currency} {value_nondefault_accepted + value_default_accepted:,.2f}"
        )

        st.write(
            f"Estimated value of loans incorrectly predicted as default is {currency} {value_nondefault_rejected:,.2f}"
        )

        st.write(
            f"Estimated value of loans correctly predicted as defaults is {currency} {value_default_rejected:,.2f}"
        )

        return loans_df

    return view


# Ready-made evaluation views, one per supported model family. The short name
# is the suffix used in the view's widget keys; the generic name appears in
# the page header.
decision_tree_evaluation_view = make_evaluation_view(
    model_name_short="gbt",
    model_name_generic="Decision Tree",
)
logistic_evaluation_view = make_evaluation_view(
    model_name_short="lg",
    model_name_generic="Logistic Regression",
)