Spaces:

pkiage
/

credit_risk_modeling_demo

App Files Files Community

pkiage committed on Feb 8, 2022

Commit

808b291

•

1 Parent(s): 37b5a88

refactor: xgboost

Browse files

Files changed (5) hide show

app.py +16 -25
src/models/xgboost_model.py +32 -0
src/models/xgboost_predict_model.py +4 -0
src/models/xgboost_test_model.py +4 -0
src/models/xgboost_train_model.py +68 -0

app.py CHANGED Viewed

@@ -1,15 +1,17 @@
-from typing import OrderedDict
 import streamlit as st
-from data_setup import initialise_data
-from views.decision_tree import decisiontree_view
-from views.logistic import logistic_view
-from views.model_comparison import model_comparison_view
-from views.strategy_table import strategy_table_view
 def main():
     currency_options = ["USD", "KES", "GBP"]
     currency = st.sidebar.selectbox(
         label="What currency will you be using?", options=currency_options
     )
@@ -22,30 +24,19 @@ def main():
     st.title("Modelling")
-    model_options = ["Logistic Regression", "Decision Trees"]
-    # Returns list
     models_selected_list = st.sidebar.multiselect(
         label="Select model", options=model_options, default=model_options
     )
     models_selected_set = set(models_selected_list)
-    model_views = OrderedDict()
-    if "Logistic Regression" in models_selected_set:
-        logistic_model_view = logistic_view(split_dataset, currency)
-        model_views["Logistic Regression"] = logistic_model_view
-    if "Decision Trees" in models_selected_set:
-        decision_tree_model_view = decisiontree_view(split_dataset, currency)
-        model_views["Decision Trees"] = decision_tree_model_view
-    if models_selected_list:
-        model_comparison_view(
-            split_dataset,
-            model_views,
-        )
-        strategy_table_view(currency, model_views)
 if __name__ == "__main__":

 import streamlit as st
+from typing import OrderedDict
+from src.features.build_features import initialise_data
+from src.models.xgboost_model import xgboost_class
+from src.models.util_strategy_table import strategy_table_view
 def main():
     currency_options = ["USD", "KES", "GBP"]
+    model_options = ["XGBoost"]
     currency = st.sidebar.selectbox(
         label="What currency will you be using?", options=currency_options
     )
     st.title("Modelling")
     models_selected_list = st.sidebar.multiselect(
         label="Select model", options=model_options, default=model_options
     )
     models_selected_set = set(models_selected_list)
+    model_classes = OrderedDict()
+    if "XGBoost" in models_selected_set:
+        xgboost_model_class = xgboost_class(split_dataset, currency)
+        model_classes["XGBoost"] = xgboost_model_class
+    strategy_table_view(currency, model_classes)
 if __name__ == "__main__":

src/models/xgboost_model.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from src.features.build_features import SplitDataset
+from src.models.xgboost_train_model import xgboost_train_model
+from src.models.xgboost_predict_model import xgboost_predit_model
+from src.models.xgboost_test_model import xgboost_test_model
+from src.models.util_model_class import ModelClass
+def xgboost_class(split_dataset: SplitDataset, currency: str):
+    # Train Model
+    clf_xgbt_model = xgboost_train_model(split_dataset, currency)
+    # Predit using Trained Model
+    clf_xgbt_predictions = xgboost_predit_model(
+        clf_xgbt_model, split_dataset)
+    # Test Predictions of Trained Model
+    df_trueStatus_probabilityDefault_threshStatus_loanAmount_xgbt = xgboost_test_model(
+        clf_xgbt_model,
+        split_dataset,
+        currency,
+        clf_xgbt_predictions.probability_threshold_selected,
+        clf_xgbt_predictions.predicted_default_status)
+    return ModelClass(
+        model=clf_xgbt_model,
+        trueStatus_probabilityDefault_threshStatus_loanAmount_df=df_trueStatus_probabilityDefault_threshStatus_loanAmount_xgbt,
+        probability_threshold_selected=clf_xgbt_predictions.probability_threshold_selected,
+        predicted_default_status=clf_xgbt_predictions.predicted_default_status,
+        prediction_probability_df=clf_xgbt_predictions.prediction_probability_df,
+    )

src/models/xgboost_predict_model.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from src.models.util_predict_model import make_prediction_view
+xgboost_predit_model = make_prediction_view(  # "predit" (sic): xgboost_model.py imports this exact name
+    "XGBoost", "Gradient Boosted Tree with XGBoost")  # presumably model key + display title - confirm in util_predict_model

src/models/xgboost_test_model.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from src.models.util_test import make_tests_view
+xgboost_test_model = make_tests_view(  # xgboost_class calls this with model, dataset, currency, threshold, predicted status
+    "XGBoost", "Gradient Boosted Tree with XGBoost")  # presumably model key + display title - confirm in util_test

src/models/xgboost_train_model.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import pickle
+import numpy as np
+import xgboost as xgb
+from src.features.build_features import SplitDataset
+import streamlit as st
+from src.visualization.graphs_decision_tree import(plot_importance_gbt,
+                                                   plot_tree_gbt)
+from src.visualization.graphs_settings import streamlit_chart_setting_height_width
+from src.visualization.graphs_download import (download_importance_gbt,
+                                               download_tree_gbt)
+@ st.cache(suppress_st_warning=True, hash_funcs={
+    xgb.XGBClassifier: pickle.dumps
+})
+def create_clf_xgbt_model(X_train, y_train):
+    # Using hyperparameters learning_rate and max_depth
+    return xgb.XGBClassifier(
+        learning_rate=0.1,
+        max_depth=7,
+        use_label_encoder=False,
+        eval_metric="logloss",
+    ).fit(X_train, np.ravel(y_train), eval_metric="logloss")
+def interpret_clf_xgbt_model(clf_xgbt_model):
+    st.subheader("XGBoost Decision Tree Feature Importance")
+    (barxsize, barysize,) = streamlit_chart_setting_height_width(
+        "Chart Settings", 10, 15, "barxsize", "barysize"
+    )
+    fig1 = plot_importance_gbt(clf_xgbt_model, barxsize, barysize)
+    st.pyplot(fig1)
+    download_importance_gbt(fig1, barxsize, barysize)
+    st.subheader("XGBoost Decision Tree Structure")
+    (treexsize, treeysize,) = streamlit_chart_setting_height_width(
+        "Chart Settings", 15, 10, "treexsize", "treeysize"
+    )
+    fig2 = plot_tree_gbt(treexsize, treeysize, clf_xgbt_model)
+    st.pyplot(fig2)
+    download_tree_gbt(treexsize, treeysize)
+    st.markdown(
+        "Note: The downloaded XGBoost Decision Tree plot chart in png has higher resolution than that displayed here."
+    )
+def xgboost_train_model(split_dataset: SplitDataset, currency: str):
+    st.header("XGBoost Decision Trees")
+    clf_xgbt_model = create_clf_xgbt_model(
+        split_dataset.X_train, split_dataset.y_train
+    )
+    interpret_clf_xgbt_model(clf_xgbt_model)
+    return clf_xgbt_model