pkiage committed on
Commit
808b291
1 Parent(s): 37b5a88

refactor: xgboost

Browse files
app.py CHANGED
@@ -1,15 +1,17 @@
1
- from typing import OrderedDict
2
  import streamlit as st
3
- from data_setup import initialise_data
4
- from views.decision_tree import decisiontree_view
5
- from views.logistic import logistic_view
6
- from views.model_comparison import model_comparison_view
7
- from views.strategy_table import strategy_table_view
 
8
 
9
 
10
  def main():
11
  currency_options = ["USD", "KES", "GBP"]
12
 
 
 
13
  currency = st.sidebar.selectbox(
14
  label="What currency will you be using?", options=currency_options
15
  )
@@ -22,30 +24,19 @@ def main():
22
 
23
  st.title("Modelling")
24
 
25
- model_options = ["Logistic Regression", "Decision Trees"]
26
-
27
- # Returns list
28
  models_selected_list = st.sidebar.multiselect(
29
  label="Select model", options=model_options, default=model_options
30
  )
31
 
32
  models_selected_set = set(models_selected_list)
33
- model_views = OrderedDict()
34
-
35
- if "Logistic Regression" in models_selected_set:
36
- logistic_model_view = logistic_view(split_dataset, currency)
37
- model_views["Logistic Regression"] = logistic_model_view
38
-
39
- if "Decision Trees" in models_selected_set:
40
- decision_tree_model_view = decisiontree_view(split_dataset, currency)
41
- model_views["Decision Trees"] = decision_tree_model_view
42
-
43
- if models_selected_list:
44
- model_comparison_view(
45
- split_dataset,
46
- model_views,
47
- )
48
- strategy_table_view(currency, model_views)
49
 
50
 
51
  if __name__ == "__main__":
 
 
1
  import streamlit as st
2
+ from typing import OrderedDict
3
+
4
+
5
+ from src.features.build_features import initialise_data
6
+ from src.models.xgboost_model import xgboost_class
7
+ from src.models.util_strategy_table import strategy_table_view
8
 
9
 
10
  def main():
11
  currency_options = ["USD", "KES", "GBP"]
12
 
13
+ model_options = ["XGBoost"]
14
+
15
  currency = st.sidebar.selectbox(
16
  label="What currency will you be using?", options=currency_options
17
  )
 
24
 
25
  st.title("Modelling")
26
 
 
 
 
27
  models_selected_list = st.sidebar.multiselect(
28
  label="Select model", options=model_options, default=model_options
29
  )
30
 
31
  models_selected_set = set(models_selected_list)
32
+
33
+ model_classes = OrderedDict()
34
+
35
+ if "XGBoost" in models_selected_set:
36
+ xgboost_model_class = xgboost_class(split_dataset, currency)
37
+ model_classes["XGBoost"] = xgboost_model_class
38
+
39
+ strategy_table_view(currency, model_classes)
 
 
 
 
 
 
 
 
40
 
41
 
42
  if __name__ == "__main__":
src/models/xgboost_model.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.features.build_features import SplitDataset
2
+
3
+ from src.models.xgboost_train_model import xgboost_train_model
4
+ from src.models.xgboost_predict_model import xgboost_predit_model
5
+ from src.models.xgboost_test_model import xgboost_test_model
6
+ from src.models.util_model_class import ModelClass
7
+
8
+
9
def xgboost_class(split_dataset: SplitDataset, currency: str):
    """Train, predict with, and test the XGBoost model, then bundle the results.

    The three steps run in a fixed order because each one consumes the
    output of the previous one: the fitted model feeds the prediction
    step, and the prediction step's selected threshold and predicted
    default status feed the test step.

    Args:
        split_dataset: Dataset split passed unchanged to every step.
        currency: Currency label forwarded to the train and test steps.

    Returns:
        A ``ModelClass`` holding the fitted model, the test-step result,
        the selected probability threshold, the predicted default status
        and the prediction-probability object from the prediction step.
    """
    # Step 1: fit the classifier.
    model = xgboost_train_model(split_dataset, currency)

    # Step 2: predict with the fitted classifier.
    # NOTE: "predit" is the spelling of the imported name; keep it in sync.
    predictions = xgboost_predit_model(model, split_dataset)
    threshold = predictions.probability_threshold_selected
    default_status = predictions.predicted_default_status

    # Step 3: evaluate the predictions at the selected threshold.
    test_results_df = xgboost_test_model(
        model,
        split_dataset,
        currency,
        threshold,
        default_status,
    )

    return ModelClass(
        model=model,
        trueStatus_probabilityDefault_threshStatus_loanAmount_df=test_results_df,
        probability_threshold_selected=threshold,
        predicted_default_status=default_status,
        prediction_probability_df=predictions.prediction_probability_df,
    )
src/models/xgboost_predict_model.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from src.models.util_predict_model import make_prediction_view
2
+
3
# Prediction step for the XGBoost model, produced by the shared factory.
# The two strings are the model's short name and its longer description.
# NOTE: the identifier is spelled "predit" on purpose here -- other modules
# import it under exactly this name, so renaming it would break them.
xgboost_predit_model = make_prediction_view(
    "XGBoost",
    "Gradient Boosted Tree with XGBoost",
)
src/models/xgboost_test_model.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from src.models.util_test import make_tests_view
2
+
3
# Test step for the XGBoost model, produced by the shared factory.
# The two strings are the model's short name and its longer description.
xgboost_test_model = make_tests_view(
    "XGBoost",
    "Gradient Boosted Tree with XGBoost",
)
src/models/xgboost_train_model.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+
3
+ import numpy as np
4
+ import xgboost as xgb
5
+ from src.features.build_features import SplitDataset
6
+ import streamlit as st
7
+
8
+ from src.visualization.graphs_decision_tree import(plot_importance_gbt,
9
+ plot_tree_gbt)
10
+
11
+ from src.visualization.graphs_settings import streamlit_chart_setting_height_width
12
+
13
+ from src.visualization.graphs_download import (download_importance_gbt,
14
+ download_tree_gbt)
15
+
16
+
17
@st.cache(
    suppress_st_warning=True,
    # Streamlit cannot hash XGBClassifier natively; hash its pickled bytes.
    hash_funcs={xgb.XGBClassifier: pickle.dumps},
)
def create_clf_xgbt_model(X_train, y_train):
    """Fit and return an XGBoost classifier on the training data.

    The result is memoised by ``st.cache``, so Streamlit re-runs with the
    same inputs reuse the fitted model instead of training again.

    Args:
        X_train: Training features, passed straight to ``fit``.
        y_train: Training labels; flattened with ``np.ravel`` before
            fitting.

    Returns:
        The fitted ``xgb.XGBClassifier``.
    """
    # Hyperparameters tuned here: learning_rate and max_depth.
    classifier = xgb.XGBClassifier(
        learning_rate=0.1,
        max_depth=7,
        use_label_encoder=False,
        eval_metric="logloss",
    )
    # eval_metric is configured on the estimator only. Passing it to fit()
    # as well is redundant, is deprecated since XGBoost 1.6, and is rejected
    # by newer releases.
    return classifier.fit(X_train, np.ravel(y_train))
28
+
29
+
30
def interpret_clf_xgbt_model(clf_xgbt_model):
    """Render the feature-importance and tree-structure sections for the model.

    Each section is emitted in the same order: a subheader, chart-size
    settings, the plotted figure, and then the matching download helper.
    A closing note about the downloaded PNG's resolution follows the tree
    section.

    Args:
        clf_xgbt_model: Fitted XGBoost model handed to the plotting helpers.
    """
    # --- Feature importance -------------------------------------------------
    st.subheader("XGBoost Decision Tree Feature Importance")
    bar_width, bar_height = streamlit_chart_setting_height_width(
        "Chart Settings", 10, 15, "barxsize", "barysize"
    )
    importance_fig = plot_importance_gbt(clf_xgbt_model, bar_width, bar_height)
    st.pyplot(importance_fig)
    download_importance_gbt(importance_fig, bar_width, bar_height)

    # --- Tree structure -----------------------------------------------------
    st.subheader("XGBoost Decision Tree Structure")
    tree_width, tree_height = streamlit_chart_setting_height_width(
        "Chart Settings", 15, 10, "treexsize", "treeysize"
    )
    # Unlike the importance plot, this helper takes the sizes first.
    tree_fig = plot_tree_gbt(tree_width, tree_height, clf_xgbt_model)
    st.pyplot(tree_fig)
    # The tree download helper is given only the sizes, not the figure.
    download_tree_gbt(tree_width, tree_height)

    st.markdown(
        "Note: The downloaded XGBoost Decision Tree plot chart in png has higher resolution than that displayed here."
    )
57
+
58
+
59
def xgboost_train_model(split_dataset: SplitDataset, currency: str):
    """Fit the XGBoost classifier and render its interpretation charts.

    Args:
        split_dataset: Provides ``X_train`` and ``y_train`` for fitting.
        currency: Not used in this function's body; kept in the signature
            for callers that pass it.

    Returns:
        The fitted classifier returned by ``create_clf_xgbt_model``.
    """
    st.header("XGBoost Decision Trees")

    features = split_dataset.X_train
    labels = split_dataset.y_train
    fitted_model = create_clf_xgbt_model(features, labels)

    # Show the feature-importance and tree plots before handing the model back.
    interpret_clf_xgbt_model(fitted_model)
    return fitted_model
68
+ return clf_xgbt_model