refactor: xgboost
Browse files
- app.py +16 -25
- src/models/xgboost_model.py +32 -0
- src/models/xgboost_predict_model.py +4 -0
- src/models/xgboost_test_model.py +4 -0
- src/models/xgboost_train_model.py +68 -0
app.py
CHANGED
@@ -1,15 +1,17 @@
|
|
1 |
-
from typing import OrderedDict
|
2 |
import streamlit as st
|
3 |
-
from
|
4 |
-
|
5 |
-
|
6 |
-
from
|
7 |
-
from
|
|
|
8 |
|
9 |
|
10 |
def main():
|
11 |
currency_options = ["USD", "KES", "GBP"]
|
12 |
|
|
|
|
|
13 |
currency = st.sidebar.selectbox(
|
14 |
label="What currency will you be using?", options=currency_options
|
15 |
)
|
@@ -22,30 +24,19 @@ def main():
|
|
22 |
|
23 |
st.title("Modelling")
|
24 |
|
25 |
-
model_options = ["Logistic Regression", "Decision Trees"]
|
26 |
-
|
27 |
-
# Returns list
|
28 |
models_selected_list = st.sidebar.multiselect(
|
29 |
label="Select model", options=model_options, default=model_options
|
30 |
)
|
31 |
|
32 |
models_selected_set = set(models_selected_list)
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
model_views["Decision Trees"] = decision_tree_model_view
|
42 |
-
|
43 |
-
if models_selected_list:
|
44 |
-
model_comparison_view(
|
45 |
-
split_dataset,
|
46 |
-
model_views,
|
47 |
-
)
|
48 |
-
strategy_table_view(currency, model_views)
|
49 |
|
50 |
|
51 |
if __name__ == "__main__":
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
from typing import OrderedDict
|
3 |
+
|
4 |
+
|
5 |
+
from src.features.build_features import initialise_data
|
6 |
+
from src.models.xgboost_model import xgboost_class
|
7 |
+
from src.models.util_strategy_table import strategy_table_view
|
8 |
|
9 |
|
10 |
def main():
|
11 |
currency_options = ["USD", "KES", "GBP"]
|
12 |
|
13 |
+
model_options = ["XGBoost"]
|
14 |
+
|
15 |
currency = st.sidebar.selectbox(
|
16 |
label="What currency will you be using?", options=currency_options
|
17 |
)
|
|
|
24 |
|
25 |
st.title("Modelling")
|
26 |
|
|
|
|
|
|
|
27 |
models_selected_list = st.sidebar.multiselect(
|
28 |
label="Select model", options=model_options, default=model_options
|
29 |
)
|
30 |
|
31 |
models_selected_set = set(models_selected_list)
|
32 |
+
|
33 |
+
model_classes = OrderedDict()
|
34 |
+
|
35 |
+
if "XGBoost" in models_selected_set:
|
36 |
+
xgboost_model_class = xgboost_class(split_dataset, currency)
|
37 |
+
model_classes["XGBoost"] = xgboost_model_class
|
38 |
+
|
39 |
+
strategy_table_view(currency, model_classes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
|
42 |
if __name__ == "__main__":
|
src/models/xgboost_model.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.features.build_features import SplitDataset
|
2 |
+
|
3 |
+
from src.models.xgboost_train_model import xgboost_train_model
|
4 |
+
from src.models.xgboost_predict_model import xgboost_predit_model
|
5 |
+
from src.models.xgboost_test_model import xgboost_test_model
|
6 |
+
from src.models.util_model_class import ModelClass
|
7 |
+
|
8 |
+
|
9 |
+
def xgboost_class(split_dataset: SplitDataset, currency: str):
    """Run the XGBoost train -> predict -> test pipeline and bundle the results.

    Args:
        split_dataset: Train/test split handed to each pipeline stage.
        currency: Currency code forwarded to the training and testing stages.

    Returns:
        A ``ModelClass`` holding the fitted model, the dataframe returned by
        the testing stage, and the threshold / status / probability values
        exposed by the prediction stage.
    """
    # Stage 1: fit the classifier.
    fitted_model = xgboost_train_model(split_dataset, currency)

    # Stage 2: predict with the fitted classifier.
    predictions = xgboost_predit_model(fitted_model, split_dataset)

    # Stage 3: evaluate the predictions produced in stage 2.
    evaluation_df = xgboost_test_model(
        fitted_model,
        split_dataset,
        currency,
        predictions.probability_threshold_selected,
        predictions.predicted_default_status,
    )

    return ModelClass(
        model=fitted_model,
        trueStatus_probabilityDefault_threshStatus_loanAmount_df=evaluation_df,
        probability_threshold_selected=predictions.probability_threshold_selected,
        predicted_default_status=predictions.predicted_default_status,
        prediction_probability_df=predictions.prediction_probability_df,
    )
|
src/models/xgboost_predict_model.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.models.util_predict_model import make_prediction_view
|
2 |
+
|
3 |
+
# Prediction step for the XGBoost model, built by the shared prediction-view
# factory. NOTE(review): the name is spelled "predit" (sic); it is imported
# under this spelling elsewhere, so it is kept as-is here.
xgboost_predit_model = make_prediction_view(
    "XGBoost",
    "Gradient Boosted Tree with XGBoost",
)
|
src/models/xgboost_test_model.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.models.util_test import make_tests_view
|
2 |
+
|
3 |
+
# Testing step for the XGBoost model, built by the shared tests-view factory
# with the same two labels used for the prediction step.
xgboost_test_model = make_tests_view(
    "XGBoost",
    "Gradient Boosted Tree with XGBoost",
)
|
src/models/xgboost_train_model.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import xgboost as xgb
|
5 |
+
from src.features.build_features import SplitDataset
|
6 |
+
import streamlit as st
|
7 |
+
|
8 |
+
from src.visualization.graphs_decision_tree import(plot_importance_gbt,
|
9 |
+
plot_tree_gbt)
|
10 |
+
|
11 |
+
from src.visualization.graphs_settings import streamlit_chart_setting_height_width
|
12 |
+
|
13 |
+
from src.visualization.graphs_download import (download_importance_gbt,
|
14 |
+
download_tree_gbt)
|
15 |
+
|
16 |
+
|
17 |
+
@st.cache(
    suppress_st_warning=True,
    # Hash XGBClassifier objects by their pickled bytes when Streamlit
    # computes cache keys.
    hash_funcs={xgb.XGBClassifier: pickle.dumps},
)
def create_clf_xgbt_model(X_train, y_train):
    """Fit an XGBoost classifier on the training data and return it.

    The result is memoised by ``st.cache``.

    Args:
        X_train: Training features, passed to ``fit`` unchanged.
        y_train: Training labels; flattened with ``np.ravel`` before fitting.

    Returns:
        The fitted ``xgb.XGBClassifier`` (``fit`` returns the estimator).
    """
    # Hyperparameters: learning_rate and max_depth are fixed here.
    classifier = xgb.XGBClassifier(
        learning_rate=0.1,
        max_depth=7,
        use_label_encoder=False,
        eval_metric="logloss",
    )
    # eval_metric is already configured on the estimator above. Passing it to
    # fit() as well is redundant, deprecated since XGBoost 1.6, and rejected
    # by later releases, so it is intentionally not repeated here.
    return classifier.fit(X_train, np.ravel(y_train))
|
28 |
+
|
29 |
+
|
30 |
+
def interpret_clf_xgbt_model(clf_xgbt_model):
    """Render the feature-importance and tree-structure sections in Streamlit.

    For each section this writes a subheader, reads the chart-size settings,
    draws the figure with ``st.pyplot`` and then calls the matching download
    helper.

    Args:
        clf_xgbt_model: Model handed to the importance and tree plotting
            helpers.
    """
    # --- Section 1: feature importance -----------------------------------
    st.subheader("XGBoost Decision Tree Feature Importance")

    importance_x, importance_y = streamlit_chart_setting_height_width(
        "Chart Settings", 10, 15, "barxsize", "barysize"
    )
    importance_fig = plot_importance_gbt(
        clf_xgbt_model, importance_x, importance_y
    )
    st.pyplot(importance_fig)
    download_importance_gbt(importance_fig, importance_x, importance_y)

    # --- Section 2: tree structure ---------------------------------------
    st.subheader("XGBoost Decision Tree Structure")

    tree_x, tree_y = streamlit_chart_setting_height_width(
        "Chart Settings", 15, 10, "treexsize", "treeysize"
    )
    # Note the argument order: sizes first, model last (unlike the
    # importance plot helper above).
    tree_fig = plot_tree_gbt(tree_x, tree_y, clf_xgbt_model)
    st.pyplot(tree_fig)
    download_tree_gbt(tree_x, tree_y)

    st.markdown(
        "Note: The downloaded XGBoost Decision Tree plot chart in png has higher resolution than that displayed here."
    )
|
57 |
+
|
58 |
+
|
59 |
+
def xgboost_train_model(split_dataset: SplitDataset, currency: str):
    """Train the XGBoost classifier, show its interpretation views, return it.

    Args:
        split_dataset: Provides ``X_train`` and ``y_train`` for fitting.
        currency: Accepted for signature parity with the other pipeline
            stages; not used inside this function.

    Returns:
        The classifier returned by ``create_clf_xgbt_model``.
    """
    st.header("XGBoost Decision Trees")

    train_features = split_dataset.X_train
    train_labels = split_dataset.y_train
    fitted_model = create_clf_xgbt_model(train_features, train_labels)

    # Draw the importance and tree charts for the freshly fitted model.
    interpret_clf_xgbt_model(fitted_model)

    return fitted_model
|