Spaces:

pkiage
/

credit_risk_modeling_demo

App Files Files Community

credit_risk_modeling_demo / common /views.py

pkiage

Initial commit

232e5e5 over 2 years ago

raw

history blame

9.93 kB

	from typing import OrderedDict
	import streamlit as st # works on command prompt
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import xgboost as xgb
	from sklearn.metrics import (
	roc_curve,
	)
	from sklearn.calibration import calibration_curve
	from xgboost import plot_tree
	from views.typing import ModelView


	def plot_logistic_coeff_barh(coef_dict, x, y):
	    """Draw a horizontal bar chart of the coefficients, sorted ascending.

	    Args:
	        coef_dict: Mapping whose keys label the bars and whose values are
	            the coefficients that set the bar lengths.
	        x: Figure width in inches.
	        y: Figure height in inches.

	    Returns:
	        The matplotlib figure that holds the bar chart.
	    """
	    fig = plt.figure(figsize=(x, y))
	    coef_dict_sorted = dict(
	        sorted(coef_dict.items(), key=lambda item: item[1])
	    )
	    # barh takes the bar positions and the bar widths as two separate
	    # sequences; a single zip object over the items supplies neither.
	    plt.barh(
	        list(coef_dict_sorted.keys()),
	        list(coef_dict_sorted.values()),
	    )
	    return fig


	def print_negative_coefficients_logistic_model(coef_dict):
	    """Render the non-positive coefficients as markdown on the page.

	    Entries of ``coef_dict`` whose value is less than or equal to zero are
	    sorted ascending by value and written with ``st.markdown``.

	    Args:
	        coef_dict: Mapping of name to coefficient value.
	    """
	    # Equal to or less than 0
	    negative_coefficients = {
	        name: value for name, value in coef_dict.items() if value <= 0.0
	    }
	    negative_coefficients_sorted = sorted(
	        negative_coefficients.items(), key=lambda x: x[1]
	    )
	    text = (
	        "\n\nFeatures the model found to be negatively correlated with probability of default are:"
	        "\n{negative_features}:"
	    )
	    # Only the formatted summary is written; the former extra calls dumped
	    # the list's type and a dict_items view onto the page.
	    st.markdown(text.format(negative_features=negative_coefficients_sorted))


	def print_positive_coefficients_logistic_model(coef_dict):
	    """Render the non-negative coefficients as markdown on the page.

	    Entries of ``coef_dict`` whose value is greater than or equal to zero
	    are ordered from largest to smallest value and written with
	    ``st.markdown``.
	    """
	    # Zero is kept: the filter is "greater than or equal to 0".
	    ranked = [
	        (name, weight)
	        for name, weight in coef_dict.items()
	        if weight >= 0.0
	    ]
	    ranked.sort(key=lambda pair: pair[1], reverse=True)
	    template = (
	        "\n\nFeatures the model found to be positively correlated with probability of default are:"
	        "\n{positive_features}:"
	    )
	    st.markdown(template.format(positive_features=ranked))


	def plot_importance_gbt(clf_gbt_model, barxsize, barysize):
	    """Plot weight-based feature importance and return the resized figure.

	    Args:
	        clf_gbt_model: Model handed to ``xgb.plot_importance``.
	        barxsize: Figure width in inches.
	        barysize: Figure height in inches.

	    Returns:
	        The figure that owns the axes produced by ``xgb.plot_importance``.
	    """
	    importance_axes = xgb.plot_importance(
	        clf_gbt_model,
	        importance_type="weight",
	    )
	    importance_fig = importance_axes.figure
	    # The caption is written before the figure is resized and returned.
	    st.write("Feature Importance Plot (Gradient Boosted Tree)")
	    importance_fig.set_size_inches(barxsize, barysize)
	    return importance_fig


	def download_importance_gbt(fig1, barxsize, barysize):
	    """Save the feature-importance figure to ``bar.png`` on button press.

	    Args:
	        fig1: Figure to write to disk.
	        barxsize: Figure width in inches.
	        barysize: Figure height in inches.
	    """
	    if st.button(
	        "Download Feature Importance Plot as png (Gradient Boosted Tree)"
	    ):
	        dpisize = max(barxsize, barysize)
	        # Resize before writing so the file reflects the requested size.
	        fig1.set_size_inches(barxsize, barysize)
	        # Save fig1 itself: plt.savefig writes whichever pyplot figure is
	        # current, which need not be the one passed in.
	        fig1.savefig("bar.png", dpi=dpisize * 96, bbox_inches="tight")


	def plot_tree_gbt(treexsize, treeysize, clf_gbt_model):
	    """Plot the model's tree and return the current figure, resized.

	    Args:
	        treexsize: Figure width in inches.
	        treeysize: Figure height in inches.
	        clf_gbt_model: Model handed to xgboost's ``plot_tree``.

	    Returns:
	        The current pyplot figure after ``plot_tree`` has drawn on it.
	    """
	    plot_tree(clf_gbt_model)
	    # Pick up the current pyplot figure after plot_tree has run.
	    tree_fig = plt.gcf()
	    tree_fig.set_size_inches(treexsize, treeysize)
	    return tree_fig


	def download_tree_gbt(treexsize, treeysize):
	    """Save the current pyplot figure to ``tree.png`` on button press.

	    The output resolution is 96 times the larger of the two sizes.

	    Args:
	        treexsize: Figure width in inches.
	        treeysize: Figure height in inches.
	    """
	    pressed = st.button(
	        "Download Decision Tree Plot as png (Gradient Boosted Tree)"
	    )
	    if not pressed:
	        return
	    larger_side = max(treexsize, treeysize)
	    # Writes whichever figure pyplot currently considers active.
	    plt.savefig("tree.png", dpi=larger_side * 96, bbox_inches="tight")


	def cross_validation_graph(cv, eval_metric, trees):
	    """Plot the third column of ``cv`` as a score-per-iteration line.

	    Args:
	        cv: Table indexed by ``cv.columns``; its third column is plotted.
	        eval_metric: Metric name interpolated into the title and y label.
	        trees: Iteration count interpolated into the title.

	    Returns:
	        The newly created matplotlib figure.
	    """
	    fig = plt.figure()
	    # The plotted series is selected by position (third column).
	    score_column = cv.columns[2]
	    plt.plot(cv[score_column])
	    plt.title(f"Test {eval_metric} Score Over {trees} Iterations")
	    plt.xlabel("Iteration Number")
	    plt.ylabel(f"Test {eval_metric} Score")
	    return fig


	def recall_accuracy_threshold_tradeoff_fig(
	    widthsize,
	    heightsize,
	    threshold_list,
	    thresh_def_recalls_list,
	    thresh_nondef_recalls_list,
	    thresh_accs_list,
	):
	    """Plot three score curves against the probability threshold.

	    Args:
	        widthsize: Figure width in inches.
	        heightsize: Figure height in inches.
	        threshold_list: X values shared by all three curves.
	        thresh_def_recalls_list: Y values labelled "Default Recall".
	        thresh_nondef_recalls_list: Y values labelled "Non-Default Recall".
	        thresh_accs_list: Y values labelled "Model Accuracy".

	    Returns:
	        The matplotlib figure with both axes limited to [0, 1].
	    """
	    fig = plt.figure(figsize=(widthsize, heightsize))
	    curves = (
	        (thresh_def_recalls_list, "Default Recall"),
	        (thresh_nondef_recalls_list, "Non-Default Recall"),
	        (thresh_accs_list, "Model Accuracy"),
	    )
	    for scores, caption in curves:
	        plt.plot(threshold_list, scores, label=caption)
	    plt.xlabel("Probability Threshold")
	    plt.ylabel("Score")
	    plt.xlim(0, 1)
	    plt.ylim(0, 1)
	    plt.legend()
	    plt.title("Recall and Accuracy Score Tradeoff with Probability Threshold")
	    plt.grid(False)
	    return fig


	def roc_auc_compare_n_models(y, model_views: OrderedDict[str, ModelView]):
	    """Plot one ROC curve per model plus a diagonal reference line.

	    Args:
	        y: True labels passed to ``roc_curve``.
	        model_views: Ordered mapping of model name to a view whose
	            ``prediction_probability_df`` holds the scores for ``roc_curve``.

	    Returns:
	        The matplotlib figure with both axes limited to [0, 1].
	    """
	    # The first two colours are the historical ones; the rest keep further
	    # models distinguishable.
	    colors = ["blue", "green", "red", "purple", "orange", "brown"]
	    fig = plt.figure()
	    for color_idx, (model_name, model_view) in enumerate(model_views.items()):
	        fpr, tpr, _thresholds = roc_curve(
	            y, model_view.prediction_probability_df
	        )
	        # Wrap around the palette so any number of models can be drawn
	        # without running off the end of the colour list.
	        plt.plot(
	            fpr,
	            tpr,
	            color=colors[color_idx % len(colors)],
	            label=f"{model_name}",
	        )
	    plt.plot([0, 1], [0, 1], linestyle="--", label="Random Prediction")
	    model_names = list(model_views.keys())
	    if not model_names:
	        model_name_str = "None"
	    elif len(model_names) == 1:
	        model_name_str = model_names[0]
	    else:
	        # "a and b" for two names, "a, b and c" for three or more.
	        model_name_str = " and ".join(
	            [", ".join(model_names[:-1]), model_names[-1]]
	        )
	    plt.title(f"ROC Chart for {model_name_str} on the Probability of Default")
	    plt.xlabel("False Positive Rate (FP Rate)")
	    plt.ylabel("True Positive Rate (TP Rate)")
	    plt.legend()
	    plt.grid(False)
	    plt.xlim(0, 1)
	    plt.ylim(0, 1)
	    return fig


	def calibration_curve_report_commented_n(
	    y, model_views: OrderedDict[str, ModelView], bins: int
	):
	    """Plot one calibration curve per model plus the ideal diagonal.

	    Args:
	        y: True labels passed to ``calibration_curve``.
	        model_views: Ordered mapping of model name to a view whose
	            ``prediction_probability_df`` holds the predicted scores.
	        bins: Number of bins used by ``calibration_curve``.

	    Returns:
	        The matplotlib figure with both axes limited to [0, 1].
	    """
	    fig = plt.figure()
	    for model_name, model_view in model_views.items():
	        # The ``normalize`` keyword was removed from calibration_curve in
	        # scikit-learn 1.3. Apply the same min-max scaling by hand so the
	        # curve is unchanged on old releases and still works on new ones.
	        y_prob = np.ravel(np.asarray(model_view.prediction_probability_df))
	        y_prob = (y_prob - y_prob.min()) / (y_prob.max() - y_prob.min())
	        frac_of_pos, mean_pred_val = calibration_curve(
	            y,
	            y_prob,
	            n_bins=bins,
	        )
	        plt.plot(mean_pred_val, frac_of_pos, "s-", label=f"{model_name}")
	    # Create the calibration curve plot with the guideline
	    plt.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
	    plt.ylabel("Fraction of positives")
	    plt.xlabel("Average Predicted Probability")
	    plt.title("Calibration Curve")
	    plt.legend()
	    plt.grid(False)
	    plt.xlim(0, 1)
	    plt.ylim(0, 1)
	    return fig


	def acceptance_rate_threshold_fig(probability_default, acceptancerate, bins):
	    """Plot a histogram of the probabilities with a threshold marker.

	    Args:
	        probability_default: Values to histogram; must provide
	            ``describe()`` and be accepted by ``np.quantile``.
	        acceptancerate: Quantile level used to compute the threshold.
	        bins: Bin specification forwarded to ``plt.hist``.

	    Returns:
	        A tuple ``(fig, probability_stat_distribution, acc_rate_thresh)``:
	        the figure, the result of ``describe()``, and the quantile value
	        drawn as the red vertical line.
	    """
	    # Probability distribution
	    probability_stat_distribution = probability_default.describe()
	    # Acceptance rate threshold
	    acc_rate_thresh = np.quantile(probability_default, acceptancerate)
	    fig = plt.figure()
	    plt.hist(
	        probability_default,
	        color="blue",
	        bins=bins,
	        histtype="bar",
	        ec="white",
	    )
	    # Add a reference line to the plot for the threshold
	    plt.axvline(x=acc_rate_thresh, color="red")
	    # Title spelling corrected (was "Thershold").
	    plt.title("Acceptance Rate Threshold")
	    return (
	        fig,
	        probability_stat_distribution,
	        acc_rate_thresh,
	    )


	def streamlit_2columns_metrics_pct_df(
	    column1name_label: str,
	    column2name_label: str,
	    df: pd.DataFrame,
	):
	    """Show the share of 1s and 0s in ``df`` as two side-by-side metrics.

	    Each share is the ``value_counts()`` entry for that value divided by
	    the number of rows, formatted as a whole percentage.

	    Args:
	        column1name_label: Label of the left metric (share of value 1).
	        column2name_label: Label of the right metric (share of value 0).
	        df: Data whose values are counted.
	    """
	    (
	        column1name,
	        column2name,
	    ) = st.columns(2)
	    counts = df.value_counts()
	    total = df.shape[0]
	    # A value absent from the data has no entry in the counts, and an
	    # empty input has no rows: report 0% instead of failing on None / n
	    # or n / 0.
	    share_of_ones = counts.get(1, 0) / total if total else 0.0
	    share_of_zeros = counts.get(0, 0) / total if total else 0.0
	    with column1name:
	        st.metric(
	            label=column1name_label,
	            value="{:.0%}".format(share_of_ones),
	            delta=None,
	            delta_color="normal",
	        )
	    with column2name:
	        st.metric(
	            label=column2name_label,
	            value="{:.0%}".format(share_of_zeros),
	            delta=None,
	            delta_color="normal",
	        )


	def streamlit_2columns_metrics_df(
	    column1name_label: str,
	    column2name_label: str,
	    df: pd.DataFrame,
	):
	    """Show the counts of 1s and 0s in ``df`` as two side-by-side metrics.

	    Args:
	        column1name_label: Label of the left metric (count of value 1).
	        column2name_label: Label of the right metric (count of value 0).
	        df: Data whose values are counted with ``value_counts()``.
	    """
	    left_slot, right_slot = st.columns(2)
	    layout = (
	        (left_slot, column1name_label, 1),
	        (right_slot, column2name_label, 0),
	    )
	    for slot, caption, counted_value in layout:
	        with slot:
	            # A value missing from the counts is passed on as None.
	            st.metric(
	                label=caption,
	                value=df.value_counts().get(counted_value),
	                delta=None,
	                delta_color="normal",
	            )


	def streamlit_2columns_metrics_df_shape(df: pd.DataFrame):
	    """Show the row and column counts of ``df`` as two metrics.

	    Args:
	        df: Data whose ``shape`` supplies the two displayed numbers.
	    """
	    rows_slot, columns_slot = st.columns(2)
	    layout = (
	        (rows_slot, "Rows", 0),
	        (columns_slot, "Columns", 1),
	    )
	    for slot, caption, axis in layout:
	        with slot:
	            st.metric(
	                label=caption,
	                value=df.shape[axis],
	                delta=None,
	                delta_color="normal",
	            )


	def streamlit_2columns_metrics_pct_series(
	    column1name_label: str,
	    column2name_label: str,
	    series: pd.Series,
	):
	    """Show entries 1 and 0 of ``series`` as shares of its sum.

	    Each share is ``series.get(label)`` divided by ``series.sum()``,
	    formatted as a whole percentage.

	    Args:
	        column1name_label: Label of the left metric (entry at label 1).
	        column2name_label: Label of the right metric (entry at label 0).
	        series: Series looked up at labels 1 and 0.
	    """
	    (
	        column1name,
	        column2name,
	    ) = st.columns(2)
	    total = series.sum()
	    # A label missing from the series would make get() return None and
	    # the division fail; count it as 0. A zero total reports 0% rather
	    # than dividing by zero.
	    share_of_ones = series.get(1, 0) / total if total else 0.0
	    share_of_zeros = series.get(0, 0) / total if total else 0.0
	    with column1name:
	        st.metric(
	            label=column1name_label,
	            value="{:.0%}".format(share_of_ones),
	            delta=None,
	            delta_color="normal",
	        )
	    with column2name:
	        st.metric(
	            label=column2name_label,
	            value="{:.0%}".format(share_of_zeros),
	            delta=None,
	            delta_color="normal",
	        )


	def streamlit_2columns_metrics_series(
	    column1name_label: str,
	    column2name_label: str,
	    series: pd.Series,
	):
	    """Show entries 1 and 0 of ``series`` as two side-by-side metrics.

	    Args:
	        column1name_label: Label of the left metric (entry at label 1).
	        column2name_label: Label of the right metric (entry at label 0).
	        series: Series looked up with ``get(1)`` and ``get(0)``.
	    """
	    left_slot, right_slot = st.columns(2)
	    layout = (
	        (left_slot, column1name_label, 1),
	        (right_slot, column2name_label, 0),
	    )
	    for slot, caption, lookup_label in layout:
	        with slot:
	            # A missing label is passed on as None.
	            st.metric(
	                label=caption,
	                value=series.get(lookup_label),
	                delta=None,
	                delta_color="normal",
	            )


	def streamlit_chart_setting_height_width(
	    title: str,
	    default_widthvalue: int,
	    default_heightvalue: int,
	    widthkey: str,
	    heightkey: str,
	):
	    """Ask for a chart width and height inside an expander.

	    Args:
	        title: Caption of the expander.
	        default_widthvalue: Initial value of the width input.
	        default_heightvalue: Initial value of the height input.
	        widthkey: Streamlit widget key of the width input.
	        heightkey: Streamlit widget key of the height input.

	    Returns:
	        A ``(width, height)`` tuple holding the two number-input values.
	    """
	    with st.expander(title):
	        width_slot, height_slot = st.columns(2)
	        inputs = (
	            (width_slot, "Width in inches:", default_widthvalue, widthkey),
	            (height_slot, "Height in inches:", default_heightvalue, heightkey),
	        )
	        chosen = []
	        for slot, caption, initial, widget_key in inputs:
	            with slot:
	                chosen.append(
	                    st.number_input(
	                        label=caption,
	                        value=initial,
	                        key=widget_key,
	                    )
	                )
	    return chosen[0], chosen[1]