% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/perf.R
\name{perf_psi}
\alias{perf_psi}
\title{PSI}
\usage{
perf_psi(score, label = NULL, title = NULL, x_limits = c(100, 800),
  x_tick_break = 50, show_plot = TRUE, seed = 186,
  return_distr_dat = FALSE)
}
\arguments{
\item{score}{A list of credit scores for the actual and expected data samples. For example, score = list(actual = score_A, expect = score_E), where both score_A and score_E are dataframes with the same column names.}

\item{label}{A list of label values for the actual and expected data samples. The default is NULL. For example, label = list(actual = label_A, expect = label_E), where both label_A and label_E are vectors or dataframes. The label values should be 0s and 1s, where 0 represents good and 1 represents bad.}

\item{title}{Title of plot, default is NULL.}

\item{x_limits}{x-axis limits, default is c(100, 800).}

\item{x_tick_break}{x-axis tick break, default is 50.}

\item{show_plot}{Logical, default is TRUE. Whether to show the plot.}

\item{seed}{Integer, default is 186. The specified seed is used for randomly sorting the data.}

\item{return_distr_dat}{Logical, default is FALSE.}
}
\value{
A dataframe of PSI values and plots of the credit score distribution.
}
\description{
\code{perf_psi} calculates the population stability index (PSI) and provides the credit score distribution based on credit score datasets.
}
\details{
The population stability index (PSI) formula is displayed below: \deqn{PSI = \sum((Actual\% - Expected\%)*(\ln(\frac{Actual\%}{Expected\%}))).} The rule of thumb for the PSI is as follows: a value less than 0.1 implies an insignificant change, no action required; a value from 0.1 to 0.25 implies some minor change, check other scorecard monitoring metrics; a value greater than 0.25 implies a major shift in the population, need to delve deeper.
}
\examples{
\dontrun{
# End-to-end example: prepare the data, fit a model, build a scorecard,
# score the train and test samples, then compute the PSI between them.

# load germancredit data
data("germancredit")

# filter variables via missing rate, iv, identical value rate
dt_sel = var_filter(germancredit, "creditability")

# split the filtered data into train and test sets ------
dt_list = split_df(dt_sel, "creditability", ratio = 0.6, seed=21)
dt_train = dt_list$train; dt_test = dt_list$test

# woe binning on the train set ------
bins = woebin(dt_train, "creditability")

# convert train and test into woe values using the train bins
train = woebin_ply(dt_train, bins)
test = woebin_ply(dt_test, bins)

# glm ------
m1 = glm(creditability ~ ., family = "binomial", data = train)
# summary(m1)

# select a formula-based model by AIC
m_step = step(m1, direction="both", trace=FALSE)
# re-evaluate the call stored on the selected model
m2 = eval(m_step$call)
# summary(m2)

# predicted probability
train_pred = predict(m2, type='response', train)
test_pred = predict(m2, type='response', test)

# # ks & roc plot
# perf_eva(train$creditability, train_pred, title = "train")
# perf_eva(test$creditability, test_pred, title = "test")

# scorecard built from the bins and the selected model
card = scorecard(bins, m2)

# credit score, only_total_score = TRUE
train_score = scorecard_ply(dt_train, card)
test_score = scorecard_ply(dt_test, card)

# Example I # psi between the train and test scores
psi = perf_psi(
  score = list(train = train_score, test = test_score),
  label = list(train = train$creditability, test = test$creditability)
)
# psi$psi  # psi dataframe
# psi$pic  # pic of score distribution

# Example II # specifying the score range via x_limits and x_tick_break
psi_s = perf_psi(
  score = list(train = train_score, test = test_score),
  label = list(train = train$creditability, test = test$creditability),
  x_limits = c(200, 750),
  x_tick_break = 50
  )

# Example III # credit score, only_total_score = FALSE
train_score2 = scorecard_ply(dt_train, card, only_total_score=FALSE)
test_score2 = scorecard_ply(dt_test, card, only_total_score=FALSE)

# psi on the scores computed with only_total_score = FALSE
psi2 = perf_psi(
  score = list(train = train_score2, test = test_score2),
  label = list(train = train$creditability, test = test$creditability)
)
# psi2$psi  # psi dataframe
# psi2$pic  # pic of score distribution
}
}
\seealso{
\code{\link{perf_eva}}
}
