% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/derive_joined.R
\name{derive_vars_joined}
\alias{derive_vars_joined}
\title{Add Variables from an Additional Dataset Based on Conditions from Both
Datasets}
\usage{
derive_vars_joined(
  dataset,
  dataset_add,
  by_vars = NULL,
  order = NULL,
  new_vars = NULL,
  tmp_obs_nr_var = NULL,
  join_vars = NULL,
  join_type,
  filter_add = NULL,
  first_cond_lower = NULL,
  first_cond_upper = NULL,
  filter_join = NULL,
  mode = NULL,
  exist_flag = NULL,
  true_value = "Y",
  false_value = NA_character_,
  missing_values = NULL,
  check_type = "warning"
)
}
\arguments{
\item{dataset}{Input dataset

The variables specified by the \code{by_vars} argument are expected to be in the dataset.}

\item{dataset_add}{Additional dataset

The variables specified by the \code{by_vars}, the \code{new_vars}, the \code{join_vars},
and the \code{order} argument are expected.}

\item{by_vars}{Grouping variables

The two datasets are joined by the specified variables.

Variables can be renamed by naming the element, i.e.
\verb{by_vars = exprs(<name in input dataset> = <name in additional dataset>)}, similar to the \code{dplyr} joins.

\emph{Permitted Values}: list of variables created by \code{exprs()}
e.g. \code{exprs(USUBJID, VISIT)}}

\item{order}{Sort order

If the argument is set to a non-null value, for each observation of the
input dataset the first or last observation from the joined dataset is
selected with respect to the specified order. The specified variables are
expected in the additional dataset (\code{dataset_add}). If a variable is
available in both \code{dataset} and \code{dataset_add}, the one from \code{dataset_add}
is used for the sorting.

If an expression is named, e.g., \code{exprs(EXSTDT = convert_dtc_to_dt(EXSTDTC), EXSEQ)}, a corresponding variable (\code{EXSTDT}) is
added to the additional dataset and can be used in the filter conditions
(\code{filter_add}, \code{filter_join}) and for \code{join_vars} and \code{new_vars}. The
variable is not included in the output dataset.

For handling of \code{NA}s in sorting variables see \href{../articles/generic.html#sort_order}{Sort Order}.

\emph{Permitted Values}: list of expressions created by \code{exprs()}, e.g.,
\code{exprs(ADT, desc(AVAL))} or \code{NULL}}

\item{new_vars}{Variables to add

The specified variables from the additional dataset are added to the output
dataset. Variables can be renamed by naming the element, i.e., \verb{new_vars = exprs(<new name> = <old name>)}.

For example \code{new_vars = exprs(var1, var2)} adds variables \code{var1} and \code{var2}
from \code{dataset_add} to the input dataset.

And \code{new_vars = exprs(var1, new_var2 = old_var2)} takes \code{var1} and
\code{old_var2} from \code{dataset_add} and adds them to the input dataset renaming
\code{old_var2} to \code{new_var2}.

Values of the added variables can be modified by specifying an expression.
For example, \code{new_vars = LASTRSP = exprs(str_to_upper(AVALC))} adds the
variable \code{LASTRSP} to the dataset and sets it to the upper case value of
\code{AVALC}.

If the argument is not specified or set to \code{NULL}, all variables from the
additional dataset (\code{dataset_add}) are added.

\emph{Permitted Values}: list of variables or named expressions created by \code{exprs()}}

\item{tmp_obs_nr_var}{Temporary observation number

The specified variable is added to the input dataset (\code{dataset}) and the
additional dataset (\code{dataset_add}). It is set to the observation number
with respect to \code{order}. For each by group (\code{by_vars}) the observation
number starts with \code{1}. The variable can be used in the conditions
(\code{filter_join}, \code{first_cond_upper}, \code{first_cond_lower}). It can also be
used to select consecutive observations or the last observation.

The variable is not included in the output dataset. To include it specify
it for \code{new_vars}.}

\item{join_vars}{Variables to use from additional dataset

Any extra variables required from the additional dataset for \code{filter_join}
should be specified for this argument. Variables specified for \code{new_vars}
do not need to be repeated for \code{join_vars}. If a specified variable exists
in both the input dataset and the additional dataset, the suffix ".join" is
added to the variable from the additional dataset.

If an expression is named, e.g., \code{exprs(EXTDT = convert_dtc_to_dt(EXSTDTC))}, a corresponding variable is added to the
additional dataset and can be used in the filter conditions (\code{filter_add},
\code{filter_join}) and for \code{new_vars}. The variable is not included in the
output dataset.

The variables are not included in the output dataset.

\emph{Permitted Values}: list of variables or named expressions created by \code{exprs()}}

\item{join_type}{Observations to keep after joining

The argument determines which of the joined observations are kept with
respect to the original observation. For example, if \code{join_type = "after"}
is specified all observations after the original observations are kept.

For example for confirmed response or BOR in the oncology setting or
confirmed deterioration in questionnaires the confirmatory assessment must
be after the assessment. Thus \code{join_type = "after"} could be used.

Whereas, sometimes you might allow for confirmatory observations to occur
prior to the observation. For example, to identify AEs occurring on or
after seven days before a COVID AE. Thus \code{join_type = "all"} could be used.

\emph{Permitted Values:} \code{"before"}, \code{"after"}, \code{"all"}}

\item{filter_add}{Filter for additional dataset (\code{dataset_add})

Only observations from \code{dataset_add} fulfilling the specified condition are
joined to the input dataset. If the argument is not specified, all
observations are joined.

Variables created by \code{order} or \code{new_vars} arguments can be used in the
condition.

The condition can include summary functions like \code{all()} or \code{any()}. The
additional dataset is grouped by the by variables (\code{by_vars}).

\emph{Permitted Values}: a condition}

\item{first_cond_lower}{Condition for selecting range of data (before)

If this argument is specified, the other observations are restricted from
the first observation before the current observation where the specified
condition is fulfilled up to the current observation. If the condition is
not fulfilled for any of the other observations, no observations are
considered.

This argument should be specified if \code{filter_join} contains summary
functions which should not apply to all observations but only from a
certain observation before the current observation up to the current
observation. For an example see the last example below.}

\item{first_cond_upper}{Condition for selecting range of data (after)

If this argument is specified, the other observations are restricted up to
the first observation where the specified condition is fulfilled. If the
condition is not fulfilled for any of the other observations, no
observations are considered.

This argument should be specified if \code{filter_join} contains summary
functions which should not apply to all observations but only up to the
confirmation assessment. For an example see the last example below.}

\item{filter_join}{Filter for the joined dataset

The specified condition is applied to the joined dataset. Therefore
variables from both datasets \code{dataset} and \code{dataset_add} can be used.

Variables created by \code{order} or \code{new_vars} arguments can be used in the
condition.

The condition can include summary functions like \code{all()} or \code{any()}. The
joined dataset is grouped by the original observations.

\emph{Permitted Values}: a condition}

\item{mode}{Selection mode

Determines if the first or last observation is selected. If the \code{order}
argument is specified, \code{mode} must be non-null.

If the \code{order} argument is not specified, the \code{mode} argument is ignored.

\emph{Permitted Values}: \code{"first"}, \code{"last"}, \code{NULL}}

\item{exist_flag}{Exist flag

If the argument is specified (e.g., \code{exist_flag = FLAG}), the specified
variable (e.g., \code{FLAG}) is added to the input dataset. This variable will
be the value provided in \code{true_value} for all selected records from \code{dataset_add}
which are merged into the input dataset, and the value provided in \code{false_value} otherwise.

\emph{Permitted Values}: Variable name}

\item{true_value}{True value

The value for the specified variable \code{exist_flag}, applicable to
the first or last observation (depending on the mode) of each by group.

Permitted Values: An atomic scalar}

\item{false_value}{False value

The value for the specified variable \code{exist_flag}, NOT applicable to
the first or last observation (depending on the mode) of each by group.

Permitted Values: An atomic scalar}

\item{missing_values}{Values for non-matching observations

For observations of the input dataset (\code{dataset}) which do not have a
matching observation in the additional dataset (\code{dataset_add}) the values
of the specified variables are set to the specified value. Only variables
specified for \code{new_vars} can be specified for \code{missing_values}.

\emph{Permitted Values}: named list of expressions, e.g.,
\code{exprs(BASEC = "MISSING", BASE = -1)}}

\item{check_type}{Check uniqueness?

If \code{"warning"} or \code{"error"} is specified, the specified message is issued
if the observations of the (restricted) joined dataset are not unique with
respect to the by variables and the order.

This argument is ignored if \code{order} is not specified. In this case an error
is issued independent of \code{check_type} if the restricted joined dataset
contains more than one observation for any of the observations of the input
dataset.

\emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}}
}
\value{
The output dataset contains all observations and variables of the
input dataset and additionally the variables specified for \code{new_vars} from
the additional dataset (\code{dataset_add}).
}
\description{
The function adds variables from an additional dataset to the input dataset.
The selection of the observations from the additional dataset can depend on
variables from both datasets. For example, add the lowest value (nadir)
before the current observation.
}
\details{
\enumerate{
\item The variables specified by \code{order} are added to the additional dataset
(\code{dataset_add}).
\item The variables specified by \code{join_vars} are added to the additional dataset
(\code{dataset_add}).
\item The records from the additional dataset (\code{dataset_add}) are restricted to
those matching the \code{filter_add} condition.
\item The input dataset and the (restricted) additional dataset are left joined
by the grouping variables (\code{by_vars}). If no grouping variables are
specified, a full join is performed.
\item If \code{first_cond_lower} is specified, for each observation of the input
dataset the joined dataset is restricted to observations from the first
observation where \code{first_cond_lower} is fulfilled (the observation fulfilling
the condition is included) up to the observation of the input dataset. If for
an observation of the input dataset the condition is not fulfilled, the
observation is removed.

If \code{first_cond_upper} is specified, for each observation of the input
dataset the joined dataset is restricted to observations up to the first
observation where \code{first_cond_upper} is fulfilled (the observation
fulfilling the condition is included). If for an observation of the input
dataset the condition is not fulfilled, the observation is removed.

For an example see the last example in the "Examples" section.
\item The joined dataset is restricted by the \code{filter_join} condition.
\item If \code{order} is specified, for each observation of the input dataset the
first or last observation (depending on \code{mode}) is selected.
\item The variables specified for \code{new_vars} are created (if requested) and
merged to the input dataset. I.e., the output dataset contains all
observations from the input dataset. For observations without a matching
observation in the joined dataset the new variables are set as specified by
\code{missing_values} (or to \code{NA} for variables not in \code{missing_values}).
Observations in the additional dataset which have no matching observation in
the input dataset are ignored.
}

\strong{Note:} This function creates temporary datasets which may be much bigger
than the input datasets. If this causes memory issues, please try setting
the admiral option \code{save_memory} to \code{TRUE} (see \code{set_admiral_options()}).
This reduces the memory consumption but increases the run-time.
}
\examples{
library(tibble)
library(lubridate)
library(dplyr, warn.conflicts = FALSE)
library(tidyr)

# Add AVISIT (based on time windows), AWLO, and AWHI
adbds <- tribble(
  ~USUBJID, ~ADY,
  "1",       -33,
  "1",        -2,
  "1",         3,
  "1",        24,
  "2",        NA,
)

windows <- tribble(
  ~AVISIT,    ~AWLO, ~AWHI,
  "BASELINE",   -30,     1,
  "WEEK 1",       2,     7,
  "WEEK 2",       8,    15,
  "WEEK 3",      16,    22,
  "WEEK 4",      23,    30
)

derive_vars_joined(
  adbds,
  dataset_add = windows,
  join_type = "all",
  filter_join = AWLO <= ADY & ADY <= AWHI
)

# derive the nadir after baseline and before the current observation
adbds <- tribble(
  ~USUBJID, ~ADY, ~AVAL,
  "1",        -7,    10,
  "1",         1,    12,
  "1",         8,    11,
  "1",        15,     9,
  "1",        20,    14,
  "1",        24,    12,
  "2",        13,     8
)

derive_vars_joined(
  adbds,
  dataset_add = adbds,
  by_vars = exprs(USUBJID),
  order = exprs(AVAL),
  new_vars = exprs(NADIR = AVAL),
  join_vars = exprs(ADY),
  join_type = "all",
  filter_add = ADY > 0,
  filter_join = ADY.join < ADY,
  mode = "first",
  check_type = "none"
)

# add highest hemoglobin value within two weeks before AE,
# take earliest if more than one
adae <- tribble(
  ~USUBJID, ~ASTDY,
  "1",           3,
  "1",          22,
  "2",           2
)

adlb <- tribble(
  ~USUBJID, ~PARAMCD, ~ADY, ~AVAL,
  "1",      "HGB",       1,   8.5,
  "1",      "HGB",       3,   7.9,
  "1",      "HGB",       5,   8.9,
  "1",      "HGB",       8,   8.0,
  "1",      "HGB",       9,   8.0,
  "1",      "HGB",      16,   7.4,
  "1",      "HGB",      24,   8.1,
  "1",      "ALB",       1,    42,
)

derive_vars_joined(
  adae,
  dataset_add = adlb,
  by_vars = exprs(USUBJID),
  order = exprs(AVAL, desc(ADY)),
  new_vars = exprs(HGB_MAX = AVAL, HGB_DY = ADY),
  join_type = "all",
  filter_add = PARAMCD == "HGB",
  filter_join = ASTDY - 14 <= ADY & ADY <= ASTDY,
  mode = "last"
)

# Add APERIOD, APERIODC based on ADSL
adsl <- tribble(
  ~USUBJID, ~AP01SDT,     ~AP01EDT,     ~AP02SDT,     ~AP02EDT,
  "1",      "2021-01-04", "2021-02-06", "2021-02-07", "2021-03-07",
  "2",      "2021-02-02", "2021-03-02", "2021-03-03", "2021-04-01"
) \%>\%
  mutate(across(ends_with("DT"), ymd)) \%>\%
  mutate(STUDYID = "xyz")

period_ref <- create_period_dataset(
  adsl,
  new_vars = exprs(APERSDT = APxxSDT, APEREDT = APxxEDT)
)

period_ref

adae <- tribble(
  ~USUBJID, ~ASTDT,
  "1",      "2021-01-01",
  "1",      "2021-01-05",
  "1",      "2021-02-05",
  "1",      "2021-03-05",
  "1",      "2021-04-05",
  "2",      "2021-02-15",
) \%>\%
  mutate(
    ASTDT = ymd(ASTDT),
    STUDYID = "xyz"
  )

derive_vars_joined(
  adae,
  dataset_add = period_ref,
  by_vars = exprs(STUDYID, USUBJID),
  join_vars = exprs(APERSDT, APEREDT),
  join_type = "all",
  filter_join = APERSDT <= ASTDT & ASTDT <= APEREDT
)

# Add day since last dose (LDRELD)
adae <- tribble(
  ~USUBJID, ~ASTDT,       ~AESEQ,
  "1",      "2020-02-02",      1,
  "1",      "2020-02-04",      2
) \%>\%
  mutate(ASTDT = ymd(ASTDT))

ex <- tribble(
  ~USUBJID, ~EXSDTC,
  "1",      "2020-01-10",
  "1",      "2020-01",
  "1",      "2020-01-20",
  "1",      "2020-02-03"
)

## Please note that EXSDT is created via the order argument and then used
## for new_vars, filter_add, and filter_join
derive_vars_joined(
  adae,
  dataset_add = ex,
  by_vars = exprs(USUBJID),
  order = exprs(EXSDT = convert_dtc_to_dt(EXSDTC)),
  join_type = "all",
  new_vars = exprs(LDRELD = compute_duration(
    start_date = EXSDT, end_date = ASTDT
  )),
  filter_add = !is.na(EXSDT),
  filter_join = EXSDT <= ASTDT,
  mode = "last"
)

# first_cond_lower and first_cond_upper argument
myd <- tribble(
  ~subj, ~day, ~val,
  "1",      1, "++",
  "1",      2, "-",
  "1",      3, "0",
  "1",      4, "+",
  "1",      5, "++",
  "1",      6, "-",
  "2",      1, "-",
  "2",      2, "++",
  "2",      3, "+",
  "2",      4, "0",
  "2",      5, "-",
  "2",      6, "++"
)

# derive last "++" day before "0" where all results in between are "+" or "++"
derive_vars_joined(
  myd,
  dataset_add = myd,
  by_vars = exprs(subj),
  order = exprs(day),
  mode = "first",
  new_vars = exprs(prev_plus_day = day),
  join_vars = exprs(val),
  join_type = "before",
  first_cond_lower = val.join == "++",
  filter_join = val == "0" & all(val.join \%in\% c("+", "++"))
)

# derive first "++" day after "0" where all results in between are "+" or "++"
derive_vars_joined(
  myd,
  dataset_add = myd,
  by_vars = exprs(subj),
  order = exprs(day),
  mode = "last",
  new_vars = exprs(next_plus_day = day),
  join_vars = exprs(val),
  join_type = "after",
  first_cond_upper = val.join == "++",
  filter_join = val == "0" & all(val.join \%in\% c("+", "++"))
)
}
\seealso{
\code{\link[=derive_var_joined_exist_flag]{derive_var_joined_exist_flag()}}, \code{\link[=filter_joined]{filter_joined()}}

General Derivation Functions for all ADaMs that returns variable appended to dataset:
\code{\link{derive_var_extreme_flag}()},
\code{\link{derive_var_joined_exist_flag}()},
\code{\link{derive_var_merged_ef_msrc}()},
\code{\link{derive_var_merged_exist_flag}()},
\code{\link{derive_var_merged_summary}()},
\code{\link{derive_var_obs_number}()},
\code{\link{derive_var_relative_flag}()},
\code{\link{derive_vars_cat}()},
\code{\link{derive_vars_computed}()},
\code{\link{derive_vars_merged}()},
\code{\link{derive_vars_merged_lookup}()},
\code{\link{derive_vars_transposed}()}
}
\concept{der_gen}
\keyword{der_gen}
