% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bipartite.R
\name{bipartiteRL}
\alias{bipartiteRL}
\title{Perform baseline bipartite record linkage before streaming updates}
\usage{
bipartiteRL(
  df1,
  df2,
  flds = NULL,
  flds1 = NULL,
  flds2 = NULL,
  types = NULL,
  breaks = c(0, 0.25, 0.5),
  nIter = 1000,
  burn = round(nIter * 0.1),
  a = 1,
  b = 1,
  aBM = 1,
  bBM = 1,
  seed = 0
)
}
\arguments{
\item{df1, df2}{Files 1 and 2 as dataframes where each row is a record and
each column is a field.}

\item{flds}{Names of the fields on which to compare the records in each file.}

\item{flds1, flds2}{Names of the comparison fields in \code{df1} and
\code{df2}, respectively, for use when the field names differ between the two
files.}

\item{types}{Types of comparisons to use for each field, for example
\code{"lv"} for normalized edit distance or \code{"bi"} for binary
equal/unequal.}

\item{breaks}{Break points used to split the Levenshtein distance on string
fields into discrete levels.}

\item{nIter, burn}{MCMC run length parameters: the number of iterations to run
and the number of initial iterations to discard as burn-in. The number of
samples returned is \code{nIter - burn}.}

\item{a, b}{Prior parameters for m and u, respectively.}

\item{aBM, bBM}{Prior parameters for beta-linkage prior.}

\item{seed}{Random seed to set at the beginning of the MCMC run.}
}
\value{
A list with class \code{"bstrlstate"} that can be passed to future streaming
updates.
}
\description{
This function establishes a baseline linkage between two files, which can then
be built upon with streaming updates that add more files. It outsources the
linkage work to the BRL package and appends information to the resulting object
that allows streaming record linkage to continue.
}
\examples{
data(geco_small)

# Names of the columns on which to perform linkage
fieldnames <- c("given.name", "surname", "age", "occup",
                "extra1", "extra2", "extra3", "extra4", "extra5", "extra6")

# How to compare each of the fields
# First name and last name use normalized edit distance
# All others binary equal/unequal
types <- c("lv", "lv",
           "bi", "bi", "bi", "bi", "bi", "bi", "bi", "bi")
# Break continuous difference measures into 4 levels using these split points
breaks <- c(0, 0.25, 0.5)

res.twofile <- bipartiteRL(geco_small[[1]], geco_small[[2]],
                           flds = fieldnames, types = types, breaks = breaks,
                           nIter = 10, burn = 5, # Very small number of samples
                           seed = 0)

}
