% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataset-documentation.R
\docType{data}
\name{geco_30over_3err}
\alias{geco_30over_3err}
\title{Simulated Noisy Records}
\format{
A list of 7 data.frames. Each data.frame has 200 rows and 16 columns.
}
\usage{
data(geco_30over_3err)
}
\description{
A dataset containing several files of noisy simulated records. Records are
simulated using GeCo (Tran, Vatsalan, and Christen (2013)) and organized into
files of 200 records each. The columns in each file include two ID columns
for validating links:
}
\details{
\itemize{
\item rec.id. Contains the entity number and duplicate number of each record. This is unique to a record.
\item entity. Contains the entity number of which this record is a copy. It is identical for all records that are noisy duplicates of the same original.
}

The columns also include fields used to perform linkage, into which 3 errors
have been randomly inserted:

\itemize{
\item given.name, surname. Text fields with potential typographical errors.
\item age, occup, extra1, ..., extra10. Categorical fields with potential swapped category errors.
}

Linkage may be performed on either the full dataset or on only a subset of the fields.
}
\seealso{
geco_small
}
\keyword{datasets}
