% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/preprocessing.R
\name{flow_images_from_dataframe}
\alias{flow_images_from_dataframe}
\title{Takes the dataframe and the path to a directory and generates batches of
augmented/normalized data.}
\usage{
flow_images_from_dataframe(
  dataframe,
  directory = NULL,
  x_col = "filename",
  y_col = "class",
  generator = image_data_generator(),
  target_size = c(256, 256),
  color_mode = "rgb",
  classes = NULL,
  class_mode = "categorical",
  batch_size = 32,
  shuffle = TRUE,
  seed = NULL,
  save_to_dir = NULL,
  save_prefix = "",
  save_format = "png",
  subset = NULL,
  interpolation = "nearest",
  drop_duplicates = TRUE
)
}
\arguments{
\item{dataframe}{\code{data.frame} containing the filepaths relative to
directory (or absolute paths if directory is \code{NULL}) of the images in a
character column. It should include other column/s depending on the
\code{class_mode}:
\itemize{
\item if \code{class_mode} is "categorical" (default value) it must
include the \code{y_col} column with the class/es of each image. Values in
the column can be character/list if a single class or list if multiple classes.
\item if \code{class_mode} is "binary" or "sparse" it must include the given
\code{y_col} column with class values as strings.
\item if \code{class_mode} is "other" it
should contain the columns specified in \code{y_col}.
\item if \code{class_mode} is "input" or NULL no extra column is needed.
}}

\item{directory}{character, path to the directory to read images from.
If \code{NULL}, data in \code{x_col} column should be absolute paths.}

\item{x_col}{character, column in dataframe that contains the filenames
(or absolute paths if directory is \code{NULL}).}

\item{y_col}{string or list, column/s in dataframe that has the target data.}

\item{generator}{Image data generator to use for augmenting/normalizing image
data.}

\item{target_size}{Either \code{NULL} (default to original size) or integer vector
\verb{(img_height, img_width)}.}

\item{color_mode}{one of "grayscale", "rgb". Default: "rgb". Whether the
images will be converted to have 1 or 3 color channels.}

\item{classes}{optional list of classes (e.g. \code{c('dogs', 'cats')}).
Default: \code{NULL}. If not provided, the list of classes will be
automatically inferred from the \code{y_col} column (the order of the classes,
which will map to the label indices, will be alphanumeric).
The dictionary containing the mapping from class names to class indices
can be obtained via the attribute \code{class_indices}.}

\item{class_mode}{one of "categorical", "binary", "sparse", "input", "other" or \code{NULL}.
Default: "categorical". Mode for yielding the targets:
\itemize{
\item "binary": 1D array of binary labels,
\item "categorical": 2D array of one-hot encoded labels. Supports multi-label output.
\item "sparse": 1D array of integer labels,
\item "input": images identical to input images (mainly used to work with autoencoders),
\item "other": array of y_col data,
\item \code{NULL}: no targets are returned (the generator will only yield batches of
image data, which is useful to use in \code{predict_generator()}).
}}

\item{batch_size}{int (default: \code{32}).}

\item{shuffle}{boolean (default: \code{TRUE}).}

\item{seed}{int (default: \code{NULL}).}

\item{save_to_dir}{\code{NULL} or str (default: \code{NULL}). This allows you to
optionally specify a directory to which to save the augmented pictures being
generated (useful for visualizing what you are doing).}

\item{save_prefix}{str (default: ''). Prefix to use for filenames of saved
pictures (only relevant if \code{save_to_dir} is set).}

\item{save_format}{one of "png", "jpeg" (only relevant if save_to_dir is
set). Default: "png".}

\item{subset}{Subset of data (\code{"training"} or \code{"validation"}) if
\code{validation_split} is set in \code{\link[=image_data_generator]{image_data_generator()}}.}

\item{interpolation}{Interpolation method used to resample the image if the
target size is different from that of the loaded image. Supported methods
are "nearest", "bilinear", and "bicubic". If PIL version 1.1.3 or newer is
installed, "lanczos" is also supported. If PIL version 3.4.0 or newer is
installed, "box" and "hamming" are also supported. By default, "nearest"
is used.}

\item{drop_duplicates}{Boolean, whether to drop duplicate rows based on
filename.}
}
\description{
Takes the dataframe and the path to a directory and generates batches of
augmented/normalized data.
}
\details{
Yields batches indefinitely, in an infinite loop.
}
\note{
This function requires that \code{pandas} (python module) is installed in the
same environment as \code{tensorflow} and \code{keras}.

If you are using \code{r-tensorflow} (the default environment) you can install
\code{pandas} by running \code{reticulate::virtualenv_install("pandas", envname = "r-tensorflow")}
or \code{reticulate::conda_install("pandas", envname = "r-tensorflow")} depending on
the kind of environment you are using.
}
\section{Yields}{
 \verb{(x, y)} where \code{x} is an array of image data and \code{y} is an
array of corresponding labels. The generator loops indefinitely.
}

\seealso{
Other image preprocessing: 
\code{\link{fit_image_data_generator}()},
\code{\link{flow_images_from_data}()},
\code{\link{flow_images_from_directory}()},
\code{\link{image_load}()},
\code{\link{image_to_array}()}
}
\concept{image preprocessing}
