% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot_geodist.R
\name{plot_geodist}
\alias{plot_geodist}
\title{Plot Euclidean nearest neighbor distances in geographic space or feature space}
\usage{
plot_geodist(
  x,
  modeldomain,
  type = "geo",
  cvfolds = NULL,
  testdata = NULL,
  samplesize = 2000,
  sampling = "regular",
  variables = NULL,
  showPlot = TRUE
)
}
\arguments{
\item{x}{object of class sf, training data locations}

\item{modeldomain}{raster or sf object defining the prediction area (see Details)}

\item{type}{"geo" or "feature". Should the distance be computed in geographic space or in the normalized multivariate predictor space? (see Details)}

\item{cvfolds}{optional. List of row indices of x that are held back in each CV iteration. See e.g. ?createFolds or ?createSpaceTimeFolds}

\item{testdata}{optional. object of class sf: Data used for independent validation}

\item{samplesize}{numeric. How many prediction samples should be used? Only required if modeldomain is a raster (see Details)}

\item{sampling}{character. How to draw prediction samples? See \link[sp]{spsample}. Use sampling = "Fibonacci" for global applications.}

\item{variables}{character vector defining the predictor variables used if type = "feature". If not provided, all variables included in modeldomain are used.}

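\item{showPlot}{logical. Should the plot be displayed? With showPlot = FALSE the plot can still be accessed from the returned list (see Value and Examples).}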
}
\value{
A list including the plot and the corresponding data.frame containing the distances
}
\description{
Density plot of nearest neighbor distances in geographic space or feature space among training data as well as between training data and prediction locations.
Optionally, the nearest neighbor distances between training data and test data or between training data and CV iterations are shown.
The plot can be used to check whether a chosen CV method is suitable, i.e. representative, for estimating map accuracy. Alternatively, distances can also be calculated in the multivariate feature space.
}
\details{
The modeldomain is an sf polygon or a raster that defines the prediction area. The function takes a point sample (regular by default, see argument sampling; amount defined by samplesize) from the spatial extent.
    If type = "feature", the argument modeldomain (and, if provided, also the testdata) has to include predictors. Predictor values for x are optional if modeldomain is a raster. If not provided, they are extracted from the modeldomain rasterStack.
}
\note{
See Meyer and Pebesma (2022) for an application of this plotting function.
}
\examples{
\dontrun{
library(sf)
library(raster)
library(caret)

########### prepare sample data:
# Load the Cookfarm data shipped with CAST and average the selected
# columns per SOURCEID, giving one row per source.
dat <- get(load(system.file("extdata","Cookfarm.RData",package="CAST")))
dat <- aggregate(dat[,c("DEM","TWI", "NDRE.M", "Easting", "Northing")],
by=list(as.character(dat$SOURCEID)),mean)
# Drop the grouping column that aggregate() puts first.
pts <- dat[,-1]
pts <- st_as_sf(pts,coords=c("Easting","Northing"))
st_crs(pts) <- 26911
# Split the points into a training set and an independent test set.
pts_train <- pts[1:29,]
pts_test <- pts[30:42,]
# Predictor stack that defines the prediction area (modeldomain).
studyArea <- raster::stack(system.file("extdata","predictors_2012-03-25.grd",package="CAST"))
studyArea <- studyArea[[c("DEM","TWI", "NDRE.M", "NDRE.Sd", "Bt")]]

########### Distance between training data and new data:
dist <- plot_geodist(pts_train,studyArea)

########### Distance between training data, new data and test data:
#mapview(pts_train,col.regions="blue")+mapview(pts_test,col.regions="red")
dist <- plot_geodist(pts_train,studyArea,testdata=pts_test)

########### Distance between training data, new data and CV folds:
# returnTrain=FALSE: each list element holds the held-back row indices,
# which is the format expected by the cvfolds argument.
folds <- createFolds(1:nrow(pts_train),k=3,returnTrain=FALSE)
dist <- plot_geodist(x=pts_train, modeldomain=studyArea, cvfolds=folds)

########### Distances in the feature space:
plot_geodist(x=pts_train, modeldomain=studyArea,
type = "feature",variables=c("DEM","TWI", "NDRE.M"))

dist <- plot_geodist(x=pts_train, modeldomain=studyArea, cvfolds = folds, testdata = pts_test,
type = "feature",variables=c("DEM","TWI", "NDRE.M"))

############ Example for a random global dataset
############ (refer to figure in Meyer and Pebesma 2022)
library(sf)
library(rnaturalearth)
library(ggplot2)

### Define prediction area (here: global):
ee <- st_crs("+proj=eqearth")
co <- ne_countries(returnclass = "sf")
co.ee <- st_transform(co, ee)

### Simulate a spatial random sample
### (alternatively replace pts_random by a real sampling dataset, see Meyer and Pebesma 2022):
sf_use_s2(FALSE)
pts_random <- st_sample(co, 2000)

### See points on the map:
# guides(... = "none") replaces the logical form (FALSE), which is
# deprecated in recent ggplot2 versions.
ggplot() + geom_sf(data = co.ee, fill="#00BFC4",col="#00BFC4") +
     geom_sf(data = pts_random, color = "#F8766D",size=0.5, shape=3) +
     guides(fill = "none", col = "none") +
     labs(x = NULL, y = NULL)

### plot distances:
# showPlot=FALSE: take the plot from the returned list and modify it
# (log-scaled x axis) before it is drawn.
dist <- plot_geodist(pts_random,co,showPlot=FALSE)
dist$plot+scale_x_log10(labels=round)

}
}
\author{
Hanna Meyer, Edzer Pebesma, Marvin Ludwig
}
