% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/listGenomes.R
\name{listGenomes}
\alias{listGenomes}
\title{List All Available Genomes}
\usage{
listGenomes(kingdom = "all", details = FALSE, update = FALSE,
  database = "all")
}
\arguments{
\item{kingdom}{a character string specifying a potential filter of available genomes: "all","Archaea", "Bacteria", "Eukaryota", "Viroids", "Viruses".}

\item{details}{a logical value specifying whether only the scientific names of stored genomes shall be returned
(details = FALSE) or all information (details = TRUE) such as "organism_name","kingdoms", "group","subgroup","file_size_MB",
"chrs","organelles","plasmids", and "bio_projects".}

\item{update}{a logical value specifying whether or not the available organism table shall be updated from the NCBI server.
Default is \code{update} = \code{FALSE}.}

\item{database}{a character string specifying the database for which genome availability shall be checked, 
e.g. \code{database} =  \code{"refseq"} or \code{database} =  \code{"all"}.}
}
\value{
A data.frame storing either the organism names (details = FALSE)
or all information present on the NCBI database (details = TRUE).
}
\description{
This function retrieves the names of all genomes available on the NCBI ftp:// server and stores
the results in a file named 'overview.txt' inside the directory '_ncbi_downloads' that
is built inside the workspace.
}
\details{
Internally this function loads the overview.txt file from NCBI:
\url{ftp://ftp.ncbi.nlm.nih.gov/genomes/GENOME_REPORTS/} and creates a directory '_ncbi_downloads' in the \code{tempdir()}
folder to store the overview.txt file for future processing. In case the overview.txt file already exists within the
'_ncbi_downloads' folder and is accessible within the workspace, no download process will be performed again.
}
\note{
Please note that the ftp:// connection relies on the NCBI server and cannot be
reliably accessed via a proxy.
}
\examples{
\dontrun{

# the simplest way to retrieve all names of genomes stored within NCBI databases
head(listGenomes() , 5)

# show all details
head(listGenomes(details = TRUE) , 5)

# show all details only for Bacteria
head(listGenomes(kingdom = "Bacteria", details = TRUE) , 5)

# in case you are interested in the number of genomes available for each kingdom, run:

ncbi_genomes <- listGenomes(details = TRUE)
table(ncbi_genomes[ , "kingdoms"])

# analogously, if you are interested in the number of genomes available for each group, run:
ncbi_genomes <- listGenomes(details = TRUE)
table(ncbi_genomes[ , "group"])

# for subgroup
table(ncbi_genomes[ , "subgroup"])

# you can also limit your search to the refseq database
head(listGenomes(database = "refseq") , 20)

head(listGenomes(details=TRUE, database = "refseq") , 5)

head(listGenomes(kingdom = "Eukaryota", details = TRUE,database = "refseq") , 5)

# order by file size
library(dplyr)
head(arrange(ncbi_genomes, desc(file_size_MB)) , 5)


# you can also update the organism table using the 'update' argument
head(listGenomes(details = TRUE,update = TRUE) , 5)

}

}
\author{
Hajk-Georg Drost
}
\references{
\url{ftp://ftp.ncbi.nlm.nih.gov/genomes/GENOME_REPORTS/overview.txt}
}

