% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/build.primary.metabolome.R
\name{build.primary.metabolome}
\alias{build.primary.metabolome}
\title{build.primary.metabolome}
\usage{
build.primary.metabolome(
  pc.directory = NULL,
  get.properties = FALSE,
  threads = 8,
  db.name = "primary.metabolome",
  rcdk.desc = c("org.openscience.cdk.qsar.descriptors.molecular.XLogPDescriptor",
    "org.openscience.cdk.qsar.descriptors.molecular.AcidicGroupCountDescriptor",
    "org.openscience.cdk.qsar.descriptors.molecular.BasicGroupCountDescriptor",
    "org.openscience.cdk.qsar.descriptors.molecular.TPSADescriptor"),
  pubchem.bio.object = NULL,
  output.directory = NULL,
  keep.primary.only = TRUE,
  min.tax.ct = 3
)
}
\arguments{
\item{pc.directory}{directory from which to load pubchem .Rdata files}

\item{get.properties}{logical. If TRUE, will calculate and return the rcdk properties specified by 'rcdk.desc'; by default these are XLogP, acidic group count, basic group count, and TPSA.}

\item{threads}{integer. How many threads to use when calculating rcdk properties.  Parallel processing is performed via the doParallel and foreach packages.}

\item{db.name}{character. What do you wish the file name for the saved version of this database to be?  default = 'primary.metabolome'.  Saved as an .Rdata file in the 'pc.directory' location.}

\item{rcdk.desc}{vector. Character vector of valid rcdk descriptors.  default = c("org.openscience.cdk.qsar.descriptors.molecular.XLogPDescriptor", "org.openscience.cdk.qsar.descriptors.molecular.AcidicGroupCountDescriptor", "org.openscience.cdk.qsar.descriptors.molecular.BasicGroupCountDescriptor", "org.openscience.cdk.qsar.descriptors.molecular.TPSADescriptor"). To see descriptor categories: 'dc <- rcdk::get.desc.categories(); dc'.  To see the descriptors within one category: 'dn <- rcdk::get.desc.names(dc[4]); dn'. Note that the four default descriptors are relatively fast to calculate - some descriptors take a very long time to calculate.  You can calculate as many as you wish, but processing time will increase as more descriptors are added.}

\item{pubchem.bio.object}{R data.table, generally produced by build.pubchem.bio; preferably, define pc.directory}

\item{output.directory}{directory to which the pubchem.bio database is saved.  If NULL, will try to save in pc.directory (if provided), else not saved.}

\item{keep.primary.only}{logical.  If TRUE, only biological metabolites scored as 'primary' are returned. If FALSE, full dataset of metabolites is returned, with new logical column, 'primary'}

\item{min.tax.ct}{integer.  If assigned an integer value, only those metabolites with at least min.tax.ct unique taxonomy assignments are considered 'primary'.  default = 3.}
}
\value{
a data frame containing pubchem CID ('cid'), and lowest common ancestor ('lca') NCBI taxonomy ID integer. will also save to pc.directory as .Rdata file.
}
\description{
utilizes downloaded and properly formatted local pubchem data created by 'get.pubchem.ftp' function to filter a dataset created by 'build.pubchem.bio' function
}
\details{
utilizes downloaded and properly formatted local pubchem data created by 'get.pubchem.ftp' function
}
\author{
Corey Broeckling
}
\examples{
data('pubchem.bio', package = "pubchem.bio")
my.primary.db <- build.primary.metabolome(
  pubchem.bio.object = pubchem.bio,
  get.properties = FALSE, threads = 1)
head(my.primary.db)
}
