#' Phylogeny generating
#'
#' Generates a phylogeny from a \code{sim} object containing speciation and 
#' extinction times, parent and status information (see \code{?sim}). Returns a
#' \code{phylo} object containing information on the phylogeny, following an 
#' "evolutionary Hennigian" (sensu Ezard et al 2011) format (i.e., a 
#' bifurcating tree). Takes an optional argument encoding fossil occurrences to
#' return a sampled ancestor tree (see references). This tree consists of the
#' original tree, plus the fossil occurrences added as branches of length 
#' \code{0} branching off of the corresponding species at the time of 
#' occurrence. Such trees can be used, as is or with small modifications, as
#' starting trees in phylogenetic inference software that make use of the
#' fossilized birth-death model. Returns \code{NA} and sends a warning if the
#' simulation has only one lineage or if more than one species has \code{NA}
#' as parent (i.e. there is no single common ancestor in the simulation). In 
#' the latter case, please use \code{find.lineages} first. 
#'
#' @param sim A \code{sim} object, containing extinction times, speciation times,
#' parent, and status information for each species in the simulation. See 
#' \code{?sim}.
#' 
#' @param fossils A data frame with a \code{"Species"} column and a
#' \code{SampT} column, usually an output of the \code{sample.clade}
#' function. Species names must contain only one number each, corresponding
#' to the order of the \code{sim} vectors.
#'
#' @param returnRootTime Logical indicating if phylo should have information
#' regarding \code{root.time}. If set to \code{NULL} (default), returned 
#' phylogenies will not have \code{root.time} if there is at least one extant 
#' lineage in the sim object. If there are only extinct lineages in the 
#' \code{sim} object and it is set to \code{NULL}, \code{root.time} will be 
#' returned. If set to \code{FALSE} or \code{TRUE}, \code{root.time} will be 
#' removed or forced into the phylo object, respectively. In this case, we 
#' highly recommend users to read about the behavior of some functions (such as
#' APE's \code{axisPhylo}) when this argument is forced.
#'
#' @details When \code{root.time} is added to a phylogeny, packages such as APE
#' can change their interpretation of the information in the \code{phylo} 
#' object. For instance, a completely extinct phylogeny might be interpreted as
#' extant if there is no info about \code{root.time}. This might create 
#' misleading interpretations even with simple functions such as 
#' \code{ape::axisPhylo}. \code{make.phylo} tries to accommodate different 
#' evo/paleo practices in its default value for \code{returnRootTime} by 
#' automatically attributing \code{root.time} when the \code{sim} object is 
#' extinct. We encourage careful inspection of output if users force 
#' \code{make.phylo} to use a specific behavior, especially when using 
#' phylogenies generated by this function as input in functions from other 
#' packages. For extinct phylogenies, it might usually be important to 
#' explicitly provide information that the edge is indeed a relevant part of 
#' the phylogeny (for instance adding \code{root.edge = TRUE} when plotting a 
#' phylogeny with \code{root.time} information with \code{ape::plot.phylo}. The
#' last example here provides  a visualization of this issue.
#' 
#' @return A \code{phylo} object from the APE package. Tip labels are numbered
#' following the order of species in the \code{sim} object. If fossil 
#' occurrence data was supplied, the tree will include fossil occurrences as 
#' tips with branch length \code{0}, bifurcating at its sampling time from the 
#' corresponding species' edge (i.e. a sampled ancestor tree). Note that to 
#' obtain a true sampled ancestor (SA) tree, one must perform the last step of 
#' deleting tips that are not either extant or fossil occurrences (i.e. the 
#' tips at true time of extinction). 
#' 
#' Note this package does not depend on APE (Paradis et al, 2004) since it is 
#' never used inside its functions, but it is suggested since one might want to
#' manipulate the phylogenies generated by this function.
#' 
#' @author Matheus Januario and Bruno do Rosario Petrucci
#' 
#' @references
#' 
#' Ezard, T. H., Pearson, P. N., Aze, T., & Purvis, A. (2012). The meaning of 
#' birth and death (in macroevolutionary birth-death models). Biology letters, 
#' 8(1), 139-142.
#' 
#' Paradis, E., Claude, J., Strimmer, & K. (2004). APE: Analyses of Phylogenetics
#' and Evolution in R language. Bioinformatics, 20(2), 289-290.
#' 
#' Heath, T. A., Huelsenbeck, J. P., & Stadler, T. (2014). The fossilized 
#' birth–death process for coherent calibration of divergence-time estimates. 
#' Proceedings of the National Academy of Sciences, 111(29), E2957-E2966.
#'
#' @examples
#'
#' ###
#' # we can start with a simple phylogeny
#' 
#' # set a simulation seed
#' set.seed(1) 
#' 
#' # simulate a BD process with constant rates
#' sim <- bd.sim(n0 = 1, lambda = 0.3, mu = 0.1, tMax = 10, 
#'              nExtant = c(2, Inf))
#' 
#' # make the phylogeny
#' phy <- make.phylo(sim)
#' 
#' # plot it
#' if (requireNamespace("ape", quietly = TRUE)) {
#'   # store old par settings
#'   oldPar <- par(no.readonly = TRUE) 
#'   
#'   # change par to show phylogenies
#'   par(mfrow = c(1, 2))
#'   
#'   ape::plot.phylo(phy)
#'   
#'   # we can also plot only the molecular phylogeny
#'   ape::plot.phylo(ape::drop.fossil(phy))
#'   
#'   # reset par
#'   par(oldPar)
#' }
#' 
#' ###
#' # this works for sim generated with any of the scenarios in bd.sim
#' 
#' # set seed
#' set.seed(1)
#' 
#' # simulate
#' sim <- bd.sim(n0 = 1, lambda = function(t) 0.2 + 0.01*t, 
#'              mu = function(t) 0.03 + 0.015*t, tMax = 10, 
#'              nExtant = c(2, Inf))
#' 
#' # make the phylogeny
#' phy <- make.phylo(sim)
#' 
#' # plot it
#' if (requireNamespace("ape", quietly = TRUE)) {
#'   # store old par settings
#'   oldPar <- par(no.readonly = TRUE) 
#'   
#'   # change par to show phylogenies
#'   par(mfrow = c(1, 2))
#'   
#'   # plot phylogeny
#'   ape::plot.phylo(phy)
#'   ape::axisPhylo()
#'   
#'   # we can also plot only the molecular phylogeny
#'   ape::plot.phylo(ape::drop.fossil(phy))
#'   ape::axisPhylo()
#'   
#'   # reset par 
#'   par(oldPar)
#' }
#' 
#' ### 
#' # we can use the fossils argument to generate a sample ancestors tree
#' 
#' # set seed
#' set.seed(1)
#' 
#' # simulate a simple birth-death process
#' sim <- bd.sim(n0 = 1, lambda = 0.2, mu = 0.05, tMax = 10, 
#'               nExtant = c(2, Inf))
#' 
#' # make the traditional phylogeny
#' phy <- make.phylo(sim)
#' 
#' # sample fossils
#' fossils <- sample.clade(sim, 0.1, 10)
#' 
#' # make the sampled ancestor tree
#' saTree <- make.phylo(sim, fossils)
#' 
#' # plot them
#' if (requireNamespace("ape", quietly = TRUE)) {
#'   # store old par settings
#'   oldPar <- par(no.readonly = TRUE) 
#'   
#'   # visualize longevities and fossil occurrences
#'   draw.sim(sim, fossils)
#'   
#'   # change par to show phylogenies
#'   par(mfrow = c(1, 2))
#' 
#'   # phylogeny
#'   ape::plot.phylo(phy, main = "Phylogenetic tree")
#'   ape::axisPhylo()
#'   
#'   # sampled ancestor tree
#'   ape::plot.phylo(saTree, main = "Sampled Ancestor tree")
#'   ape::axisPhylo()
#'   
#'   # reset par
#'   par(oldPar)
#' }
#' 
#' ### 
#' # finally, we can test the usage of returnRootTime
#' 
#' # set seed
#' set.seed(1)
#' 
#' # simulate a simple birth-death process with more than one
#' # species and completely extinct:
#' sim <- bd.sim(n0 = 1, lambda = 0.5, mu = 0.5, tMax = 10, nExtant = c(0, 0))
#' 
#' # make a phylogeny using default values
#' phy <- make.phylo(sim)
#' 
#' # force phylo to not have root.time info
#' phy_rootless <- make.phylo(sim, returnRootTime = FALSE)
#' 
#' # plot them
#' if (requireNamespace("ape", quietly = TRUE)) {
#'   # store old par settings
#'   oldPar <- par(no.readonly = TRUE) 
#'   
#'   # change par to show phylogenies
#'   par(mfrow = c(1, 3))
#'   
#'   # if we use the default value, axisPhylo works as intended
#'   ape::plot.phylo(phy, root.edge = TRUE, main = "root.time default value")
#'   ape::axisPhylo()
#'   
#'   # note that without root.edge, we have incorrect times,
#'   # as APE assumes tMax was the time of first speciation
#'   ape::plot.phylo(phy, main = "root.edge not passed to plot.phylo")
#'   ape::axisPhylo()
#'   
#'   # if we force root.time to be FALSE, APE assumes the tree is
#'   # ultrametric, which leads to an incorrect time axis
#'   ape::plot.phylo(phy_rootless, main = "root.time forced as FALSE")
#'   ape::axisPhylo()
#'   # note time scale in axis
#'   
#'   # reset par
#'   par(oldPar)
#' }
#' 
#' @name make.phylo
#' @rdname make.phylo
#' @export

make.phylo <- function(sim, fossils = NULL, returnRootTime = NULL) {
  # check that sim is a valid sim object
  if (!is.sim(sim)) {
    stop("Invalid argument, must be a sim object. See ?sim")
  }
  
  # simulations with just one species do not have a phylogeny
  if (length(sim$TE) < 2) {
    message("There is no phylogeny for a simulation with only one lineage")
    return(NA)
  }

  # simulations with more than one starting species have multiple phylogenies
  if (sum(is.na(sim$PAR)) > 1) {
    message("Multiple starting species. Use function find.lineages")
    return(NA)
  }
  
  # if fossils are provided, make a sampled ancestor tree instead
  if (!is.null(fossils)) {
    
    if (nrow(fossils) == 0) {
      stop("Please insert a data frame containig fossil data. 
           See ?make.phylo for more information.")
    }
    
    # make Species field of fossils numeric
    fossils$Species <- as.numeric(gsub('.([0-9]+)*','\\1', fossils$Species))
    
    # get a list of sample species
    sampledSpecies <- unique(c(which(sim$EXTANT), fossils$Species))
    
    # if any of them are not in the sim range, error
    if (any(!(sampledSpecies %in% 1:length(sim$TE)))) {
      stop("Sampled species must all be in sim")
    }
    
    # start the vector with names
    names <- paste0("t", 1:length(sim$TS))
    sampledNames <- paste0("t", which(sim$EXTANT))
    
    # fossil count for each species
    count <- 1
    
    # previous fossil species starting count
    prevFossil <- 0
    
    # changes to make this part easier
    sim$PAR[is.na(sim$PAR)] <- sim$TE[sim$EXTANT] <- 0
    
    # data frame with current and new species numbers
    numbers <- data.frame(cur = 1:length(sim$TS), orig = 1:length(sim$TS))
    # need this to have correct naming conventions
    
    # for each fossil occurrence
    for (i in 1:nrow(fossils)) {
      # change count as needed
      if (fossils[i, ]$Species == prevFossil) {
        count <- count + 1
      } else {
        count <- 1
        prevFossil <- fossils[i, ]$Species
      }
      
      # take fossil sampling time
      sampT <- fossils[i, ]$SampT
      
      # and species number
      nSp <- fossils[i, ]$Species
      
      # if sampT is out of the time nSp was alive, error
      if ((sampT > sim$TS[nSp]) || (sampT < sim$TE[nSp])) {
        stop("Fossil occurrences must fall during corresponding species' period")
      }
      
      # first we need to find the position of the fossil on sim
      
      # get daughters of nSp
      daug <- sim$PAR == nSp
      
      # if nSp does not have daughters
      if (sum(daug) == 0) {
        pos <- max(which(sim$PAR < nSp)) + 1
      } else if (sampT > max(sim$TS[daug])) {
        # if sampT is higher than the age of all other daughters of nSp,
        # make it the first daughter
        pos <- min(which(daug))
      } else {
        # if there are daughters with a higher age, it will be younger than those
        pos <- which(sim$TS == min(sim$TS[daug][sim$TS[daug] > sampT])) + 1
      }
      
      # before and after this position
      before <- if (pos == 0) 0 else 1:(pos - 1)
      after <- if (pos > length(sim$PAR)) 0 else pos:length(sim$PAR)
      
      # get parents that need to be changes
      changePAR <- sim$PAR >= pos
      previousPars <- sim$PAR[changePAR]
      numbers[numbers$cur >= pos, 1] <- numbers[numbers$cur >= pos, 1] + 1 
      
      # change corresponding parents
      sim$PAR[changePAR] <- sim$PAR[changePAR] + 1
      
      # add new parent to the vector
      sim$PAR <- c(sim$PAR[before], nSp, sim$PAR[after])
      
      # and change the easy sim elements
      sim$TS <- c(sim$TS[before], sampT, sim$TS[after])
      sim$TE <- c(sim$TE[before], sampT, sim$TE[after])
      sim$EXTANT <- c(sim$EXTANT[before], FALSE, sim$EXTANT[after])
      
      # find original species number
      nSpOrig <- numbers[numbers$cur == nSp, 2]
      
      # add new name
      newName <- paste0("t", nSpOrig, ".", count)
      names <- c(names[before], newName, names[after])
      
      # add to sampled names
      sampledNames <- c(sampledNames, newName)
      
      # change fossil species names as needed
      fossils$Species <- ifelse(fossils$Species < pos,
                                fossils$Species,
                                fossils$Species + 1)
    }
    
    # change back
    sim$PAR[sim$PAR == 0] <- sim$TE[sim$EXTANT] <- NA
    
  }
  
  #construct the phylogeny:
  
  # make TE sensible
  sim$TE[sim$EXTANT] <- 0
  
  # aux function
  all.dir.daughter <- function(lin, x) {
    # all.dir.daughters returns the name of each direct daughter species
    # x = a simulation from paleobuddy
    # lin = a numeric specyfing the name of a lineage
    return(which(x$PAR == lin))
  }

  # current node
  curNode <- length(sim$TE) + 1 
  
  # create the edge matrix
  edge <- matrix(nrow = 1, ncol = 2, data = c(curNode, NA)) 
  
  # lineages which the function already put in the phylogeny
  passed <- vector()
  
  # current lineage
  i <- 2 
  
  # lineages which the function still has to solve (at least)
  lins <- c(1, 2)
  
  # internal variable to help control the node function
  jump <- 0 
  
  # number of nodes in the phylogeny
  nNode <- length(sim$TE) - 1
  
  # vector storing the node corresponding to each birth
  birthsNode <- rep(NA, times = length(sim$TE)) 
  birthsNode[2] <- curNode
  
  # needed for debugging
  counter <- 0

  # while some tip does not have a place in the phylogeny
  while (length(lins) > 0) {
    # find daughters
    dau <- all.dir.daughter(lin = i, x = sim)
    dau <- dau[!(dau %in% passed)]

    # if lineage has daughters
    if (is.numeric(dau) & length(dau) > 0) {

      # if a whole clade has very recently been put in the phylogeny
      if (jump == 1) {
        curNode <- max(edge) + 1

        # append it to the edge matrix
        if (is.na(edge[nrow(edge), 2])) {
          # if there is no edge there currently
          edge[nrow(edge), 2] <- curNode
        } else {
          # if there is
          edge <- rbind(edge,
                        matrix(nrow = 1, ncol = 2, 
                               data = c(prevNode, curNode)))
        }
        
        # update birthsNode
        birthsNode[dau[1]] <- curNode
        
        # update jump
        jump <- 0

      # if the current lineage is a non-monophyletic branch
      } else { 
        # update curNode
        curNode <- curNode + 1
        
        # append to edge matrix, as above
        if (is.na(edge[nrow(edge), 2])) {
          edge[nrow(edge), 2] <- curNode
        } else {
          edge <- rbind(edge,
                        matrix(nrow = 1, ncol = 2, 
                               data = c(curNode - 1, curNode)))
        }
        
        # update birthsNode
        birthsNode[dau[1]] <- curNode
      }

      # update edge
      edge <- rbind(edge,
                    matrix(nrow = 1, ncol = 2, data = c(curNode, NA)))
      
      # update lineage list and current lineage
      lins <- c(lins, dau[1])
      i <- lins[length(lins)]
    }

    # if lineage has no daughters
    if (is.numeric(dau) & length(dau) == 0) {

      # append lineage to the edge matrix
      if (is.na(edge[nrow(edge), 2])) {
        
        # if there is no edge there currently
        edge[nrow(edge), 2] <- i
      } 
      
      else {
        # if there is
        edge <- rbind(edge, 
                    matrix(nrow = 1, ncol = 2, 
                           data = c(max(
                             edge[!(duplicated(edge[,1]) | 
                                duplicated(edge[,1], fromLast = TRUE)), 1]), i)))
      }
      
      # we put the lineage on the phylogeny
      passed <- c(passed, i)
      
      # update lineage list and current lineage
      lins <- lins[-length(lins)]
      i <- lins[length(lins)]
    }
    
    # this means that the function reached the end of the lineage of the curNode
    if (sum(edge[, 1] %in% curNode) > 1) {
      # the warning here only "affects" a a condition which is never satisfied
      # (jump when there is previous opened edge).
      suppressWarnings(
        {prevNode <- 
          max(edge[!(duplicated(edge[, 1]) | 
                       duplicated(edge[, 1], fromLast = TRUE)), 1])})
      
      # update jump
      jump <- 1
    }

    # registering bugs (if any)
    counter <- counter + 1
    
    # if the function ran for too long
    if (counter > 10*dim(edge)[1]) {
      return("The function is lost and seems that it will not find a phylogeny.
             Please report this error and provide this simulation for debugging")}

  }

  # calculating edge length
  edgeLength <- vector()
  for (i in 1:nrow(edge)) {
    # make auxiliary variables
    aux1 <- edge[i, 1]
    aux2 <- edge[i, 2]

    # if the branch is a tip
    if (aux2 <= length(sim$TE)) {
      # calculate length
      edgeLength[i] <- sim$TS[which(birthsNode == aux1)] - sim$TE[aux2]
    } else {
      # calculate length
      edgeLength[i] <- sim$TS[which(birthsNode == aux1)] -
                        sim$TS[which(birthsNode == aux2)]
    }

  }

  # Tyding all together to create the phylo object
  phy <- list(tip.label = paste0("t", 1:length(sim$TE)),
    edge = edge, 
    edge.length = edgeLength, 
    Nnode = nNode, 
    root.edge = sim$TS[1] - sim$TS[2])
  
  
  # if user does not force root.time
  if(is.null(returnRootTime)) {
    # if there are no extinct species, set root time to
    # origin of the simulation so phylo axis are not wrong
    if (sum(sim$EXTANT) < 1) {
        phy$root.time <- sim$TS[1]
    }
  } else {
    # otherwise, return root time if user asked for it
    if (returnRootTime) {
      phy$root.time <- sim$TS[1]
    }
  }
  
  
  phy$node.label <- seq(from = length(sim$TE) + 1, 
                        to = length(sim$TE) + 1 + nNode)
  
  # if fossils are provided
  if(!is.null(fossils)){
    # alter names to label occurrences
    phy$tip.label <- names
  }
  
  class(phy) <- "phylo"
  
  return(phy)
}
