#' readSegs
#'
#' Reads copy-number segments from a delimited text file (tab, comma or single
#' space separated, detected automatically from the header line) or from every
#' .txt file in a folder, and converts them to the per-sample list format
#' needed for the pipeline. The variable colMap is used to map your column
#' names to what the pipeline expects. For instance, if your column that has
#' the chromosome numbers in it is titled "chrom" instead of the expected
#' "chromosome" then you would specify the colMap as
#' c("ID","chrom","start","end","segVal"). If your data is separated into
#' major and minor allele copy numbers then the segVal part of the colMap
#' should be formatted as "nMajor+nMinor" to let the function know to add the
#' two columns together.
#'
#' Rows containing NA in any of the mapped columns are dropped (with a
#' message), and a leading "chr" is stripped from chromosome names.
#'
#' @param path The path to the .txt file with the data in it, or a folder
#' containing the .txt files
#' @param colMap The mapping of column names. The default is
#' c("ID","chromosome","start","end","segVal"), with "ploidy" appended when
#' readPloidy is TRUE. If your column names vary from this please pass a
#' vector similar to the above with the changes; when readPloidy is TRUE the
#' vector needs a sixth entry naming the ploidy column.
#' @param readPloidy Whether or not the input file has ploidy and should be read
#' @export
#' @return If readPloidy is FALSE, a list named by sample ID in which each
#' element is a data frame with the columns chromosome, start, end and segVal.
#' If readPloidy is TRUE, an unnamed list of length two: the segment list
#' described above, followed by a vector of ploidy values named by sample ID.
readSegs = function(path, colMap = NULL, readPloidy = FALSE){
  # A vector of paths would make the scalar checks below ambiguous
  if (!is.character(path) || length(path) != 1 || is.na(path))
    stop("path must be a single file or folder path given as a string.")
  
  # Checks to make sure the file exists
  if (!(file.exists(path) || dir.exists(path)))
    stop(paste("No file or folder found at",path," Please check for file."))
  
  
  # Checks whether or not the input is a folder and if so it recursively
  # calls the readSegs function and builds the final list
  if (dir.exists(path)) {
    # dir() takes a regular expression, not a glob: match the .txt extension
    fileNames = dir(path, pattern = "\\.txt$")
    segData = list()
    ploidyVals = numeric()
    for (fileName in fileNames){
      segs = readSegs(file.path(path, fileName), colMap, readPloidy)
      if (readPloidy){
        segData = c(segData, segs[[1]])
        ploidyVals = c(ploidyVals, segs[[2]])
      } else {
        segData = c(segData, segs)
      }
    }
    
    if (readPloidy)
      return(list(segData, ploidyVals))
    return(segData)
  }
  
  
  # The expected column header
  expected = c("ID","chromosome","start","end","segVal")
  
  # Add ploidy to the expected
  if (readPloidy)
    expected = c(expected, "ploidy")
  
  # If a custom mapping is not given use the default
  if (is.null(colMap))
    colMap = expected
  
  # Check if the mapping has a combination for the segVal ("major+minor").
  # If so, the fifth entry is replaced by its two component column names and
  # a placeholder is added to expected so the two vectors stay the same length.
  toAdd = length(colMap) >= 5 && !is.na(colMap[5]) &&
    grepl("+", colMap[5], fixed = TRUE)
  if (toAdd) {
    parts = strsplit(colMap[5], "+", fixed = TRUE)[[1]]
    if (length(parts) != 2)
      stop("The segVal mapping must name exactly two columns joined by '+', ",
           "e.g. \"nMajor+nMinor\".")
    colMap = append(colMap, parts, 4)
    colMap = colMap[-7]
    expected = append(expected, paste0(expected[5], "2"), 5)
  }
  
  # The mapping must supply one name per expected column; checked before any
  # file is parsed so a bad mapping fails fast
  if (length(colMap) != length(expected)){
    msg = paste("Not enough column mappings given. \n",
                "Need to specify a mapping for each of the following:\n",
                paste(expected,collapse = ", "))
    stop(msg)
  }
  
  # Read in the data
  # Determine the delimiter by probing the header with each candidate; the
  # first one under which every mapped column is found wins. A probe that
  # fails to parse just means that delimiter is wrong, so it is skipped.
  separator = ","
  for (delin in c("\t", ",", " ")){
    lineOne = tryCatch(
      utils::read.table(path, sep = delin, nrows = 1, header = TRUE),
      error = function(e) NULL
    )
    if (!is.null(lineOne) && all(colMap %in% colnames(lineOne))){
      separator = delin
      break
    }
  }
  
  data = utils::read.table(path, header = TRUE, stringsAsFactors = FALSE,
                           sep = separator)
  
  # Check to make sure all the columns match the expected, and if not give an
  # informative error message
  missingCols = colMap[!(colMap %in% colnames(data))]
  if (length(missingCols) > 0){
    # The ploidy column is always the last entry of the mapping; when it is
    # the only thing missing give the ploidy-specific advice
    if (readPloidy && identical(missingCols, colMap[length(colMap)])){
      msg = paste("Ploidy data not found in the given data file.\n",
                  "Either check the column map for ploidy or \n",
                  "consider reading ploidy in seperately and \n",
                  "using the ploidyData parameter or addPloidy method.")
      stop(msg)
    }
    msg = paste("Not all columns found. Please check mapping.\n ",
                "Mapping used:",paste(colMap,collapse = ", "),"\n ",
                paste(missingCols,collapse = ", ")
                , "columns not found.")
    stop(msg)
  }
  
  # Only grabs the needed columns using the column mapping; from here on the
  # columns are in mapping order, so they can be addressed by position
  data = data[,colMap]
  
  #Checks for rows with NA's and removes them
  rows = nrow(data)
  data = data[stats::complete.cases(data),]
  if (nrow(data) != rows)
    message("Removed ",(rows-nrow(data))," rows that contained NA's.\n")
  
  # Strip a leading "chr" from chromosome names, only on the rows that have it
  chromNames = as.character(data[[2]])
  hasPrefix = startsWith(chromNames, "chr")
  if (any(hasPrefix)){
    message("Chromosome numbers formatted as 'chr_'. Removing leading 3 chars.\n")
    chromNames[hasPrefix] = substring(chromNames[hasPrefix], 4)
    data[[2]] = chromNames
  }
  
  # Collapse the two allele columns (positions 5 and 6) into a single segVal
  if (toAdd) {
    data[[5]] = data[[5]] + data[[6]]
    colnames(data)[5] = "segVal"
    data = data[, -6, drop = FALSE]
    colMap = colMap[-6]
    colMap[5] = "segVal"
  }
  
  #Goes through each of the samples and creates a list of their segments
  idCol = data[[1]]
  sampIDs = unique(idCol)
  segData = vector("list", length(sampIDs))
  ploidyVals = numeric()
  for (k in seq_along(sampIDs)){
    sampleRows = idCol == sampIDs[k]
    
    tab = data[sampleRows, 2:5]
    colnames(tab) = expected[2:5]
    # Filled by position and named afterwards, so numeric sample IDs are
    # never mistaken for list indices
    segData[[k]] = tab
    
    if (readPloidy){
      # After the optional merge above ploidy is always the sixth column
      samplePloidy = unique(data[sampleRows, 6])
      if (length(samplePloidy) > 1)
        warning("Multiple ploidy values found for sample ", sampIDs[k],
                ". Using the first.", call. = FALSE)
      ploidyVals[as.character(sampIDs[k])] = samplePloidy[1]
    }
  }
  
  names(segData) = as.character(sampIDs)
  
  if (readPloidy)
    return(list(segData, ploidyVals))
  
  return(segData)
}
