# Attach QuantumClone, installing it from GitHub (through devtools) when it is
# not available yet.
if (!require("QuantumClone")) {
  if (!require(devtools)) {
    install.packages("devtools")
    library("devtools")
  }
  install_github("DeveauP/QuantumClone")
  # require() returned FALSE above, so nothing is attached at this point;
  # attach the freshly installed package (library() stops if that failed).
  library("QuantumClone")
}
# Same procedure for sciClone.
if (!require("sciClone")) {
  if (!require(devtools)) {
    install.packages("devtools")
    library("devtools")
  }
  install_github("genome/sciclone")
  # See above: the package must be attached explicitly after installation.
  library("sciClone")
}
# Read the simulation parameters from the command line.
# commandArgs() is called without trailingOnly, so the vector also holds the
# interpreter's own arguments; elements 2 to 8 are taken as the parameters.
# NOTE(review): this assumes a launch method where the first user argument
# lands at position 2 - confirm against the calling script.
# Every parameter except the ploidy is evaluated as R code, so expressions
# are accepted; any code passed in an argument is executed as well.
arg <- commandArgs()
print(arg)
range_clones<-eval(parse(text=arg[2])) # number of clones in the samples
range_mutations<-eval(parse(text=arg[3])) # number of mutations


# The ploidy is kept as a string; Compare_to_sciClone() tests it for the
# substring "ploidy" and splits it on ":".
range_ploidy<-as.character(arg[4])
print(range_ploidy)

range_samples<-eval(parse(text=arg[5])) # passed as number_of_samples
range_depth<-eval(parse(text=arg[6])) # passed as depth
range_contamination<-eval(parse(text=arg[7])) # passed as contamination
number_of_tests<-eval(parse(text=arg[8])) # number of simulated data sets


# Convert `x` to numeric through its character representation, so that a
# factor yields the numbers written in its labels rather than its codes.
cnum <- function(x) {
  as_text <- as.character(x)
  as.numeric(as_text)
}
################################
###########Start comparison#####
################################
###########Define functions#####
################################

# For every string in `x`: split it on `split` and count how many of the
# resulting pieces match the regular expression `pattern`.
# With the default empty `split` each character is a piece, and with the
# default empty `pattern` every piece matches.
strcount <- function(x, pattern = '', split = '') {
  pieces_per_string <- strsplit(x, split)
  count_matches <- function(pieces) {
    length(grep(pattern, pieces))
  }
  unlist(lapply(pieces_per_string, count_matches))
}


# Sum of the first three proc.time() components (user, system and elapsed
# seconds) accumulated since the snapshot `ptm`.
.elapsed_since <- function(ptm) {
  sum((proc.time() - ptm)[1:3])
}

# Position, in a list of models, of the model with the smallest `$Crit`.
.best_model_index <- function(models) {
  which.min(vapply(models, function(model) model$Crit, numeric(1), USE.NAMES = FALSE))
}

# Normalised mutual information 2 * I(cluster; truth) / (H(cluster) + H(truth))
# between two label vectors of the same length.
.nmi_from_labels <- function(cluster, truth) {
  n <- length(cluster)
  p_cluster <- table(cluster) / n
  p_truth <- table(truth) / n
  h_cluster <- -sum(p_cluster * log(p_cluster))
  h_truth <- -sum(p_truth * log(p_truth))
  # One row per observed (cluster, truth) pair; column 3 is the pair count.
  joint <- aggregate(rep(1, times = n), by = list(x = cluster, y = truth), sum)
  # Marginals are looked up by label name rather than by position, so labels
  # that are not exactly 1..k still pick the right entry.
  marginals <- p_cluster[as.character(joint[, 1])] * p_truth[as.character(joint[, 2])]
  log_ratio <- log(joint[, 3] / (n * marginals))
  2 * sum(joint[, 3] / n * log_ratio) / (h_truth + h_cluster)
}

# Benchmark QuantumClone clustering variants against sciClone (or, for ploidy
# tests, against QuantumClone forced to single copy) and fpc::pamk on
# simulated data.
#
# For each test i in 1..number_of_tests the function seeds the RNG with
# 122 + i, simulates data with QuantumCat(), runs every method while timing
# it, computes NMI and Precision_Recall() indices, appends one row to the
# result table and saves the simulated data, the fitted objects named in
# QC_algs and the row to "test<i>/Clustered_data.rda".
#
# Arguments:
#   number_of_tests      number of simulated data sets.
#   contamination        contamination value, repeated once per sample.
#   number_of_clones, number_of_mutations, number_of_samples, depth
#                        forwarded to QuantumCat().
#   ploidy               forwarded to QuantumCat(); when it contains the
#                        substring "ploidy" the part after the first ":" is
#                        used instead (as a number when it parses as one) and
#                        sciClone is replaced by the single-copy run.
#
# Side effects: creates the "test<i>" directories and writes the result table
# to "results_sciClone_QCclone.txt" in the working directory.
#
# Returns the concatenation of the QuantumCat() outputs of all tests.
Compare_to_sciClone <- function(number_of_tests, contamination = 0, number_of_clones = 4, number_of_mutations = 100, number_of_samples = 2, depth = 100, ploidy = "AB") {
  whole.data <- list()
  result <- data.frame()
  maxit <- 4
  ploidy_run <- grepl(pattern = "ploidy", x = ploidy)
  conta <- rep(contamination, times = number_of_samples)

  # Names of the local objects holding the QuantumClone / FLASH results; they
  # are used both to label the columns and to pick the objects passed to save().
  QC_algs <- c("QCloned", "QCloned.flash", "QCloned.flash.DEoptim", "flash", "flash.BIC", "flash.AIC")
  indice_algs <- c(QC_algs, "fpc", if (ploidy_run) "QC.single" else "sciclone")

  for (i in seq_len(number_of_tests)) {
    print(i)

    set.seed(122 + i) ### Moved in reaction to NbClust setting seed

    if (!ploidy_run) { # tests are not on ploidy: run sciClone
      start.data <- QuantumCat(
        number_of_clones = number_of_clones, number_of_mutations = number_of_mutations,
        ploidy = ploidy, depth = depth, number_of_samples = number_of_samples,
        contamination = conta
      )
      whole.data <- c(whole.data, start.data)
      sci.data <- Format_to_sciClone(start.data, contamination = contamination)

      ptm <- proc.time()
      sciCloned <- sciClone(
        vafs = sci.data, sampleNames = as.character(1:number_of_samples),
        maximumClusters = 10, minimumDepth = 0
      )
      sci.elapsed <- .elapsed_since(ptm)

      ptm <- proc.time()
      QCloned.flash <- One_step_clustering(
        SNV_list = start.data, contamination = conta,
        Initializations = maxit, preclustering = "FLASH", save_plot = FALSE,
        nclone_range = 2:10, ncores = 4, keep.all.models = TRUE,
        model.selection = "BIC",
        optim = "default"
      )
      QC.flash.elapsed <- .elapsed_since(ptm)

      ptm <- proc.time()
      QCloned <- One_step_clustering(
        SNV_list = start.data, contamination = conta,
        Initializations = maxit, preclustering = "FLASH", save_plot = FALSE,
        nclone_range = 2:10, ncores = 4, keep.all.models = TRUE,
        model.selection = "BIC",
        optim = "compound"
      )
      QC.elapsed <- .elapsed_since(ptm)

      ptm <- proc.time()
      QCloned.flash.DEoptim <- One_step_clustering(
        SNV_list = start.data, contamination = conta,
        Initializations = maxit, preclustering = "FLASH", save_plot = FALSE,
        nclone_range = 2:10, ncores = 4, keep.all.models = TRUE,
        model.selection = "BIC",
        optim = "DEoptim"
      )
      QC.flash.DEoptim.elapsed <- .elapsed_since(ptm)

      # FLASH clustering
      ptm <- proc.time()
      flash <- FlashQC(start.data, conta = conta, Nclus = 2:10)
      flash.elapsed <- .elapsed_since(ptm)
      ptm <- proc.time()
      flash.BIC <- FlashQC(start.data, conta = conta, Nclus = 2:10, model.selection = "BIC")
      flash.BIC.elapsed <- .elapsed_since(ptm)
      ptm <- proc.time()
      flash.AIC <- FlashQC(start.data, conta = conta, Nclus = 2:10, model.selection = "AIC")
      flash.AIC.elapsed <- .elapsed_since(ptm)
    } else { # run QC with single possibility
      ##################################
      ## OVERDIPLOID GENOMES ###########
      ##################################
      # Use the text after the first ":" as ploidy, as a number when possible
      # (as.numeric() warns when it is not a number; the string is then kept).
      ploidy_value <- as.character(strsplit(x = ploidy, split = ":", fixed = TRUE)[[1]][2])
      ploidy_number <- as.numeric(ploidy_value)
      if (!is.na(ploidy_number)) ploidy_value <- ploidy_number

      start.data <- QuantumCat(
        number_of_clones = number_of_clones, number_of_mutations = number_of_mutations,
        ploidy = ploidy_value, depth = depth, number_of_samples = number_of_samples,
        contamination = conta
      )
      whole.data <- c(whole.data, start.data)

      ptm <- proc.time()
      QCloned.single <- One_step_clustering(
        SNV_list = start.data, contamination = conta,
        Initializations = maxit, preclustering = "FLASH", save_plot = FALSE,
        nclone_range = 2:10, ncores = 4, force.single.copy = TRUE,
        model.selection = "BIC"
      )
      QCloned.single.elapsed <- .elapsed_since(ptm)

      # Normal
      # NOTE(review): this branch uses "kmedoid" preclustering where the
      # diploid branch uses "FLASH" - confirm this is intended.
      ptm <- proc.time()
      QCloned <- One_step_clustering(
        SNV_list = start.data, contamination = conta,
        Initializations = maxit, preclustering = "kmedoid", save_plot = FALSE,
        nclone_range = 2:10, ncores = 4, keep.all.models = TRUE,
        model.selection = "BIC",
        optim = "compound"
      )
      QC.elapsed <- .elapsed_since(ptm)

      # Flash init
      ptm <- proc.time()
      QCloned.flash <- One_step_clustering(
        SNV_list = start.data, contamination = conta,
        Initializations = maxit, preclustering = "FLASH", save_plot = FALSE,
        nclone_range = 2:10, ncores = 4, keep.all.models = TRUE,
        model.selection = "BIC",
        optim = "default"
      )
      QC.flash.elapsed <- .elapsed_since(ptm)

      # Flash + DEoptim
      # NOTE(review): model.selection is the number 6 here while every other
      # call passes "BIC" - confirm this is intended.
      ptm <- proc.time()
      QCloned.flash.DEoptim <- One_step_clustering(
        SNV_list = start.data, contamination = conta,
        Initializations = maxit, preclustering = "FLASH", save_plot = FALSE,
        nclone_range = 2:10, ncores = 4, keep.all.models = TRUE,
        model.selection = 6,
        optim = "DEoptim"
      )
      QC.flash.DEoptim.elapsed <- .elapsed_since(ptm)

      ### FLASH clustering
      ptm <- proc.time()
      flash <- FlashQC(start.data, conta = conta, Nclus = 2:10)
      flash.elapsed <- .elapsed_since(ptm)
      ptm <- proc.time()
      flash.BIC <- FlashQC(start.data, conta = conta, Nclus = 2:10, model.selection = "BIC")
      flash.BIC.elapsed <- .elapsed_since(ptm)
      ptm <- proc.time()
      flash.AIC <- FlashQC(start.data, conta = conta, Nclus = 2:10, model.selection = "AIC")
      flash.AIC.elapsed <- .elapsed_since(ptm)
    }

    # k-medoids (fpc::pamk) on the raw Alt / Depth ratios, one column per sample
    df_fpc <- matrix(0, number_of_mutations, number_of_samples)
    for (j in seq_along(start.data)) {
      df_fpc[, j] <- start.data[[j]]$Alt / (start.data[[j]]$Depth)
    }
    ptm <- proc.time()
    fpc.res <- fpc::pamk(data = df_fpc, krange = 2:10)
    fpc.elapsed <- .elapsed_since(ptm)
    fpc_cluster <- fpc.res$pamobject$cluster
    true_clone <- start.data[[1]]$Chr

    # Each One_step_clustering() call kept all models: pick, per method, the
    # one with the smallest criterion.
    select_QC <- .best_model_index(QCloned)
    select_QC.flash <- .best_model_index(QCloned.flash)
    select_QC.flash.DEoptim <- .best_model_index(QCloned.flash.DEoptim)
    best_QC <- QCloned[[select_QC]]
    best_QC.flash <- QCloned.flash[[select_QC.flash]]
    best_QC.flash.DEoptim <- QCloned.flash.DEoptim[[select_QC.flash.DEoptim]]

    # Reference method of the run: sciClone (diploid) or single-copy QC (ploidy)
    if (!ploidy_run) {
      #### NMI calculation for sciClone ####
      clus <- sciCloned@vafs.merged$cluster
      assigned <- clus > 0 # only mutations with a positive cluster id are scored
      sci_cluster <- clus[assigned]
      sci_truth <- sciCloned@vafs.merged$chr[assigned]
      NMI_sci <- .nmi_from_labels(sci_cluster, sci_truth)
      reference_indices <- Precision_Recall(hx = sci_cluster, Truth = sci_truth)
    } else {
      NMI_QC.single <- Compute_NMI(QCloned.single)
      reference_indices <- Precision_Recall(
        hx = QCloned.single$cluster,
        Truth = QCloned.single$filtered.data[[1]]$Chr
      )
    }

    # Each selected model is scored against the truth stored in that same
    # model; order must match indice_algs.
    indices <- c(
      unlist(Precision_Recall(hx = best_QC$cluster,
                              Truth = best_QC$filtered.data[[1]]$Chr)),
      unlist(Precision_Recall(hx = best_QC.flash$cluster,
                              Truth = best_QC.flash$filtered.data[[1]]$Chr)),
      unlist(Precision_Recall(hx = best_QC.flash.DEoptim$cluster,
                              Truth = best_QC.flash.DEoptim$filtered.data[[1]]$Chr)),
      unlist(Precision_Recall(hx = flash$cluster, Truth = flash$filtered.data[[1]]$Chr)),
      unlist(Precision_Recall(hx = flash.BIC$cluster, Truth = flash$filtered.data[[1]]$Chr)),
      unlist(Precision_Recall(hx = flash.AIC$cluster, Truth = flash$filtered.data[[1]]$Chr)),
      ### fpc
      unlist(Precision_Recall(hx = fpc_cluster, Truth = true_clone)),
      ### sciClone or single-copy QC
      unlist(reference_indices)
    )

    ### NMI calculation for QClone and FLASH; order must match QC_algs ####
    NMI_QC <- c(
      Compute_NMI(best_QC),
      Compute_NMI(best_QC.flash),
      Compute_NMI(best_QC.flash.DEoptim),
      NMI_cutree(flash$cluster, true_clone),
      NMI_cutree(flash.BIC$cluster, true_clone),
      NMI_cutree(flash.AIC$cluster, true_clone)
    )

    Time.QC <- c(
      QC.elapsed,
      QC.flash.elapsed,
      QC.flash.DEoptim.elapsed,
      flash.elapsed,
      flash.BIC.elapsed,
      flash.AIC.elapsed
    )

    ### NMI calculation for fpc ####
    NMI_fpc <- .nmi_from_labels(fpc_cluster, true_clone)

    ### binding results
    indice_names <- names(unlist(Precision_Recall(hx = flash.AIC$cluster, Truth = flash$filtered.data[[1]]$Chr)))
    names(NMI_QC) <- paste0("NMI.", QC_algs)
    names(indices) <- paste(indice_names, rep(indice_algs, each = length(indice_names)), sep = ".")
    names(Time.QC) <- paste0("Time.", QC_algs)

    # Number of mutations in the selected QuantumClone model (QCloned itself
    # is a list of models and has no `cluster` element).
    mutations_left_QC <- length(best_QC$cluster)

    if (!ploidy_run) {
      ### DIPLOID GENOMES
      spare <- data.frame(Id = i, t(indices),
                          NMI_sci = NMI_sci,
                          t(NMI_QC),
                          NMI_kmedoid = NMI_fpc,
                          Time.sci = sci.elapsed,
                          t(Time.QC),
                          Time.kmedoid = fpc.elapsed,
                          Mutations_left.sci = length(sci_cluster),
                          Mutations_left.QC = mutations_left_QC
      )
    } else {
      ##################################
      ## OVERDIPLOID GENOMES ###########
      ##################################
      spare <- data.frame(Id = i, t(indices),
                          t(NMI_QC),
                          t(NMI_QC.single),
                          NMI_kmedoid = NMI_fpc,
                          t(Time.QC),
                          Time.QC.single = QCloned.single.elapsed,
                          Time.kmedoid = fpc.elapsed,
                          Mutations_left_QC = mutations_left_QC
      )
    }
    result <- rbind(result, spare)
    if (i > 1) {
      print(result[i, ])
    }
    dir.create(paste("test", i, sep = ""), showWarnings = FALSE)
    # QC_algs holds variable names: the fitted objects themselves are saved.
    save(list = c("start.data", QC_algs, "spare"), file = paste0("test", i, "/Clustered_data.rda"))
  }

  write.table(x = result, file = "results_sciClone_QCclone.txt", quote = FALSE, sep = "\t", row.names = FALSE)
  return(whole.data)
}
# Build, for every sample table in `QCat`, a character matrix with the columns
# mutation_id, ref_counts, var_counts, normal_cn, minor_cn and major_cn.
#
# `QCat` is a list of tables with the columns Chr, Start, Depth, Alt and
# Genotype. Mutation ids ("chr<Chr>:<Start>:sample") and the number of rows
# are taken from the first table for every sample (presumably so that ids are
# shared across samples - verify against the consumer of the files).
# ref_counts is Depth - Alt, var_counts is Alt, normal_cn is always 2,
# minor_cn / major_cn are the numbers of "B" / "A" characters in Genotype.
# A preview and the dimensions of each table are printed.
#
# Returns a list with one matrix per sample (an empty list for empty input).
create_for_pyClone <- function(QCat) {
  if (length(QCat) == 0) {
    return(list())
  }
  # Sample-independent columns are built once, outside the loop. Pasting the
  # columns directly avoids coercing the rows of the table to a matrix.
  first_sample <- QCat[[1]]
  mutation_id <- paste0("chr", first_sample$Chr, ":", first_sample$Start, ":sample")
  normal_cn <- rep(2, times = nrow(first_sample))

  result <- vector("list", length(QCat))
  for (i in seq_along(QCat)) {
    current <- QCat[[i]]
    print(head(current))
    genotype <- as.character(current$Genotype)
    ref_counts <- current$Depth - current$Alt
    var_counts <- current$Alt
    # strcount() already works element-wise on a character vector
    minor_cn <- strcount(x = genotype, pattern = "B")
    major_cn <- strcount(x = genotype, pattern = "A")
    pyclone_table <- cbind(mutation_id, ref_counts, var_counts, normal_cn, minor_cn, major_cn)
    print(head(pyclone_table))
    print(dim(pyclone_table))
    result[[i]] <- pyclone_table
  }
  #print("result")
  #print(head(result[[1]]))
  #print(head(result[[2]]))
  return(result)
}
# Keep the rows whose Genotype is "AB" in every sample and return, per sample,
# a five-column data frame (default names X1..X5): Chr, Start, Depth - Alt,
# Alt, and the Alt / Depth ratio in percent divided by (1 - contamination).
Format_to_sciClone <- function(QuantumCat_out, contamination = 0) {
  n_rows <- nrow(QuantumCat_out[[1]])
  # A row survives only if it is "AB" in all samples.
  keep <- Reduce(
    function(so_far, sample_table) so_far & sample_table$Genotype == "AB",
    QuantumCat_out,
    rep(TRUE, times = n_rows)
  )
  lapply(seq_along(QuantumCat_out), function(s) {
    kept <- QuantumCat_out[[s]][keep, ]
    ref_reads <- kept$Depth - kept$Alt
    vaf_percent <- (kept$Alt / kept$Depth) * 100 / (1 - contamination)
    # deparse.level = 0 keeps the matrix free of column names
    data.frame(cbind(kept$Chr, kept$Start, ref_reads, kept$Alt, vaf_percent,
                     deparse.level = 0))
  })
}
# Return the sample tables restricted to the rows whose Genotype is "AB" in
# every sample of `QuantumCat_out`.
Filter_on_AB <- function(QuantumCat_out) {
  sample_ids <- seq_along(QuantumCat_out)
  all_AB <- rep(TRUE, times = nrow(QuantumCat_out[[1]]))
  for (s in sample_ids) {
    is_AB <- QuantumCat_out[[s]]$Genotype == "AB"
    all_AB <- all_AB & is_AB
  }
  lapply(sample_ids, function(s) QuantumCat_out[[s]][all_AB, ])
}
###################################
#######Use functions###############
###################################
print("Starting with sciClone and QClone")
valid.data <- Compare_to_sciClone(number_of_tests, contamination = range_contamination,
                                  number_of_clones = range_clones, number_of_mutations = range_mutations,
                                  number_of_samples = range_samples, depth = range_depth,
                                  ploidy = range_ploidy)

# valid.data is a flat list in which each test contributes range_samples
# consecutive elements. Export every test as one tab-separated file per sample
# inside its "test<k>" directory.
for (i in seq_along(valid.data)) {
  if (range_samples > 1) {
    # i is the first sample of a test exactly when i %% range_samples == 1
    if ((i %% range_samples) == 1) {
      test_dir <- paste0("./test", i %/% range_samples + 1)
      spare <- create_for_pyClone(valid.data[i:(i + range_samples - 1)])
      for (j in seq_along(spare)) {
        write.table(x = spare[[j]], file = paste0(test_dir, "/sample", j, ".tsv"),
                    quote = FALSE, row.names = FALSE, sep = "\t")
      }
      # The complete simulated data set is stored next to each test.
      save(valid.data, file = paste0(test_dir, "/NMI_sci_QC.Rda"))
    }
  } else if (range_samples == 1) {
    test_dir <- paste0("./test", i)
    dir.create(paste0("test", i), showWarnings = FALSE)
    spare <- create_for_pyClone(valid.data[i:(i + range_samples - 1)])
    for (j in seq_along(spare)) {
      # single-sample runs always write "sample1.tsv"
      write.table(x = spare[[j]], file = paste0(test_dir, "/sample", 1, ".tsv"),
                  quote = FALSE, row.names = FALSE, sep = "\t")
    }
    #save(valid.data, file = paste("./test",i,"/NMI_sci_QC.Rda",sep=""))
  }
}
