## Add an `entrez_id` column to a table that only provides gene symbols.
##
## Steps, in order:
##   1. rename the column named by `symbol.colname` to `symbol`;
##   2. replace every symbol by the value alias2SymbolTable() returns for it
##      and drop the rows for which that value is NA;
##   3. collapse rows sharing a symbol by averaging every numeric column
##      (non-numeric columns other than `symbol` are dropped by this step);
##   4. look up ENTREZID for the remaining symbols with bitr() against
##      org.Hs.eg.db and left-join the result, so a symbol without a mapping
##      keeps `entrez_id = NA`.
##
## @param input.matrix Data frame with one gene-symbol column plus the
##   statistic columns.
## @param symbol.colname Name of the gene-symbol column in `input.matrix`.
## @return Data frame with `symbol` and `entrez_id` first, followed by the
##   averaged numeric columns.
add.geneid <- function(input.matrix, symbol.colname = "symbol") {
  return.matrix <- input.matrix %>%
    # all_of() states explicitly that `symbol.colname` holds a column name;
    # passing the bare variable to rename() is deprecated in tidyselect.
    dplyr::rename("symbol" = dplyr::all_of(symbol.colname)) %>%
    # A pre-existing `entrez_id` is re-derived below. Left in place, a numeric
    # one would be averaged and then clash with the joined column
    # (`entrez_id.x` / `entrez_id.y`), breaking the final select().
    dplyr::select(-dplyr::any_of("entrez_id"))

  # Map aliases to the symbols reported by alias2SymbolTable(); unmapped
  # entries come back as NA and are removed.
  return.matrix$symbol <- alias2SymbolTable(return.matrix$symbol)
  return.matrix <- return.matrix[!is.na(return.matrix$symbol), ]

  # Several aliases can map to the same symbol: keep one row per symbol.
  return.matrix <- return.matrix %>%
    dplyr::group_by(symbol) %>%
    dplyr::summarise(
      dplyr::across(where(is.numeric), function(x) mean(x, na.rm = TRUE))
    )

  entrezid.list <- bitr(
    return.matrix$symbol,
    fromType = "SYMBOL",
    toType = c("ENTREZID"),
    OrgDb = org.Hs.eg.db
  ) %>%
    dplyr::rename("entrez_id" = "ENTREZID", "symbol" = "SYMBOL")

  return.matrix <- return.matrix %>%
    dplyr::left_join(entrezid.list, by = "symbol") %>%
    dplyr::select("symbol", "entrez_id", everything())
  return(return.matrix)
}

## Convert a table of p-values (or any other statistic) into ranks or rank
## rates.
##
## @param statistics.matrix Data frame with a `symbol` column, an `entrez_id`
##   column and one column per data set; every column other than those two is
##   ranked.
## @param rate If TRUE, the ranks of each column are divided by that column's
##   largest rank; if FALSE, the ranks themselves are returned.
## @param only_coding_gene If TRUE, only rows whose `entrez_id` occurs in the
##   `GeneID` column of the `human_coding_gene` data set are kept.
## @return Numeric matrix with one row per gene (row names
##   "symbol:entrez_id") and one column per ranked input column.
change2order <- function(statistics.matrix, rate = TRUE, only_coding_gene = TRUE) {
  statistics.matrix <- as.data.frame(statistics.matrix, stringsAsFactors = FALSE)

  id.cols <- c("symbol", "entrez_id")
  missing.cols <- setdiff(id.cols, names(statistics.matrix))
  if (length(missing.cols) > 0) {
    stop(
      "statistics.matrix is missing column(s): ",
      paste(missing.cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Identifiers are compared and pasted as text. Converting before the
  # coding-gene filter lets a numeric `entrez_id` match the character ids.
  statistics.matrix[["symbol"]] <- as.character(statistics.matrix[["symbol"]])
  statistics.matrix[["entrez_id"]] <- as.character(statistics.matrix[["entrez_id"]])

  if (only_coding_gene) {
    # data() looks data sets up by name without the file extension, so the
    # bare name is used. Loading into this function's environment keeps the
    # caller's global environment untouched.
    utils::data("human_coding_gene", envir = environment())
    coding.ids <- as.character(human_coding_gene[["GeneID"]])
    is.coding <- statistics.matrix[["entrez_id"]] %in% coding.ids
    statistics.matrix <- statistics.matrix[is.coding, , drop = FALSE]
  }

  gene.keys <- paste(
    statistics.matrix[["symbol"]],
    statistics.matrix[["entrez_id"]],
    sep = ":"
  )
  # Matrix row names may repeat silently; refuse ambiguous keys instead.
  if (anyDuplicated(gene.keys) > 0) {
    stop("duplicated 'symbol:entrez_id' keys in statistics.matrix", call. = FALSE)
  }

  rank.column <- function(x) {
    # NOTE(review): na.last = TRUE gives NA entries the largest ranks (in row
    # order) instead of keeping them NA -- confirm this is the intended
    # treatment of genes that are missing from a data set.
    ranks <- rank(x, na.last = TRUE, ties.method = "average")
    if (rate && length(ranks) > 0) {
      ranks <- ranks / max(ranks)
    }
    ranks
  }

  value.idx <- which(!names(statistics.matrix) %in% id.cols)
  ranked <- lapply(statistics.matrix[value.idx], rank.column)

  # Build the matrix explicitly: sapply() would collapse a one-row input to a
  # plain vector, on which row names cannot be set.
  order.out <- matrix(
    as.numeric(unlist(ranked, use.names = FALSE)),
    nrow = nrow(statistics.matrix),
    ncol = length(ranked),
    dimnames = list(gene.keys, names(ranked))
  )
  return(order.out)
}

## Build the matrix of statistic values.
##
## Every file in `datapath` is read with fread(); the gene column(s) and the
## statistic column are kept, the statistic column is renamed after the data
## set, and the per-file tables are full-joined on the gene column(s).
##
## @param datapath Directory holding one result file per data set.
## @param gene.colname Character vector with the gene identifier column
##   name(s) present in every file.
## @param statistics.colname Name of the statistic column to extract from
##   each file.
## @return Data frame with the gene column(s) followed by one statistic
##   column per file.
get.statistics.matrix <- function(datapath,
                                  gene.colname = c("symbol"),
                                  statistics.colname = "P.Value") {
  # full.names = TRUE lets list.files() insert the path separator, so
  # `datapath` works with or without a trailing slash.
  data.paths <- list.files(datapath, full.names = TRUE)
  # list.files() also returns sub-directories, which fread() cannot read.
  data.paths <- data.paths[!dir.exists(data.paths)]

  # Empty seed with character gene columns, so the first full_join() already
  # has matching key types.
  return.matrix <- data.frame(
    matrix(character(0), nrow = 0, ncol = length(gene.colname)),
    stringsAsFactors = FALSE
  )
  names(return.matrix) <- gene.colname

  for (one.path in data.paths) {
    file.name <- basename(one.path)
    data.name <- str_extract(file.name, "GSE\\d+_*\\d*")
    if (is.na(data.name)) {
      # No GSE accession in the file name: fall back to the bare file name
      # rather than creating a column called NA.
      data.name <- tools::file_path_sans_ext(file.name)
    }

    ext.data <- fread(one.path) %>%
      dplyr::select(all_of(c(gene.colname, statistics.colname))) %>%
      dplyr::mutate(across(all_of(gene.colname), as.character))
    names(ext.data) <- c(gene.colname, data.name)

    return.matrix <- full_join(return.matrix, ext.data, by = gene.colname)
  }
  return(return.matrix)
}


## Builds the input matrix for the order-statistic computation; depending on
## the arguments it can instead return the p-value matrix or the p-value
## order (rank) matrix.

#' Generate the input format for os.jcdf()
#' @description
#' Constructs the input matrix of os.jcdf(), or gathers one statistic from
#' several data sets into a single matrix. With `form = "order.rate"` the
#' output can be passed directly to os.jcdf().
#' @param datapath Directory holding the summary files of the individual data
#'   sets. When `evidence.matrix` is `TRUE`: either the full path of a
#'   statistics matrix file (read with `fread()`) or the statistics matrix
#'   itself as a data frame.
#' @param evidence.matrix Logical. `FALSE` (default): `datapath` is a
#'   directory of summary files. `TRUE`: `datapath` already refers to a
#'   statistics matrix covering several data sets.
#' @param gene.colname Character vector with the name(s) of the gene
#'   identifier column(s), e.g. `c("symbol", "entrez_id")`. A column named
#'   `symbol` is required when `add.entrezid` is `TRUE`; both `symbol` and
#'   `entrez_id` must be present for the forms `"order.rate"` and `"order"`.
#' @param statistics.colname Character, name of the statistic column used as
#'   the basis for ranking.
#' @param form Character, the output form. `"order.rate"`: ranks divided by
#'   the largest rank of each data set; `"order"`: the rank matrix;
#'   `"statistics list"`: the matrix of the selected statistic itself.
#' @param add.entrezid Logical, whether to derive the `entrez_id` column from
#'   the `symbol` column. Default `TRUE`.
#'
#' @return For `"order.rate"` and `"order"`, a numeric matrix restricted to
#'   coding genes; for `"statistics list"`, the statistics data frame.
#' @export
#'
#' @examples
#' \dontrun{
#' datapath <- "C:/data" # directory holding the summary files
#' get.input.form(
#'   datapath,
#'   evidence.matrix = FALSE,
#'   gene.colname = c("symbol"),
#'   statistics.colname = "P.Value",
#'   form = "order.rate",
#'   add.entrezid = TRUE
#' )
#' }
get.input.form <- function(datapath,
                           evidence.matrix = FALSE,
                           gene.colname = c("symbol", "entrez_id"),
                           statistics.colname = "P.Value",
                           form = "order.rate",
                           add.entrezid = TRUE) {
  # Reject an unknown `form` before any file is read or any id is mapped.
  valid.forms <- c("order.rate", "order", "statistics list")
  if (length(form) != 1 || !form %in% valid.forms) {
    stop ("There is no such format,please choose: 'order.rate'/'order'/'statistics list' as input")
  }

  if (evidence.matrix) {
    if (is.character(datapath) && length(datapath) == 1) {
      # Documented usage: `datapath` is the path of the matrix file.
      return.matrix <- fread(datapath, data.table = FALSE)
    } else {
      # Also accept an already loaded matrix / data frame.
      return.matrix <- datapath
    }
  } else {
    return.matrix <- get.statistics.matrix(datapath, gene.colname, statistics.colname)
  }

  if (add.entrezid) {
    return.matrix <- add.geneid(return.matrix)
  }

  os.input <- switch(
    form,
    "order.rate" = change2order(return.matrix, rate = TRUE, only_coding_gene = TRUE),
    "order" = change2order(return.matrix, rate = FALSE, only_coding_gene = TRUE),
    "statistics list" = return.matrix
  )
  return(os.input)
}

