R Programming: How can I separate column names in to different columns to perform downstream analysis?

statistics

#1

I have a big data matrix and each column has named with multiple information and separated by an underscore. e.g.: Genotype, Tissue, Time, Treatment, and Replication (e.g.WT_Shoot_0t_NTrt_1)

A sample of my data frame;

structure(list(Proteins = c(“SnrK”, “MAPKK”, “PP2C”), WT_Shoot_0t_NTrt_1 = c(0.580784899, 1.210078166, 1.505880218), WT_Shoot_0t_NTrt_2 = c(0.957816536, 1.42644091, 0.943047498), WT_Shoot_0t_NTrt_3 = c(0.559338535, 1.481513748, 1.114371918), WT_Shoot_1t_Trt_1 = c(0.831382253, 1.478551276, 0.837832395), WT_Shoot_1t_Trt_2 = c(1.180515054, 1.445100969, 1.18151722), WT_Shoot_1t_Trt_3 = c(1.332735497, 1.515484415, 0.99774335), WT_root_0t_NTrt_1 = c(1.717008073, 2.048229681, 1.448233358), WT_root_0t_NTrt_2 = c(1.431501693, 1.850835296, 1.128499829), WT_root_0t_NTrt_3 = c(1.752086402, 2.047380811, 1.190984777), WT_root_1t_Trt_1 = c(1.368684187, 1.507348975, 1.531142731), WT_root_1t_Trt_2 = c(1.204974777, 1.440904968, 1.103257306), WT_root_1t_Trt_3 = c(0.996016342, 1.630774074, 1.141581901), mut1_Shoot_0t_NTrt_1 = c(1.05451186, 1.916352545, 1.030983014), mut1_Shoot_0t_NTrt_2 = c(1.54792871, 1.676837161, 1.244400719), mut1_Shoot_0t_NTrt_3 = c(1.318611728, 1.613611, 1.28740667), mut1_Shoot_1t_Trt_1 = c(1.551790106, 1.619609895, 1.097308351), mut1_Shoot_1t_Trt_2 = c(1.638951097, 1.437759761, 1.139143972), mut1_Shoot_1t_Trt_3 = c(1.18670455, 1.530006726, 1.583110853), mut1_root_0t_NTrt_1 = c(0.981436287, 0.5156177, 0.799418798), mut1_root_0t_NTrt_2 = c(1.143837649, 0.772921721, 1.098218628), mut1_root_0t_NTrt_3 = c(1.163352788, 1.371823855, 1.278531528), mut1_root_1t_Trt_1 = c(1.13334394, 0.768721169, 1.155071974), mut1_root_1t_Trt_2 = c(1.015317761, 0.838696502, 0.9622491), mut1_root_1t_Trt_3 = c(1.961461109, 0.697184247, 0.926734427)), row.names = c(NA, -3L), class = c(“tbl_df”, “tbl”, “data.frame”), spec = structure(list(cols = list(Proteins = structure(list(), class = c(“collector_character”, “collector”)), WT_Shoot_0t_NTrt_1 = structure(list(), class = c(“collector_double”, “collector”)), WT_Shoot_0t_NTrt_2 = structure(list(), class = c(“collector_double”, “collector”)), WT_Shoot_0t_NTrt_3 = structure(list(), class = c(“collector_double”, “collector”)), WT_Shoot_1t_Trt_1 = structure(list(), class = c(“collector_double”, “collector”)), WT_Shoot_1t_Trt_2 = structure(list(), class = c(“collector_double”, “collector”)), WT_Shoot_1t_Trt_3 = structure(list(), class = c(“collector_double”, “collector”)), WT_root_0t_NTrt_1 = structure(list(), class = c(“collector_double”, “collector”)), WT_root_0t_NTrt_2 = structure(list(), class = c(“collector_double”, “collector”)), WT_root_0t_NTrt_3 = structure(list(), class = c(“collector_double”, “collector”)), WT_root_1t_Trt_1 = structure(list(), class = c(“collector_double”, “collector”)), WT_root_1t_Trt_2 = structure(list(), class = c(“collector_double”, “collector”)), WT_root_1t_Trt_3 = structure(list(), class = c(“collector_double”, “collector”)), mut1_Shoot_0t_NTrt_1 = structure(list(), class = c(“collector_double”, “collector”)), mut1_Shoot_0t_NTrt_2 = structure(list(), class = c(“collector_double”, “collector”)), mut1_Shoot_0t_NTrt_3 = structure(list(), class = c(“collector_double”, “collector”)), mut1_Shoot_1t_Trt_1 = structure(list(), class = c(“collector_double”, “collector”)), mut1_Shoot_1t_Trt_2 = structure(list(), class = c(“collector_double”, “collector”)), mut1_Shoot_1t_Trt_3 = structure(list(), class = c(“collector_double”, “collector”)), mut1_root_0t_NTrt_1 = structure(list(), class = c(“collector_double”, “collector”)), mut1_root_0t_NTrt_2 = structure(list(), class = c(“collector_double”, “collector”)), mut1_root_0t_NTrt_3 = structure(list(), class = c(“collector_double”, “collector”)), mut1_root_1t_Trt_1 = structure(list(), class = c(“collector_double”, “collector”)), mut1_root_1t_Trt_2 = structure(list(), class = c(“collector_double”, “collector”)), mut1_root_1t_Trt_3 = structure(list(), class = c(“collector_double”, “collector”))), default = structure(list(), class = c(“collector_guess”, “collector”))), class = “col_spec”))

How can I make a table to like below to process downstream statistical analysis (i.e. ANOVA, Tukey) Sample Table


#2

Hi @kynda I have tried to construct the desired table using the code below:

library(data.table)

# create sample dataframe
df = data.frame(Proteins = c("SnrK", "MAPKK", "PP2C"), 
                   WT_Shoot_0t_NTrt_1 = c(0.580784899, 1.210078166, 1.505880218), 
                   WT_Shoot_0t_NTrt_2 = c(0.957816536, 1.42644091, 0.943047498), 
                   WT_Shoot_0t_NTrt_3 = c(0.559338535, 1.481513748, 1.114371918), 
                   WT_Shoot_1t_Trt_1 = c(0.831382253, 1.478551276, 0.837832395), 
                   WT_Shoot_1t_Trt_2 = c(1.180515054, 1.445100969, 1.18151722), 
                   WT_Shoot_1t_Trt_3 = c(1.332735497, 1.515484415, 0.99774335), 
                   WT_root_0t_NTrt_1 = c(1.717008073, 2.048229681, 1.448233358), 
                   WT_root_0t_NTrt_2 = c(1.431501693, 1.850835296, 1.128499829), 
                   WT_root_0t_NTrt_3 = c(1.752086402, 2.047380811, 1.190984777), 
                   WT_root_1t_Trt_1 = c(1.368684187, 1.507348975, 1.531142731), 
                   WT_root_1t_Trt_2 = c(1.204974777, 1.440904968, 1.103257306), 
                   WT_root_1t_Trt_3 = c(0.996016342, 1.630774074, 1.141581901), 
                   mut1_Shoot_0t_NTrt_1 = c(1.05451186, 1.916352545, 1.030983014), 
                   mut1_Shoot_0t_NTrt_2 = c(1.54792871, 1.676837161, 1.244400719), 
                   mut1_Shoot_0t_NTrt_3 = c(1.318611728, 1.613611, 1.28740667), 
                   mut1_Shoot_1t_Trt_1 = c(1.551790106, 1.619609895, 1.097308351), 
                   mut1_Shoot_1t_Trt_2 = c(1.638951097, 1.437759761, 1.139143972), 
                   mut1_Shoot_1t_Trt_3 = c(1.18670455, 1.530006726, 1.583110853), 
                   mut1_root_0t_NTrt_1 = c(0.981436287, 0.5156177, 0.799418798), 
                   mut1_root_0t_NTrt_2 = c(1.143837649, 0.772921721, 1.098218628), 
                   mut1_root_0t_NTrt_3 = c(1.163352788, 1.371823855, 1.278531528), 
                   mut1_root_1t_Trt_1 = c(1.13334394, 0.768721169, 1.155071974), 
                   mut1_root_1t_Trt_2 = c(1.015317761, 0.838696502, 0.9622491), 
                   mut1_root_1t_Trt_3 = c(1.961461109, 0.697184247, 0.926734427))


# reshape original dataframe from wide format to long format
df_long = melt(df, id.vars = "Proteins", measure.vars = names(df)[-1], value.name = "measurement")

# sort data as per the 'Proteins' column
df_long = df_long[order(df_long$Proteins, decreasing = T),]
              
df_long$variable = as.character(df_long$variable)

# create required columns / variables
df_long$genotype = ""
df_long$Tissue = ""
df_long$time = ""
df_long$Trtment = ""
df_long$Replication = ""

# populate newly created columns
for(i in 1:nrow(df_long)){
  
  df_long$genotype[i] = strsplit(df_long$variable[i], "_")[[1]][1]
  df_long$Tissue[i] = strsplit(df_long$variable[i], "_")[[1]][2]
  df_long$time[i] = strsplit(df_long$variable[i], "_")[[1]][3]
  df_long$Trtment[i] = strsplit(df_long$variable[i], "_")[[1]][4]
  df_long$Replication[i] = as.numeric(strsplit(df_long$variable[i], "_")[[1]][5])
  
}

# rearranging columns of the dataframe
df_long = df_long[,c("Proteins", "genotype", "Tissue", "time", "Trtment", "Replication", "measurement")]

Hope it helps.


#3

Thank you very much Joshi. Greatly appreciate.