今天学到一个东西, R 语言回归函数里面的公式函数, as.formula(). 其作用就是将字符串转换成公式。
> aa = "ReadCount~Age+BMI+Sex+HAMD+1+(1|Sex)"
> aa
[1] "ReadCount~Age+BMI+Sex+HAMD+1+(1|Sex)"
> as.formula(aa)
ReadCount ~ Age + BMI + Sex + HAMD + 1 + (1 | Sex)
library(glmmADMB)
#setwd('J:/Correlation')
file_in = "Data_for_glm/KQ/p__Firmicutes.for_glm.xls"
file_out = "Result/KQ/p__Firmicutes.glm_result.correlation.xls"
#colums = c("ReadCount","Age","Age_onset","BMI","HAMA","HAMD","HY_state","Height","MMSE","MoCA","NMSS","Sex","UPDRS","Weight")
dat = read.table(file_in, head=T,sep='\t',check.names=F) #这里不能有row.names = 1
colums = c("ReadCount","Age","BMI","Sex")
colums = append(colums, 'HAMD')
dat_for_diff1 = subset(dat, Group=='PD')
dat_for_diff2 = dat_for_diff1[,colums]
dat_for_diff3 = na.omit(dat_for_diff2)
dat_for_diff3$Sex = factor(dat_for_diff3$Sex)
#formu = paste(append(colums, '(1|Sex)'), collapse='+')
om <- glmmadmb(ReadCount~Age+Sex+BMI+HAMD+1+(1|Sex),family="nbinom",data=dat_for_diff3)
#由于不同的指标在不同的样本中存在NA值,所以将所有指标都囊括进来,然后na.omit是不妥的。
glm_analysis = function(file_in,file_out,colums){
dat = read.table(file_in, head=T,sep='\t',check.names=F) #这里不能有row.names = 1
dat_for_diff1 = subset(dat, Group=='PD')
dat_for_diff2 = dat_for_diff1[,colums]
dat_for_diff3 = na.omit(dat_for_diff2)
dat_for_diff3$Sex = factor(dat_for_diff3$Sex)
#om <- glmmadmb(SiblingNegotiation~FoodTreatment*SexParent+(1|Nest)+offset(log(BroodSize)),zeroInflation=TRUE,family="nbinom",data=Owls)
om <- glmmadmb(ReadCount~Age+Sex+HAMA+NMSS+UPDRS+1+(1|Sex),family="nbinom",data=dat_for_diff3) #先不加入Weight
om_simple = summary(om)
first_line = colnames(om_simple$coefficients)
new_data_frame = NULL
new_data_frame = rbind(new_data_frame,first_line); colnames(new_data_frame) = colnames(om_simple$coefficients);rownames(new_data_frame)[1] = 'Item'
new_data_frame = rbind(new_data_frame,om_simple$coefficients)
write.table(new_data_frame, file=file_out,sep='\t',quote=F,row.names=T,col.names=F)
}
setwd('J:/Correlation')
#Lst = read.table('single_bacteria.windows.list',head=T)
#
#for (Row in 1:nrow(Lst)){
# infile_correlation = as.character(Lst[Row,"for_glm"])
# outfile_correlation = as.character(Lst[Row,"correlation"])
# glm_analysis(infile_4variable,outfile_4variable)
#}
#测试一下
#===================================================FB========================================================
glm_analysis("Data_for_glm/FB/c__Clostridia.for_glm.xls","Result/FB/c__Clostridia.glm_result.correlation.xls",c("Sex","Age","ReadCount","weight"))
glm_analysis("Data_for_glm/FB/o__Clostridiales.for_glm.xls","Result/FB/o__Clostridiales.glm_result.correlation.xls",c("Sex","Age","ReadCount","weight"))
glm_analysis("Data_for_glm/FB/f__Ruminococcaceae.for_glm.xls","Result/FB/f__Ruminococcaceae.glm_result.correlation.xls",c("Sex","Age","ReadCount","MMSE"))
#===================================================KQ========================================================
glm_analysis("Data_for_glm/KQ/p__Firmicutes.for_glm.xls","Result/KQ/p__Firmicutes.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS","HAMA","HAMD","NMSS"))
glm_analysis("Data_for_glm/KQ/p__Proteobacteria.for_glm.xls","Result/KQ/p__Proteobacteria.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","HAMD"))
glm_analysis("Data_for_glm/KQ/f__Neisseriaceae.for_glm.xls","Result/KQ/f__Neisseriaceae.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","HAMD"))
glm_analysis("Data_for_glm/KQ/g__Neisseria.for_glm.xls","Result/KQ/g__Neisseria.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","HAMD"))
glm_analysis("Data_for_glm/KQ/c__Bacilli.for_glm.xls","Result/KQ/c__Bacilli.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS", "HAMA"))
glm_analysis("Data_for_glm/KQ/o__Lactobacillales.for_glm.xls","Result/KQ/o__Lactobacillales.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS", "HAMA"))
glm_analysis("Data_for_glm/KQ/f__Streptococcaceae.for_glm.xls","Result/KQ/f__Streptococcaceae.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS", "HAMA"))
glm_analysis("Data_for_glm/KQ/g__Streptococcus.for_glm.xls","Result/KQ/g__Streptococcus.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS", "HAMA"))
glm_analysis("Data_for_glm/KQ/s__Streptococcus_mitis.for_glm.xls","Result/KQ/s__Streptococcus_mitis.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS", "HAMA"))
glm_analysis("Data_for_glm/KQ/s__Neisseria_subflava.for_glm.xls","Result/KQ/s__Neisseria_subflava.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS","HAMA","HAMD","NMSS"))
#===================================================BQ========================================================
glm_analysis("Data_for_glm/BQ/o__Corynebacteriales.for_glm.xls","Result/BQ/o__Corynebacteriales.glm_result.correlation.xls", c("Sex","BMI","Age","ReadCount","Age_onset","HAMD"))
glm_analysis("Data_for_glm/BQ/f__Corynebacteriaceae.for_glm.xls","Result/BQ/f__Corynebacteriaceae.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","Age_onset",,"HAMD"))
glm_analysis("Data_for_glm/BQ/g__unidentified_Corynebacteriaceae.for_glm.xls","Result/BQ/g__unidentified_Corynebacteriaceae.glm_result.correlation.xls"c("Sex","BMI","Age","ReadCount","Age_onset",,"HAMD"))
glm_analysis("Data_for_glm/BQ/s__Corynebacterium_sp_KPL1855.for_glm.xls","Result/BQ/s__Corynebacterium_sp_KPL1855.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","Age_onset",,"HAMD"))
glm_analysis2 = function(file_in,file_out_prefix, parameter){
dat = read.table(file_in, head=T,sep='\t',check.names=F) #这里不能有row.names = 1
dat_for_diff1 = subset(dat, Group=='PD')
# colums = c("ReadCount","Age","Age_onset","BMI","HAMA","HAMD","HY_state","Height","MMSE","MoCA","NMSS","Sex","UPDRS","Weight")
colums = c("ReadCount","Age","BMI","Sex")
if (! parameter %in% colums){colums = append(colums,parameter)}
dat_for_diff2 = dat_for_diff1[,colums]
dat_for_diff3 = na.omit(dat_for_diff2)
dat_for_diff3$Sex = factor(dat_for_diff3$Sex)
#om <- glmmadmb(SiblingNegotiation~FoodTreatment*SexParent+(1|Nest)+offset(log(BroodSize)),zeroInflation=TRUE,family="nbinom",data=Owls)
if(! parameter %in% colums){
om <- glmmadmb(ReadCount~Age+Sex+BMI+parameter+1+(1|Sex),family="nbinom",data=dat_for_diff3)
}else{
om <- glmmadmb(ReadCount~Age+Sex+BMI+1+(1|Sex),family="nbinom",data=dat_for_diff3)
}
file_out = paste(file_out_prefix, parameter, "glm_result.correlation.xls",sep=".")
om_simple = summary(om)
first_line = colnames(om_simple$coefficients)
new_data_frame = NULL
new_data_frame = rbind(new_data_frame,first_line); colnames(new_data_frame) = colnames(om_simple$coefficients);rownames(new_data_frame)[1] = 'Item'
new_data_frame = rbind(new_data_frame,om_simple$coefficients)
write.table(new_data_frame, file=file_out,sep='\t',quote=F,row.names=T,col.names=F)
cat("file_in:\t",file_in,"\nfile_out:\t",file_out,"\n")
}
glm_analysis3 = function(file_in,file_out_prefix, clin_vec){
dat = read.table(file_in, head=T,sep='\t',check.names=F) #这里不能有row.names = 1
dat_for_diff1 = subset(dat, Group=='PD')
colums = c("ReadCount","Age","BMI","Sex")
for (parameter in clin_vec){
colums2 = colums # colums2有可能增加新的列名进去
if (! parameter %in% colums){colums2 = append(colums,parameter);}
dat_for_diff2 = dat_for_diff1[,colums2]
dat_for_diff3 = na.omit(dat_for_diff2)
dat_for_diff3$Sex = factor(dat_for_diff3$Sex)
indep_variable = colums2[1];
dep_variable = c(colums2[2:length(colums2)],"1+(1|Sex)")
dep_variable2 = paste(dep_variable,collapse='+')
formu = paste(indep_variable,dep_variable2,sep='~')
cat('formula:\t',formu,"\n")
om <- glmmadmb(as.formula(formu),family="nbinom",data=dat_for_diff3)
om_simple = summary(om)
first_line = colnames(om_simple$coefficients)
new_data_frame = NULL
new_data_frame = rbind(new_data_frame,first_line); colnames(new_data_frame) = colnames(om_simple$coefficients);
rownames(new_data_frame)[1] = 'Item'
new_data_frame = rbind(new_data_frame,om_simple$coefficients)
file_out = paste(file_out_prefix, parameter, "glm_result.correlation.xls",sep=".")
write.table(new_data_frame, file=file_out,sep='\t',quote=F,row.names=T,col.names=F)
cat("file_in:\t",file_in,"\nfile_out:\t",file_out,"\n")
}
}
glm_analysis3("Data_for_glm/FB/c__Clostridia.for_glm.xls","Result/FB/c__Clostridia",c("Weight"))
glm_analysis3("Data_for_glm/FB/o__Clostridiales.for_glm.xls","Result/FB/o__Clostridiales",c("Weight"))
glm_analysis3("Data_for_glm/FB/f__Ruminococcaceae.for_glm.xls","Result/FB/f__Ruminococcaceae",c("MMSE"))
#===================================================KQ========================================================
glm_analysis3("Data_for_glm/KQ/p__Firmicutes.for_glm.xls","Result/KQ/p__Firmicutes",c("Age","UPDRS","HAMA","HAMD","NMSS"))
glm_analysis3("Data_for_glm/KQ/p__Proteobacteria.for_glm.xls","Result/KQ/p__Proteobacteria",c("HAMD"))
glm_analysis3("Data_for_glm/KQ/f__Neisseriaceae.for_glm.xls","Result/KQ/f__Neisseriaceae",c("HAMD"))
glm_analysis3("Data_for_glm/KQ/g__Neisseria.for_glm.xls","Result/KQ/g__Neisseria",c("HAMD"))
glm_analysis3("Data_for_glm/KQ/c__Bacilli.for_glm.xls","Result/KQ/c__Bacilli",c("UPDRS", "HAMA"))
glm_analysis3("Data_for_glm/KQ/o__Lactobacillales.for_glm.xls","Result/KQ/o__Lactobacillales",c("Age","UPDRS", "HAMA"))
glm_analysis3("Data_for_glm/KQ/f__Streptococcaceae.for_glm.xls","Result/KQ/f__Streptococcaceae",c("Age","UPDRS", "HAMA"))
glm_analysis3("Data_for_glm/KQ/g__Streptococcus.for_glm.xls","Result/KQ/g__Streptococcus",c("Age","UPDRS", "HAMA"))
glm_analysis3("Data_for_glm/KQ/s__Streptococcus_mitis.for_glm.xls","Result/KQ/s__Streptococcus_mitis",c("Age","UPDRS", "HAMA"))
glm_analysis3("Data_for_glm/KQ/s__Neisseria_subflava.for_glm.xls","Result/KQ/s__Neisseria_subflava",c("UPDRS","HAMA","HAMD","NMSS"))
#===================================================BQ========================================================
glm_analysis3("Data_for_glm/BQ/o__Corynebacteriales.for_glm.xls","Result/BQ/o__Corynebacteriales", c("Age_onset","HAMD"))
glm_analysis3("Data_for_glm/BQ/f__Corynebacteriaceae.for_glm.xls","Result/BQ/f__Corynebacteriaceae",c("Age_onset","HAMD"))
glm_analysis3("Data_for_glm/BQ/g__unidentified_Corynebacteriaceae.for_glm.xls","Result/BQ/g__unidentified_Corynebacteriaceae", c("Age_onset","HAMD"))
glm_analysis3("Data_for_glm/BQ/s__Corynebacterium_sp_KPL1855.for_glm.xls","Result/BQ/s__Corynebacterium_sp_KPL1855",c("Age_onset","BMI","Weight","HAMD"))
#test
file_in = "Data_for_glm/FB/c__Clostridia.for_glm.xls"
file_out_prefix = "Result/FB/c__Clostridia"
clin_vec = c("Weight")
dat = read.table(file_in, head=T,sep='\t',check.names=F)
dat_for_diff1 = subset(dat, Group=='PD')
colums = c("ReadCount","Age","BMI","Sex")
#formu = "ReadCount~Age+BMI+Sex"
print("==========================================================")
print(colums)
for (parameter in clin_vec){
colums2 = colums # colums2有可能增加新的列名进去
if (! parameter %in% colums){colums2 = append(colums,parameter);}
print("**********************************************************")
print(colums2)
dat_for_diff2 = dat_for_diff1[,colums2]
dat_for_diff3 = na.omit(dat_for_diff2)
dat_for_diff3$Sex = factor(dat_for_diff3$Sex)
indep_variable = colums2[1];
dep_variable = c(colums2[2:length(colums2)],"1+(1|Sex)")
dep_variable2 = paste(dep_variable,collapse='+')
formu = paste(indep_variable,dep_variable2,sep='~')
print(formu)
om <- glmmadmb(as.formula(formu),family="nbinom",data=dat_for_diff3)
# file_out = paste(file_out_prefix, parameter, "glm_result.correlation.xls",sep=".")
# om_simple = summary(om)
# print(om_simple)
# first_line = colnames(om_simple$coefficients)
# new_data_frame = NULL
# new_data_frame = rbind(new_data_frame,first_line); colnames(new_data_frame) = colnames(om_simple$coefficients);
# rownames(new_data_frame)[1] = 'Item'
# new_data_frame = rbind(new_data_frame,om_simple$coefficients)
# write.table(new_data_frame, file=file_out,sep='\t',quote=F,row.names=T,col.names=F)
# cat("file_in:\t",file_in,"\nfile_out:\t",file_out,"\n")
}
xnam <- paste0("x", 1:25)
fmla <- as.formula(paste("y ~ ", paste(xnam, collapse= "+")))