R语言之 as.formula()

2023-11-14

今天学到一个东西, R 语言回归函数里面的公式函数, as.formula(). 其作用就是将字符串转换成公式。

> aa = "ReadCount~Age+BMI+Sex+HAMD+1+(1|Sex)"
> aa
[1] "ReadCount~Age+BMI+Sex+HAMD+1+(1|Sex)"
> as.formula(aa)
ReadCount ~ Age + BMI + Sex + HAMD + 1 + (1 | Sex)

 

library(glmmADMB)

#setwd('J:/Correlation')
file_in = "Data_for_glm/KQ/p__Firmicutes.for_glm.xls"
file_out = "Result/KQ/p__Firmicutes.glm_result.correlation.xls"
#colums = c("ReadCount","Age","Age_onset","BMI","HAMA","HAMD","HY_state","Height","MMSE","MoCA","NMSS","Sex","UPDRS","Weight")

dat = read.table(file_in, head=T,sep='\t',check.names=F) #这里不能有row.names = 1 

colums = c("ReadCount","Age","BMI","Sex")
colums = append(colums, 'HAMD')

dat_for_diff1 = subset(dat, Group=='PD')
dat_for_diff2 = dat_for_diff1[,colums]
dat_for_diff3 = na.omit(dat_for_diff2)
dat_for_diff3$Sex = factor(dat_for_diff3$Sex)
#formu = paste(append(colums, '(1|Sex)'), collapse='+')
om <- glmmadmb(ReadCount~Age+Sex+BMI+HAMD+1+(1|Sex),family="nbinom",data=dat_for_diff3) 

#由于不同的指标在不同的样本中存在NA值,所以将所有指标都囊括进来,然后na.omit是不妥的。




glm_analysis = function(file_in,file_out,colums){
	dat = read.table(file_in, head=T,sep='\t',check.names=F) #这里不能有row.names = 1 

	dat_for_diff1 = subset(dat, Group=='PD')
	dat_for_diff2 = dat_for_diff1[,colums]
	dat_for_diff3 = na.omit(dat_for_diff2)
	dat_for_diff3$Sex = factor(dat_for_diff3$Sex)

	#om <- glmmadmb(SiblingNegotiation~FoodTreatment*SexParent+(1|Nest)+offset(log(BroodSize)),zeroInflation=TRUE,family="nbinom",data=Owls)
	
	om <- glmmadmb(ReadCount~Age+Sex+HAMA+NMSS+UPDRS+1+(1|Sex),family="nbinom",data=dat_for_diff3) #先不加入Weight

	om_simple = summary(om)
	first_line = colnames(om_simple$coefficients)
	new_data_frame = NULL
	new_data_frame = rbind(new_data_frame,first_line); colnames(new_data_frame) = colnames(om_simple$coefficients);rownames(new_data_frame)[1] = 'Item'
	new_data_frame = rbind(new_data_frame,om_simple$coefficients)
	write.table(new_data_frame, file=file_out,sep='\t',quote=F,row.names=T,col.names=F)
}

setwd('J:/Correlation')

#Lst = read.table('single_bacteria.windows.list',head=T)
#
#for (Row in 1:nrow(Lst)){
#	infile_correlation = as.character(Lst[Row,"for_glm"])
#	outfile_correlation = as.character(Lst[Row,"correlation"])
#	glm_analysis(infile_4variable,outfile_4variable)
#}


#测试一下
#===================================================FB========================================================
glm_analysis("Data_for_glm/FB/c__Clostridia.for_glm.xls","Result/FB/c__Clostridia.glm_result.correlation.xls",c("Sex","Age","ReadCount","weight"))
glm_analysis("Data_for_glm/FB/o__Clostridiales.for_glm.xls","Result/FB/o__Clostridiales.glm_result.correlation.xls",c("Sex","Age","ReadCount","weight"))
glm_analysis("Data_for_glm/FB/f__Ruminococcaceae.for_glm.xls","Result/FB/f__Ruminococcaceae.glm_result.correlation.xls",c("Sex","Age","ReadCount","MMSE"))

#===================================================KQ========================================================
glm_analysis("Data_for_glm/KQ/p__Firmicutes.for_glm.xls","Result/KQ/p__Firmicutes.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS","HAMA","HAMD","NMSS"))
glm_analysis("Data_for_glm/KQ/p__Proteobacteria.for_glm.xls","Result/KQ/p__Proteobacteria.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","HAMD"))
glm_analysis("Data_for_glm/KQ/f__Neisseriaceae.for_glm.xls","Result/KQ/f__Neisseriaceae.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","HAMD"))
glm_analysis("Data_for_glm/KQ/g__Neisseria.for_glm.xls","Result/KQ/g__Neisseria.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","HAMD"))

glm_analysis("Data_for_glm/KQ/c__Bacilli.for_glm.xls","Result/KQ/c__Bacilli.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS", "HAMA"))

glm_analysis("Data_for_glm/KQ/o__Lactobacillales.for_glm.xls","Result/KQ/o__Lactobacillales.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS", "HAMA"))
glm_analysis("Data_for_glm/KQ/f__Streptococcaceae.for_glm.xls","Result/KQ/f__Streptococcaceae.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS", "HAMA"))
glm_analysis("Data_for_glm/KQ/g__Streptococcus.for_glm.xls","Result/KQ/g__Streptococcus.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS", "HAMA"))
glm_analysis("Data_for_glm/KQ/s__Streptococcus_mitis.for_glm.xls","Result/KQ/s__Streptococcus_mitis.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS", "HAMA"))
glm_analysis("Data_for_glm/KQ/s__Neisseria_subflava.for_glm.xls","Result/KQ/s__Neisseria_subflava.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","UPDRS","HAMA","HAMD","NMSS"))

#===================================================BQ========================================================
glm_analysis("Data_for_glm/BQ/o__Corynebacteriales.for_glm.xls","Result/BQ/o__Corynebacteriales.glm_result.correlation.xls", c("Sex","BMI","Age","ReadCount","Age_onset","HAMD"))
glm_analysis("Data_for_glm/BQ/f__Corynebacteriaceae.for_glm.xls","Result/BQ/f__Corynebacteriaceae.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","Age_onset",,"HAMD"))
glm_analysis("Data_for_glm/BQ/g__unidentified_Corynebacteriaceae.for_glm.xls","Result/BQ/g__unidentified_Corynebacteriaceae.glm_result.correlation.xls"c("Sex","BMI","Age","ReadCount","Age_onset",,"HAMD"))


glm_analysis("Data_for_glm/BQ/s__Corynebacterium_sp_KPL1855.for_glm.xls","Result/BQ/s__Corynebacterium_sp_KPL1855.glm_result.correlation.xls",c("Sex","BMI","Age","ReadCount","Age_onset",,"HAMD"))



glm_analysis2 = function(file_in,file_out_prefix, parameter){
	dat = read.table(file_in, head=T,sep='\t',check.names=F) #这里不能有row.names = 1 

	dat_for_diff1 = subset(dat, Group=='PD')
#	colums = c("ReadCount","Age","Age_onset","BMI","HAMA","HAMD","HY_state","Height","MMSE","MoCA","NMSS","Sex","UPDRS","Weight")
	colums = c("ReadCount","Age","BMI","Sex")
	if (! parameter %in% colums){colums = append(colums,parameter)}
	
	dat_for_diff2 = dat_for_diff1[,colums]
	dat_for_diff3 = na.omit(dat_for_diff2)
	dat_for_diff3$Sex = factor(dat_for_diff3$Sex)

	#om <- glmmadmb(SiblingNegotiation~FoodTreatment*SexParent+(1|Nest)+offset(log(BroodSize)),zeroInflation=TRUE,family="nbinom",data=Owls)
	if(! parameter %in% colums){
		om <- glmmadmb(ReadCount~Age+Sex+BMI+parameter+1+(1|Sex),family="nbinom",data=dat_for_diff3)
	}else{
		om <- glmmadmb(ReadCount~Age+Sex+BMI+1+(1|Sex),family="nbinom",data=dat_for_diff3)
	}
	
	file_out = paste(file_out_prefix, parameter, "glm_result.correlation.xls",sep=".")
	om_simple = summary(om)
	first_line = colnames(om_simple$coefficients)
	new_data_frame = NULL
	new_data_frame = rbind(new_data_frame,first_line); colnames(new_data_frame) = colnames(om_simple$coefficients);rownames(new_data_frame)[1] = 'Item'
	new_data_frame = rbind(new_data_frame,om_simple$coefficients)
	write.table(new_data_frame, file=file_out,sep='\t',quote=F,row.names=T,col.names=F)
	cat("file_in:\t",file_in,"\nfile_out:\t",file_out,"\n")
}

glm_analysis3 = function(file_in,file_out_prefix, clin_vec){
	dat = read.table(file_in, head=T,sep='\t',check.names=F) #这里不能有row.names = 1 
	dat_for_diff1 = subset(dat, Group=='PD')

	colums = c("ReadCount","Age","BMI","Sex")
	for (parameter in clin_vec){
		colums2 = colums # colums2有可能增加新的列名进去
		if (! parameter %in% colums){colums2 = append(colums,parameter);}
		dat_for_diff2 = dat_for_diff1[,colums2]
		dat_for_diff3 = na.omit(dat_for_diff2)
		dat_for_diff3$Sex = factor(dat_for_diff3$Sex)
		indep_variable = colums2[1];
		dep_variable = c(colums2[2:length(colums2)],"1+(1|Sex)")
		dep_variable2 = paste(dep_variable,collapse='+')
		formu = paste(indep_variable,dep_variable2,sep='~')
		cat('formula:\t',formu,"\n")
		om <- glmmadmb(as.formula(formu),family="nbinom",data=dat_for_diff3)
		om_simple = summary(om)
		first_line = colnames(om_simple$coefficients)
		new_data_frame = NULL
		new_data_frame = rbind(new_data_frame,first_line); colnames(new_data_frame) = colnames(om_simple$coefficients);
		rownames(new_data_frame)[1] = 'Item'
		new_data_frame = rbind(new_data_frame,om_simple$coefficients)

		file_out = paste(file_out_prefix, parameter, "glm_result.correlation.xls",sep=".")
		write.table(new_data_frame, file=file_out,sep='\t',quote=F,row.names=T,col.names=F)
		cat("file_in:\t",file_in,"\nfile_out:\t",file_out,"\n")
	}
}

glm_analysis3("Data_for_glm/FB/c__Clostridia.for_glm.xls","Result/FB/c__Clostridia",c("Weight"))
glm_analysis3("Data_for_glm/FB/o__Clostridiales.for_glm.xls","Result/FB/o__Clostridiales",c("Weight"))
glm_analysis3("Data_for_glm/FB/f__Ruminococcaceae.for_glm.xls","Result/FB/f__Ruminococcaceae",c("MMSE"))

#===================================================KQ========================================================
glm_analysis3("Data_for_glm/KQ/p__Firmicutes.for_glm.xls","Result/KQ/p__Firmicutes",c("Age","UPDRS","HAMA","HAMD","NMSS"))
glm_analysis3("Data_for_glm/KQ/p__Proteobacteria.for_glm.xls","Result/KQ/p__Proteobacteria",c("HAMD"))
glm_analysis3("Data_for_glm/KQ/f__Neisseriaceae.for_glm.xls","Result/KQ/f__Neisseriaceae",c("HAMD"))
glm_analysis3("Data_for_glm/KQ/g__Neisseria.for_glm.xls","Result/KQ/g__Neisseria",c("HAMD"))

glm_analysis3("Data_for_glm/KQ/c__Bacilli.for_glm.xls","Result/KQ/c__Bacilli",c("UPDRS", "HAMA"))

glm_analysis3("Data_for_glm/KQ/o__Lactobacillales.for_glm.xls","Result/KQ/o__Lactobacillales",c("Age","UPDRS", "HAMA"))
glm_analysis3("Data_for_glm/KQ/f__Streptococcaceae.for_glm.xls","Result/KQ/f__Streptococcaceae",c("Age","UPDRS", "HAMA"))
glm_analysis3("Data_for_glm/KQ/g__Streptococcus.for_glm.xls","Result/KQ/g__Streptococcus",c("Age","UPDRS", "HAMA"))
glm_analysis3("Data_for_glm/KQ/s__Streptococcus_mitis.for_glm.xls","Result/KQ/s__Streptococcus_mitis",c("Age","UPDRS", "HAMA"))
glm_analysis3("Data_for_glm/KQ/s__Neisseria_subflava.for_glm.xls","Result/KQ/s__Neisseria_subflava",c("UPDRS","HAMA","HAMD","NMSS"))

#===================================================BQ========================================================
glm_analysis3("Data_for_glm/BQ/o__Corynebacteriales.for_glm.xls","Result/BQ/o__Corynebacteriales", c("Age_onset","HAMD"))
glm_analysis3("Data_for_glm/BQ/f__Corynebacteriaceae.for_glm.xls","Result/BQ/f__Corynebacteriaceae",c("Age_onset","HAMD"))
glm_analysis3("Data_for_glm/BQ/g__unidentified_Corynebacteriaceae.for_glm.xls","Result/BQ/g__unidentified_Corynebacteriaceae", c("Age_onset","HAMD"))


glm_analysis3("Data_for_glm/BQ/s__Corynebacterium_sp_KPL1855.for_glm.xls","Result/BQ/s__Corynebacterium_sp_KPL1855",c("Age_onset","BMI","Weight","HAMD"))





#test
file_in = "Data_for_glm/FB/c__Clostridia.for_glm.xls"
file_out_prefix = "Result/FB/c__Clostridia"
clin_vec = c("Weight")
dat = read.table(file_in, head=T,sep='\t',check.names=F) 
dat_for_diff1 = subset(dat, Group=='PD')
colums = c("ReadCount","Age","BMI","Sex")
#formu = "ReadCount~Age+BMI+Sex"
print("==========================================================")
print(colums)
for (parameter in clin_vec){
	colums2 = colums # colums2有可能增加新的列名进去
	if (! parameter %in% colums){colums2 = append(colums,parameter);}
	print("**********************************************************")
	print(colums2)
	dat_for_diff2 = dat_for_diff1[,colums2]
	dat_for_diff3 = na.omit(dat_for_diff2)
	dat_for_diff3$Sex = factor(dat_for_diff3$Sex)
	indep_variable = colums2[1];
	dep_variable = c(colums2[2:length(colums2)],"1+(1|Sex)")
	dep_variable2 = paste(dep_variable,collapse='+')
	formu = paste(indep_variable,dep_variable2,sep='~')
	print(formu)
	om <- glmmadmb(as.formula(formu),family="nbinom",data=dat_for_diff3)
#	file_out = paste(file_out_prefix, parameter, "glm_result.correlation.xls",sep=".")
#	om_simple = summary(om)
#	print(om_simple)
#	first_line = colnames(om_simple$coefficients)
#	new_data_frame = NULL
#	new_data_frame = rbind(new_data_frame,first_line); colnames(new_data_frame) = colnames(om_simple$coefficients);
#	rownames(new_data_frame)[1] = 'Item'
#	new_data_frame = rbind(new_data_frame,om_simple$coefficients)
#	write.table(new_data_frame, file=file_out,sep='\t',quote=F,row.names=T,col.names=F)
#	cat("file_in:\t",file_in,"\nfile_out:\t",file_out,"\n")
}

xnam <- paste0("x", 1:25)
fmla <- as.formula(paste("y ~ ", paste(xnam, collapse= "+")))

 

本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)

R语言之 as.formula() 的相关文章

随机推荐

  • 聚类算法(K-means & AGNES & DBSCAN)

    一 聚类算法基本概念 1 定义 聚类就是按照某个特定标准 如距离准则 把一个数据集分割成不同的类或簇 使得同一个簇内的数据对象的相似性尽可能大 即聚类后同一类的数据尽可能聚集到一起 不同数据尽量分离 简单来讲就是把相似的东西分到一起 2 无
  • 哈工大2021机器学习期末考试题

    一 说明参数正则化和参数后验之间的联系 并解释在机器学习模型参数估计中使用正则化的目的 二 给出条件熵的定义 举一个本课程中应用该方法的例子 说明使用条件熵的好处 给出你的直观解释 三 朴素贝叶斯的基本假设是什么 有什么好处 当假设满足时
  • webpack : 无法加载文件 C:\Users\12987\AppData\Roaming\npm\webpack.ps1,因为在此系统上禁止运行脚本。

    通过查询综合了意见给出以下解决办法 webpack src index js o build built js mode development出现的问题 解决 1 管理员身份cmd输入 set ExecutionPolicy Remote
  • ubuntu 18.04 安装 opencv-2.4.13.6

    ubuntu 18 04 安装 opencv 2 4 13 6 1 opencv 2 4 13 6下载 2 安装opencv 2 4 13 6 1 解压opencv 2 4 13 6 zip到根目录下 2 安装opencv 2 4 13 6
  • 几个友好Java代码习惯建议

    我工作多年 遇到过各种各样的同事 我见过各种代码 优秀的 垃圾的 没有吸引力的等等 所以这篇文章记录了一个优秀的Java开发应该具备哪些良好的开发习惯或最佳实践 1 封装方法参数 当你的方法参数过多时 建议封装一个对象 下面是反面教材 谁教
  • 理解傅里叶分析

    一 什么是频域 从我们出生 我们看到的世界都以时间贯穿 股票的走势 人的身高 汽车的轨迹都会随着时间发生改变 这种以时间作为参照来观察动态世界的方法我们称其为时域分析 而我们也想当然的认为 世间万物都在随着时间不停的改变 并且永远不会静止下
  • 【Mybatis】mybatis3入门

    mybatis简介 MyBatis 是一款优秀的持久层框架 它支持定制化 SQL 存储过程以及高级映射 MyBatis 避免了几乎所有的 JDBC 代码和手动设置参数以及获取结果集 MyBatis 可以使用简单的 XML 或注解来配置和映射
  • 【经验分享】- 这是一份来自 IT 男的电脑使用建议

    这是一份来自 IT 男的电脑使用建议 1 写在前面 2018 年高考结束我拿到了第一台笔记本电脑 此前对电脑接触地并不多 因此在这几年的电脑使用过程中积累了一些个人使用经验和使用技巧想要分享给可能还是电脑小白的你 个人一直以来用的还是 Wi
  • 自己动手定制Chromium系列之四:Chromium 58的一个编译配置

    aec untrusted delay for testing Current value from the default false From third party webrtc modules audio processing BU
  • (成功解决)Python连接clickhouse

    第一次尝试用Python连接clickhouse数据库 踩了不少坑 特此记录 帮助后人少犯错 运行环境 python 3 8 3 clickhouse driver 0 2 3 clickhouse sqlalchemy 0 2 0 sql
  • Linux-(C/C++)动态链接库生成以及使用(libxxx.so)

    linux静态库生成与使用 http www cnblogs com johnice archive 2013 01 17 2864319 html Linux中so文件为共享库 与windows下dll类似 不过实现要简单 so可以供多个
  • 小熊错误_小熊派4G开发板初体验

    开发板硬件资源介绍 前阵子小熊派发布了一款超高性价比的4G开发板 但是板子仅限量1000套 小熊派官方给我送了一块 我们一起来学习学习 板子做得小巧精致 控制核心用的是移远的EC100Y LTE Cat1无线通信模组 该模组可对所有用户开放
  • Python开发环境Wing IDE如何使用调试功能

    在使用Wing IDE开始调试的时候 需要设置断点的行 读取GetItemCount函数的返回 这可以通过单击行并选择Break工具栏条目 或通过单击行左边的黑色边缘 断点应该以实心红圈的形式出现 接下来使用绿色箭头图标开始调试或在Debu
  • 基于微信小程序的健身小助手小程序

    文末联系获取源码 开发语言 Java 框架 ssm JDK版本 JDK1 8 服务器 tomcat7 数据库 mysql 5 7 8 0 数据库工具 Navicat11 开发软件 eclipse myeclipse idea Maven包
  • Mysql中分组后取最新的一条数据

    在 SQL 中 你可以使用子查询和 ORDER BY 子句来实现按照特定字段进行分组 并获取每个分组中最新的一条记录 SELECT t1 FROM your table t1 INNER JOIN SELECT id MAX timesta
  • 云技术平台赋能媒体融合发展创新

    欢迎大家前往腾讯云技术社区 获取更多腾讯海量技术实践干货哦 作者 熊普江 媒体行业是传统而又新兴的行业 在数字化 信息化 移动化快速演进的今天 无论是用户 社会还是行业 政府都意识到 传统媒体与新兴媒体融合发展是必然之路 但媒体融合需要内容
  • 2021-07-14 React 代码规范整理

    文章目录 React 代码规范 1 基础规则 2 组件声明 1 组件名称和定义该组件的文件名称建议要保持一致 2 不要使用 displayName 属性来定义组件的名称 应该在 class 或者 function 关键字后面直接声明组件的名
  • 烟花代码

    div div
  • 日志-Log4J

    日志 程序中的日志可以用来记录程序在运行的时候点点滴滴 并可以进行永久存储 日志和输出语句的区别 输出语句 日志技术 取消日志 需要修改代码 灵活性比较差 不需要修改代码 灵活性比较好 输出位置 只能是控制台 可以将日志信息写入到文件或者数
  • R语言之 as.formula()

    今天学到一个东西 R 语言回归函数里面的公式函数 as formula 其作用就是将字符串转换成公式 gt aa ReadCount Age BMI Sex HAMD 1 1 Sex gt aa 1 ReadCount Age BMI Se