我正在尝试使用 dplyr 对每个个体 ID(cusip)和年份(fyear)的组合运行大量回归,但我不知道如何利用 summary 函数来实现。
我需要对每个分组运行模型、获取系数、将它们相加,然后用 mutate 把结果存入另一个变量 beta。
下面是一些代码,虽然无法运行,但可以说明我的想法。
可重现的例子:
# Reproducible sample: a 30-row data.frame with the columns cusip, fyear,
# ret, vwretd and date.
#   cusip  - character identifier; two distinct values appear in this sample
#   fyear  - integer year (1979, 1980 and 1965 in this sample)
#   ret    - stored as CHARACTER strings, not numbers, so it has to be
#            converted (e.g. as.numeric()) before it can be used in a model
#   vwretd - numeric
#   date   - integer; values look like yyyymmdd month-end dates (assumption
#            based on the literals below -- confirm against the full data)
tdata <- structure(list(cusip = c("02136810", "02136810", "02136810",
"02136810", "02136810", "02136810", "02136810", "02136810", "02136810",
"02136810", "02136810", "02136810", "02136810", "02136810", "02136810",
"02136810", "02136810", "02136810", "02136810", "02136810", "02136810",
"02136810", "02136810", "02136810", "01650910", "01650910", "01650910",
"01650910", "01650910", "01650910"), fyear = c(1979L, 1979L,
1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L, 1979L,
1979L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L, 1980L,
1980L, 1980L, 1980L, 1980L, 1965L, 1965L, 1965L, 1965L, 1965L,
1965L), ret = c("0.000000", "0.000000", "0.111111", "-0.063636",
"0.203883", "0.032258", "0.078125", "0.000000", "-0.014493",
"-0.014706", "0.044776", "0.457143", "0.039216", "-0.009434",
"-0.200000", "-0.047619", "0.100000", "0.022727", "0.144444",
"0.067961", "-0.009091", "0.009174", "0.109091", "-0.077869",
"0.418182", "-0.089744", "0.014085", "-0.041667", "-0.086957",
"0.000000"), vwretd = c(0.049489, -0.026766, 0.065618, 0.008522,
-0.013576, 0.04685, 0.014991, 0.064728, 0.001428, -0.07266, 0.063603,
0.028212, 0.065607, 0.001015, -0.120224, 0.052288, 0.06009, 0.037714,
0.069438, 0.023553, 0.029498, 0.020093, 0.104951, -0.034409,
0.038646, 0.006946, -0.009715, 0.033652, -0.00435, -0.051868),
date = c(19790131L, 19790228L, 19790330L, 19790430L, 19790531L,
19790629L, 19790731L, 19790831L, 19790928L, 19791031L, 19791130L,
19791231L, 19800131L, 19800229L, 19800331L, 19800430L, 19800530L,
19800630L, 19800731L, 19800829L, 19800930L, 19801031L, 19801128L,
19801231L, 19650129L, 19650226L, 19650331L, 19650430L, 19650528L,
19650630L)), .Names = c("cusip", "fyear", "ret", "vwretd",
"date"), row.names = c(NA, 30L), class = "data.frame")
dplyr 代码:
# Fit one regression per (cusip, fyear) group and keep, for each group, the
# sum of the slope on vwretd and the slope on its lag as `beta`.
#
# Fixes relative to the earlier attempt:
#   * `ret` is stored as character, so it is converted to numeric first;
#   * the `arrange()` call is now closed properly;
#   * `summarise()` (one row per group) replaces `summary()`, which only
#     prints a description of its argument;
#   * `data = tdata` is no longer passed to `lm()`: that would refit the whole
#     data set in every group. Inside `summarise()` the bare column names
#     already refer to the rows of the current group;
#   * the result is named with `=`; `beta <- ...` inside a dplyr verb does not
#     create a column called `beta`.
test <- tdata %>%
  mutate(ret = as.numeric(ret)) %>%
  group_by(cusip, fyear) %>%
  # NOTE(review): rows are sorted newest-first as in the original attempt, so
  # lag(vwretd) is the value from the *following* date within the group.
  # Switch to arrange(date, .by_group = TRUE) if the preceding date's value
  # is what the model should use.
  arrange(desc(date), .by_group = TRUE) %>%
  summarise(
    beta = {
      # dplyr::lag is spelled out so stats::lag can never be picked up.
      fit <- lm(ret ~ vwretd + dplyr::lag(vwretd))
      # coef() keeps NA entries for slopes that cannot be estimated (e.g. a
      # group with too few rows), so positions 2 and 3 always exist and such
      # groups yield NA instead of a subscript error.
      sum(unname(coef(fit)[2:3]))
    },
    .groups = "drop"
  )
Edit :
样本数据:https://www.dropbox.com/s/4padnsjjnt4uvy2/tdata.csv?dl=0
完整样本:https://www.dropbox.com/s/4padnsjjnt4uvy2/tdata.csv?dl=0