这是一种使用 `Map` 或 `mapply` 的方法：
让我们先制作一些玩具数据:
# Toy data: 100 rows with four standard-normal columns and a grouping
# column `grp` sampled with replacement from the letters "a" to "e".
dt <- data.table(
  variable1 = rnorm(100),
  variable2 = rnorm(100),
  variable3 = rnorm(100),
  variable4 = rnorm(100),
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  grp = sample(letters[1:5], 100, replace = TRUE)
)

# Column sets, one per summary function applied later.
colsToMean <- c("variable1", "variable2")
colsToMax <- c("variable3")
colsToSd <- c("variable4")
然后：
# Pair every selected column with its summary function. `lengths(scols)`
# repeats each function once per column in the matching set, so `funs`
# lines up element-for-element with `unlist(scols)`.
scols <- list(colsToMean, colsToMax, colsToSd)
funs <- rep(c(mean, max, sd), lengths(scols))

# summary: one row per group.
# `.SD` is passed as the first vectorised argument because Map() takes the
# result names from it; the output therefore keeps the original column names
# instead of the default V1, V2, ...
dt[, Map(function(x, f) f(x), .SD, funs), by = grp, .SDcols = unlist(scols)]

# or replace the original values with summary statistics as in OP.
# Note: `:=` modifies `dt` by reference, so the raw values are overwritten;
# run this on `copy(dt)` if the original values are still needed afterwards.
dt[,
  unlist(scols) := Map(function(x, f) f(x), .SD, funs),
  by = grp,
  .SDcols = unlist(scols)
]
另一种可以启用 GForce 优化的方法：
# Same column/function pairing as before, but with the functions given by
# name so that they can be spliced into an unevaluated j expression.
scols <- list(colsToMean, colsToMax, colsToSd)
funs <- rep(c("mean", "max", "sd"), lengths(scols))

# Build `list(mean(variable1), mean(variable2), max(variable3), sd(variable4))`
# as a call object rather than pasting and parsing a string: no text
# round-trip, and column names are inserted as symbols, not re-parsed as code.
# USE.NAMES = FALSE keeps the list elements unnamed, exactly like the
# string-built expression.
jcalls <- mapply(
  function(f, col) call(f, as.name(col)),
  funs,
  unlist(scols),
  SIMPLIFY = FALSE,
  USE.NAMES = FALSE
)
jexp <- as.call(c(list(quote(list)), jcalls))

# A top-level eval() of a stored expression is expected to be resolved by
# data.table before j is optimised, so GForce should still apply; the
# verbose output confirms it.
dt[, eval(jexp), by = grp, verbose = TRUE]
# Detected that j uses these columns: variable1,variable2,variable3,variable4
# Finding groups using forderv ... 0.000sec
# Finding group sizes from the positions (can be avoided to save RAM) ... 0.000sec
# Getting back original order ... 0.000sec
# lapply optimization is on, j unchanged as 'list(mean(variable1), mean(variable2), max(variable3), sd(variable4))'
# GForce optimized j to 'list(gmean(variable1), gmean(variable2), gmax(variable3), gsd(variable4))'
# Making each group and running j (GForce TRUE) ... 0.000sec