您必须以 matrix 的形式同时提供行索引和列索引。可以用 cbind 把行、列这两个向量按列合并,得到这样一个两列(行, 列)的索引 matrix。
# Start from an all-NA 3 x 2 data.frame, then fill it through a two-column
# index matrix: column 1 holds the row numbers, column 2 the column numbers.
df2 <- data.frame(matrix(nrow = 3, ncol = 2))
cell_idx <- cbind(df1$row, df1$col)
df2[cell_idx] <- df1$value
df2
#   X1 X2
# 1  3  1
# 2 NA  4
# 3  5  2
或者更好的做法(更快、内存消耗更少)是先填充一个 matrix,然后再将其转换为 data.frame。
# Fill a preallocated integer matrix first and convert it to a data.frame
# only once, at the end.
filled <- matrix(NA_integer_, nrow = 3, ncol = 2)
filled[cbind(df1$row, df1$col)] <- df1$value
df2 <- as.data.frame(filled)
或者一步完成。
# One-step variant: call the replacement function `[<-` directly on a fresh
# matrix and convert its result to a data.frame.
# The matrix is created with NA_integer_ (not the logical NA) so that it has
# the same storage type as in the other variants; assuming df1$value is
# integer, this also avoids coercing the matrix from logical on assignment.
df2 <- as.data.frame(
  `[<-`(matrix(NA_integer_, 3, 2), cbind(df1$row, df1$col), df1$value)
)
或者直接计算每个元素在向量中的位置(matrix 按列存储,因此线性索引为 行 + (列 - 1) × 行数)。
# Same result through plain vector (linear) indexing: a matrix is stored
# column by column, so cell (row, col) is element row + (col - 1) * nrow.
df2 <- matrix(NA_integer_, 3, 2)
# Take the row count from the matrix itself instead of repeating the literal,
# so the index formula cannot get out of sync with the matrix dimensions.
df2[df1$row + (df1$col - 1L) * nrow(df2)] <- df1$value
df2 <- as.data.frame(df2)
基准测试
# Benchmark input: every (row, col) cell of an NR x NC grid gets a random
# digit 0-9, then a random 90% of those cells is kept, in shuffled order.
set.seed(0)
NR <- 10000L  # number of rows of the target table
NC <- 100L    # number of columns of the target table
all_cells <- expand.grid(row = seq_len(NR), col = seq_len(NC))
# The values are drawn before the kept rows, matching the original RNG order.
cell_values <- sample(0:9, NR * NC, replace = TRUE)
kept_rows <- sample(NR * NC, floor(0.9 * NR * NC))
df1 <- cbind(all_cells, value = cell_values)[kept_rows, ]
library(tidyverse)
library(data.table)
library(Matrix)
# Compare run time and memory allocation of the different ways to build the
# NR x NC table from df1. check = FALSE is needed because the candidates do
# not all return the same kind of object (e.g. a matrix vs. a data.frame).
bench::mark(
  check = FALSE,

  # Fill a data.frame directly through a (row, col) index matrix.
  cbindTwoStep = {
    df2 <- data.frame(matrix(nrow = NR, ncol = NC))
    df2[cbind(df1$row, df1$col)] <- df1$value
  },

  # Fill an integer matrix through the index matrix, then convert.
  cbindMatTwoStep = {
    df2 <- matrix(NA_integer_, NR, NC)
    df2[cbind(df1$row, df1$col)] <- df1$value
    df2 <- as.data.frame(df2)
  },

  # Single-expression variants, converting with data.frame() ...
  cbindMatDF = df2 <- data.frame(
    `[<-`(matrix(NA_integer_, NR, NC), cbind(df1$row, df1$col), df1$value)
  ),

  # ... and with as.data.frame().
  cbindMatADF = df2 <- as.data.frame(
    `[<-`(matrix(NA_integer_, NR, NC), cbind(df1$row, df1$col), df1$value)
  ),

  # Fill the matrix through a computed linear (vector) index.
  vecIndex = {
    df2 <- matrix(NA_integer_, NR, NC)
    df2[df1$row + (df1$col - 1L) * NR] <- df1$value
    df2 <- as.data.frame(df2)
  },

  # tidyverse: sort, reshape to wide form, drop the row key.
  pivot = {
    df2 <- df1 |>
      arrange(row, col) |>
      pivot_wider(names_from = col, values_from = value) |>
      select(!row)
  },

  # data.table: cast to wide form, drop the first (row key) column.
  data.table = df2 <- dcast(
    as.data.table(df1),
    row ~ paste("Column", col)
  )[, -1],

  # Matrix: build a sparse matrix from the triplets, then densify it.
  sparseMatrix = df2 <- as.matrix(
    sparseMatrix(i = df1[, 1], j = df1[, 2], x = df1[, 3])
  ),

  # Base R cross-tabulation of value over the remaining columns.
  xtabs = df2 <- as.data.frame.matrix(xtabs(value ~ ., df1))
)
结果
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl>
1 cbindTwoStep 59.99ms 132.42ms 6.41 79.2MB 11.2 4 7
2 cbindMatTwoStep 8.6ms 9.48ms 92.5 21.8MB 21.6 47 11
3 cbindMatDF 8.63ms 9.51ms 61.2 21.8MB 13.8 31 7
4 cbindMatADF 8.24ms 9.4ms 95.7 21.8MB 19.9 48 10
5 vecIndex 8.12ms 9ms 97.7 14.9MB 19.9 49 10
6 pivot 97.59ms 99.75ms 9.99 92.6MB 12.0 5 6
7 data.table 171.63ms 172.34ms 5.41 88.4MB 5.41 3 3
8 sparseMatrix 23.73ms 30.18ms 17.6 38.5MB 7.82 9 4
9 xtabs 1.1s 1.1s 0.909 269.9MB 5.46 1 6
使用 cbind 构造索引来填充 matrix 中的值,然后将其转换为 data.frame,比直接填充 data.frame 更高效。直接计算向量中的线性索引还要稍快一些,并且额外占用的内存最少。