这种方法似乎可行(不过我认为应该还有更快的 sql
方法)
# Read every file named in `filelist` through sqldf, one data frame per file.
# The SQL text refers to the table as `i`, so the anonymous function's
# argument must keep that exact name for sqldf to find the connection.
sql.l <- lapply(filelist, file)
df_list2 <- lapply(sql.l, function(i) {
  sqldf(
    "select * from i",
    dbname = tempfile(),
    file.format = list(header = TRUE, row.names = FALSE)
  )
})
# file() registers each connection in R's limited connection table; release
# them explicitly instead of leaving them for the garbage collector, which
# would otherwise warn about "closing unused connection".
invisible(lapply(sql.l, close))
速度比较——部分代码摘自 mnel 的帖子《在 R 中快速将非常大的表读取为数据框》: https://stackoverflow.com/questions/1727772/quickly-reading-very-large-tables-as-dataframes-in-r/1820610#1820610
library(data.table)
library(sqldf)
# test data: n rows mixing integer, double and character columns
n <- 1e6
DT <- data.table(
  a = sample(1:1000, n, replace = TRUE),
  b = sample(1:1000, n, replace = TRUE),
  c = rnorm(n),
  d = sample(c("foo", "bar", "baz", "qux", "quux"), n, replace = TRUE),
  e = rnorm(n),
  f = sample(1:1000, n, replace = TRUE)
)
# write 5 files out (test1.dat ... test5.dat), each a comma-separated copy of
# DT with a header row. A plain loop is used because write.table() is called
# only for its side effect; lapply() would print a list of NULLs here.
for (file_index in seq_len(5)) {
  write.table(
    DT,
    paste0("test", file_index, ".dat"),
    sep = ",",
    row.names = FALSE,
    quote = FALSE
  )
}
read: data.table
# list.files() expects a regular expression, not a shell glob. The glob-style
# "*.dat" can also match names that merely contain "dat" after some character,
# so anchor on a literal ".dat" extension instead.
filelist <- list.files(pattern = "\\.dat$")
# Time reading every matched file with data.table::fread.
system.time(df_list <- lapply(filelist, fread))
# user system elapsed
# 5.244 0.200 5.457
read: sqldf
# Time reading every file in `filelist` through sqldf. The SQL text refers to
# the table as `i`, so the anonymous function's argument must keep that name.
sql.l <- lapply(filelist, file)
system.time(df_list2 <- lapply(sql.l, function(i) {
  sqldf(
    "select * from i",
    dbname = tempfile(),
    file.format = list(header = TRUE, row.names = FALSE)
  )
}))
# user system elapsed
# 35.594 1.432 37.357
# Release the connections created by file() (done outside system.time so the
# measurement is unaffected); otherwise they linger in R's limited connection
# table until the garbage collector warns about them.
invisible(lapply(sql.l, close))
检查——除属性(attributes)不同之外,两者的结果看起来一致
# Compare the list read by fread against the list read by sqldf.
all.equal(target = df_list, current = df_list2)