更新的答案
这是一个data.table
版本答案避免使用for
loop.
dt <- data.table(id = c(1, 1, 1),
start = as.POSIXct(c("1970-01-10 06:01:16", "1970-01-10 12:01:16", "1970-01-10 09:34:49")),
end = as.POSIXct(c("1970-01-10 12:01:16", "1970-01-10 17:01:16", "1970-01-11 07:49:48")),
value = c(1, 0, 3))
time_seq <- dt[,.(start=unique(sort(c(start,end))))]
dt[, `:=`(
start_pos = sapply(start, function(x) which(x == time_seq$start)),
end_pos = sapply(end, function(x) which(x == time_seq$start))
)][,num := end_pos - start_pos]
# same size with expected result
dt_desired <- dt[rep(seq(.N), num)][,order:=rowid(num)]
dt_final <- dt_desired[order == 1,
.(id,
start = time_seq[start_pos:(end_pos - 1), start],
end = time_seq[(start_pos + 1):end_pos, start],
value),
by = num]
原答案
这是我非常直观的解决方案。关键是创建独特的有序时间序列time_seq,这是所有的连接start and end。然后循环所有行生成start and end from time_seq.
library(data.table)
dt <- data.table(id = c(1, 1, 1),
start = as.POSIXct(c("1970-01-10 06:01:16", "1970-01-10 12:01:16", "1970-01-10 09:34:49")),
end = as.POSIXct(c("1970-01-10 12:01:16", "1970-01-10 17:01:16", "1970-01-11 07:49:48")),
value = c(1, 0, 3))
# initialization
dt_tmp <- data.table()
tem <- data.table()
# unique full time series with order
time_seq <- data.table(start = unique(sort(c(dt$start,dt$end)) ))
for (i in 1:nrow(dt)){
# select date between start and end of the i-th row
tem <- time_seq[dt[i,start] <= start & start <= dt[i,end]]
len <- length(tem$start)
if(len > 2)
tmp <- data.table(id = dt[i,id],
start = tem[1:len-1,start],
end = tem[2:len,start],
value = dt[i,value])
else
tmp <- dt[i,]
dt_tmp <- rbind(dt_tmp,tmp)
}
dt_tmp