我们按“代码”列进行分组,然后为“付款”的每个唯一元素创建一个序列列,然后将序列也与“代码”添加到fill
前一个非 NA id 中“ID”中缺失的元素,ungroup
,删除“ID”仍然为 NA 的行(drop_na
),创建一个付款列abs
olute 值,按“代码”、“ID”和绝对付款列排序,然后按相同列分组,通过采用以下序列创建分组列sign
“付款”的值,添加为分组列,以及filter
仅具有单行的组
library(dplyr)
library(data.table)
library(tidyr)
library(lubridate)
df1 %>%
group_by(Code) %>%
filter(sum(payment) != 0) %>%
arrange(Code, abs(payment), !is.na(ID)) %>%
mutate(ind = rowid(payment)) %>%
group_by(ind, .add = TRUE) %>%
fill(ID, .direction = 'downup') %>%
ungroup %>%
drop_na(ID) %>%
mutate(absPayment = abs(payment)) %>%
arrange(ID, Code, absPayment) %>%
group_by(Code, ID, absPayment) %>%
mutate(grp = rowid(sign(payment))) %>%
group_by(grp, .add = TRUE) %>%
filter(n() == 1) %>%
ungroup %>%
select(names(df1))
-output
# A tibble: 5 × 4
Date payment Code ID
<chr> <dbl> <chr> <chr>
1 22/07/2002 200 M300 11
2 15/07/2002 1200 M567 111
3 17/07/2002 1200 M567 111
4 25/06/2002 1000 M567 98
5 02/07/2002 -1000 M567 M11
data
df1 <- structure(list(Date = c("24/06/2002", "24/06/2002", "25/06/2002",
"02/07/2002", "24/07/2002", "08/07/2002", "08/07/2002", "15/07/2002",
"17/07/2002", "22/07/2002", "22/07/2002", "28/07/2002", "29/07/2002"
), payment = c(200, 1000, 1000, -1000, -1000, 1200, -1200, 1200,
1200, 200, 56700, -56700, -200), Code = c("ABC", "M567", "M567",
"M567", "M567", "M567", "M567", "M567", "M567", "M300", "M678",
"M678", "ABC"), ID = c(NA, "98", "98", "M11", NA, "K999", "K999",
"111", "111", "11", "12345", NA, NA)), row.names = c(NA, -13L
), class = "data.frame")