我目前正在尝试按分组列为每个分组选择非连续的日期。
换句话说,我有以下数据框:
我基本上想先 group_by(Site),然后为每个站点仅保留 3 个随机的非连续日期。例如,如果 HP37P1B 对应的日期是 3 月 12 日、3 月 13 日、3 月 14 日和 3 月 7 日——我需要一个仅包含以下内容的数据框(例如):
HP37P1B 12th March
HP37P1B 14th March
HP37P1B 7th March
到目前为止,我已经参考 Stack Overflow 上的一些帖子,尝试了 diff()、ave() 以及 lubridate 包,但都没有成功。
EDIT
根据下面的评论,我试图使这个问题可重现
dput(uniqueSiteDate)
structure(list(Site = c("HP37P1B", "HP37P2B", "HP37P4B", "HP4008U",
"INME03R", "INME03U", "INOA03R", "IPTO04R", "IPTO04U", "IPTO06R",
"IPTO06U", "OLCAP2B", "OLCAP3B", "OLCAP5B", "PANMP1B", "PANMP2B",
"PANMP3B", "STIN02R", "STIN02U", "UPMAP1B", "UPMAP3B", "UPMAP4B",
"UPMAP5B", "UPMAP6B", "VAR210R", "VAR310R", "VAR310U", "VAR410R",
"VAR410U", "HP36P1B", "HP36P3B", "HP36P4B", "HP4008R", "INBS04R",
"INBS04U", "SEL107R", "SEL107U", "SEL207R", "SEL207U", "OLV110R",
"OLV110U", "OLV208R", "OLV208U", "THEN10U", "HP37P1B", "HP37P2B",
"HP37P4B", "HP4008U", "INME03R", "INME03U", "INOA03R", "IPTO04R",
"IPTO04U", "IPTO06R", "IPTO06U", "OLCAP2B", "OLCAP3B", "OLCAP5B",
"PANMP1B", "PANMP2B", "PANMP3B", "STIN02R", "STIN02U", "UPMAP1B",
"UPMAP3B", "UPMAP4B", "UPMAP5B", "UPMAP6B", "VAR210R", "VAR310R",
"VAR310U", "VAR410R", "VAR410U", "OLV110R", "OLV110U", "OLV208R",
"OLV208U", "THEN10U", "HP37P1B", "HP37P2B", "HP37P4B", "HP4008U",
"INME03R", "INME03U", "INOA03R", "IPTO04R", "IPTO04U", "IPTO06R",
"IPTO06U", "OLCAP2B", "OLCAP3B", "OLCAP5B", "PANMP1B", "PANMP2B",
"PANMP3B", "STIN02R", "STIN02U", "UPMAP1B", "UPMAP3B", "UPMAP4B",
"UPMAP5B", "UPMAP6B", "VAR210R", "VAR310R", "VAR310U", "VAR410R",
"VAR410U", "OLV110R", "OLV110U", "OLV208R", "OLV208U", "THEN10U",
"HP37P1B", "HP37P2B", "HP37P4B", "HP4008U", "INME03R", "INME03U",
"INOA03R", "IPTO04R", "IPTO04U", "IPTO06R", "IPTO06U", "OLCAP2B",
"OLCAP3B"), Date = structure(c(18333, 18333, 18333, 18333, 18335,
18335, 18335, 18338, 18335, 18338, 18335, 18333, 18333, 18333,
18334, 18334, 18334, 18331, 18331, 18331, 18330, 18330, 18330,
18330, 18332, 18332, 18332, 18332, 18332, 18325, 18325, 18325,
18325, 18327, 18327, 18327, 18327, 18327, 18328, 18340, 18340,
18340, 18340, 18340, 18334, 18334, 18334, 18334, 18336, 18336,
18336, 18339, 18336, 18340, 18336, 18335, 18334, 18334, 18335,
18335, 18335, 18332, 18332, 18332, 18331, 18331, 18331, 18331,
18333, 18333, 18333, 18333, 18333, 18341, 18341, 18341, 18341,
18341, 18335, 18335, 18335, 18335, 18383, 18383, 18383, 18384,
18384, 18384, 18384, 18385, 18385, 18335, 18342, 18342, 18341,
18383, 18383, 18345, 18349, 18349, 18349, 18349, 18340, 18339,
18340, 18341, 18339, 18386, 18386, 18348, 18346, 18347, 18328,
18328, 18328, 18328, 18390, 18389, 18391, 18392, 18392, 18392,
18392, 18392, 18392), class = "Date")), row.names = c(NA, -125L
), groups = structure(list(Site = c("HP36P1B", "HP36P3B", "HP36P4B",
"HP37P1B", "HP37P2B", "HP37P4B", "HP4008R", "HP4008U", "INBS04R",
"INBS04U", "INME03R", "INME03U", "INOA03R", "IPTO04R", "IPTO04U",
"IPTO06R", "IPTO06U", "OLCAP2B", "OLCAP3B", "OLCAP5B", "OLV110R",
"OLV110U", "OLV208R", "OLV208U", "PANMP1B", "PANMP2B", "PANMP3B",
"SEL107R", "SEL107U", "SEL207R", "SEL207U", "STIN02R", "STIN02U",
"THEN10U", "UPMAP1B", "UPMAP3B", "UPMAP4B", "UPMAP5B", "UPMAP6B",
"VAR210R", "VAR310R", "VAR310U", "VAR410R", "VAR410U"), .rows = structure(list(
30L, 31L, 32L, c(1L, 45L, 79L, 113L), c(2L, 46L, 80L, 114L
), c(3L, 47L, 81L, 115L), 33L, c(4L, 48L, 82L, 116L), 34L,
35L, c(5L, 49L, 83L, 117L), c(6L, 50L, 84L, 118L), c(7L,
51L, 85L, 119L), c(8L, 52L, 86L, 120L), c(9L, 53L, 87L, 121L
), c(10L, 54L, 88L, 122L), c(11L, 55L, 89L, 123L), c(12L,
56L, 90L, 124L), c(13L, 57L, 91L, 125L), c(14L, 58L, 92L),
c(40L, 74L, 108L), c(41L, 75L, 109L), c(42L, 76L, 110L),
c(43L, 77L, 111L), c(15L, 59L, 93L), c(16L, 60L, 94L), c(17L,
61L, 95L), 36L, 37L, 38L, 39L, c(18L, 62L, 96L), c(19L, 63L,
97L), c(44L, 78L, 112L), c(20L, 64L, 98L), c(21L, 65L, 99L
), c(22L, 66L, 100L), c(23L, 67L, 101L), c(24L, 68L, 102L
), c(25L, 69L, 103L), c(26L, 70L, 104L), c(27L, 71L, 105L
), c(28L, 72L, 106L), c(29L, 73L, 107L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -44L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
为了回答其他问题:有时每个站点有超过 3 个日期,但有时每个站点只有 1 个日期。总体思路是为给定站点选择 n 个非连续日期。换句话说,如果某个站点有 4 个日期,我需要其中 3 个不连续的日期;如果某个站点只有 1 个日期,就直接保留它。