我在试图解决这个看似简单的问题时感到沮丧。我有一个像这样的数据集(df):
structure(list(Year = c(2015L, 2015L, 2015L, 2015L, 2015L, 2015L,
2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L,
2015L, 2015L, 2015L, 2015L, 2015L), Unknown = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), Temp = c(21L, 21L, 21L, 23L, 23L, 21L, 21L, 22L, 21L, 23L,
23L, 22L, 21L, 21L, 22L, 22L, 21L, 21L, 23L, 23L), Obs = structure(c(1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L), .Label = c("mdk", "sde"), class = "factor"), State = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "ma", class = "factor"), Zone = c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), Segment = c(8L, 7L, 4L, 17L, 18L, 7L, 2L, 12L, 1L, 17L,
18L, 12L, 9L, 7L, 13L, 11L, 8L, 9L, 17L, 18L), Subseg = c(1L,
3L, 3L, 2L, 2L, 2L, 4L, 0L, 10L, 4L, 2L, 0L, 1L, 1L, 3L, 1L,
2L, 2L, 1L, 1L), Wdir = structure(c(2L, 2L, 1L, 3L, 3L, 2L, 2L,
1L, 2L, 3L, 3L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L), .Label = c("na",
"ne", "nw"), class = "factor"), Wvel = structure(c(1L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L), .Label = c("5", "na"), class = "factor"), Clouds = structure(c(1L,
1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 1L,
1L, 3L, 3L), .Label = c("1", "4", "na"), class = "factor"), Temp.1 = structure(c(1L,
1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 1L,
1L, 3L, 3L), .Label = c("20", "25", "na"), class = "factor"),
Species = structure(c(7L, 21L, 1L, 21L, 16L, 4L, 16L, 6L,
1L, 17L, 5L, 7L, 5L, 1L, 1L, 6L, 7L, 7L, 24L, 5L), .Label = c("ABDU",
"ABDU", "ABDU", "ABDU", "ABDU", "CAGO", "CAGO", "CAGO", "CAGO",
"CAGO", "GOLD", "GOLD", "GOLD", "GOLD", "GOLD", "MERG", "MERG",
"MERG", "MERG", "MERG", "SCOT", "SCOT", "SCOT", "SCOT",
"SCOT", "SCOT", "SCOT"), class = "factor"), Count = c(5L,
1L, 150L, 3L, 20L, 8L, 5L, 10L, 5L, 1L, 20L, 10L, 2L, 2L,
80L, 40L, 1L, 1000L, 2L, 20L)), .Names = c("Year", "Unknown",
"Temp", "Obs", "State", "Zone", "Segment", "Subseg", "Wdir",
"Wvel", "Clouds", "Temp.1", "Species", "Count"), row.names = c(666L,
614L, 2060L, 1738L, 1459L, 536L, 197L, 2467L, 98L, 1794L, 1449L,
2464L, 696L, 483L, 2644L, 2350L, 686L, 844L, 2989L, 2934L), class = "data.frame")
标题看起来像这样:
Year Unknown Temp Obs State Zone Segment Subseg Wdir Wvel
666 2015 1 21 mdk ma 2 8 1 ne 5
614 2015 1 21 mdk ma 2 7 3 ne 5
2060 2015 1 21 sde ma 2 4 3 na na
1738 2015 1 23 mdk ma 2 17 2 nw 5
1459 2015 1 23 mdk ma 2 18 2 nw 5
536 2015 1 21 mdk ma 2 7 2 ne 5
Clouds Temp.1 Species Count
666 1 20 CAGO 5
614 1 20 SCOT 1
2060 na na ABDU 150
1738 1 20 SCOT 3
1459 1 20 MERG 20
536 1 20 ABDU 8
除 dplyr 中的其他内容外,当我按段分组时,我希望将每个物种的总和作为新列。这是我尝试过多种变体的最终代码。
df_group = df %>%
group_by(Segment) %>%
summarise(temp = round(mean(Temp)),
WDir = round(mean(Wdir)),
ABDU = sum(which(Species=="ABDU"),Count),
CAGO = sum(which(Species=="CAGO"),Count),
GOLD = sum(which(Species=="GOLD"),Count),
MERG = sum(which(Species=="MERG"),Count),
SCOT = sum(which(Species=="SCOT"),Count))
这就是我得到的(显示正确的格式):
Segment temp WDir ABDU CAGO GOLD MERG SCOT
1 1 21 2 6 5 5 5 5
2 2 21 2 5 5 5 6 5
3 4 21 1 151 150 150 150 150
4 7 21 2 16 11 11 11 12
5 8 21 2 6 9 6 6 6
6 9 21 2 1003 1004 1002 1002 1002
格式和总体思路是我想要的,但数字并没有按照我想要的方式相加。我确信这很简单,但需要一些帮助!谢谢。