一种可能的方法(注意示例数据中有 20 个 A,用于验证该组会返回两行,即 20 的 10%)。
library(tidyverse)

# Example data: 40 rows in six groups. Group "A" has 20 rows so that the
# 10% rule below must return two rows for it.
var <- c(rep("A", 20), rep("B", 10), rep("C", 3), rep("D", 5), "E", "F")
value <- rnorm(40)
dat <- tibble(var, value)

# Stratified sample: keep 10% of the rows of each group, but never fewer
# than one row per group.
# No seed is set, so the rows drawn differ between runs; call set.seed()
# beforehand if a reproducible draw is needed.
sampled <- dat |>
  group_by(var) |>
  mutate(
    # Number of rows to keep in this group. A fractional target is
    # effectively rounded down by the integer rank comparison in filter().
    n_keep = max(n() * 0.1, 1),
    # Random permutation of 1..n(): every row gets a unique random rank.
    draw = sample(n())
  ) |>
  # Keeping the rows with the lowest ranks is a draw without replacement.
  filter(draw <= n_keep) |>
  # Drop the grouping so later steps do not silently operate per group.
  ungroup() |>
  select(var, value)

sampled
#> # A tibble: 7 × 2
#> var value
#> <chr> <dbl>
#> 1 A 0.0105
#> 2 A 0.171
#> 3 B -1.89
#> 4 C 1.89
#> 5 D 0.612
#> 6 E 0.516
#> 7 F 0.185
Created on 2022-06-02 by the reprex package https://reprex.tidyverse.org (v2.0.1)
加权版本:
library(tidyverse)

# Example data: 40 rows in six groups (20 of them in group "A").
var <- c(rep("A", 20), rep("B", 10), rep("C", 3), rep("D", 5), "E", "F")
value <- rnorm(40)
dat <- tibble(var, value)

# Weighted stratified sample: the fraction kept from each group equals the
# group's share of all rows, with a minimum of one row per group.
# Example: "A" holds 20 of 40 rows, so weight = 0.5 and 20 * 0.5 = 10 rows
# are kept.
# No seed is set, so the rows drawn differ between runs; call set.seed()
# beforehand if a reproducible draw is needed.
sampled <- dat |>
  # Total row count on every row, so it is still available after grouping.
  add_count(name = "n_all") |>
  group_by(var) |>
  mutate(
    # Share of the whole data set that belongs to this group.
    weight = n() / n_all,
    # Number of rows to keep in this group. A fractional target is
    # effectively rounded down by the integer rank comparison in filter().
    n_keep = pmax(n() * weight, 1),
    # Random permutation of 1..n(): every row gets a unique random rank.
    draw = sample(n())
  ) |>
  # Keeping the rows with the lowest ranks is a draw without replacement.
  filter(draw <= n_keep) |>
  # Drop the grouping so later steps do not silently operate per group.
  ungroup() |>
  select(var, value)

sampled
#> # A tibble: 16 × 2
#> var value
#> <chr> <dbl>
#> 1 A 0.339
#> 2 A 1.77
#> 3 A -0.145
#> 4 A -0.915
#> 5 A 0.146
#> 6 A 0.896
#> 7 A -0.407
#> 8 A -1.30
#> 9 A 1.22
#> 10 A 0.0527
#> 11 B -0.602
#> 12 B -0.432
#> 13 C -0.0540
#> 14 D -1.45
#> 15 E 1.54
#> 16 F 0.879
Created on 2022-06-09 by the reprex package https://reprex.tidyverse.org (v2.0.1)