这是一种不需要生成和洗牌一个巨大列表的方法,以防万一N
很大但是k
is not:
std::vector<int> pick(int N, int k) {
std::random_device rd;
std::mt19937 gen(rd());
std::unordered_set<int> elems = pickSet(N, k, gen);
// ok, now we have a set of k elements. but now
// it's in a [unknown] deterministic order.
// so we have to shuffle it:
std::vector<int> result(elems.begin(), elems.end());
std::shuffle(result.begin(), result.end(), gen);
return result;
}
现在实施的天真的方法pickSet
is:
std::unordered_set<int> pickSet(int N, int k, std::mt19937& gen)
{
std::uniform_int_distribution<> dis(1, N);
std::unordered_set<int> elems;
while (elems.size() < k) {
elems.insert(dis(gen));
}
return elems;
}
But if k
相对于N
,该算法可能会导致大量冲突,并且速度可能相当慢。我们可以做得更好,保证我们可以在每次插入时添加一个元素(由罗伯特·弗洛伊德 http://www.nowherenearithaca.com/2013/05/robert-floyds-tiny-and-beautiful.html):
std::unordered_set<int> pickSet(int N, int k, std::mt19937& gen)
{
std::unordered_set<int> elems;
for (int r = N - k; r < N; ++r) {
int v = std::uniform_int_distribution<>(0, r)(gen);
// there are two cases.
// v is not in candidates ==> add it
// v is in candidates ==> well, r is definitely not, because
// this is the first iteration in the loop that we could've
// picked something that big.
if (!elems.insert(v).second) {
elems.insert(r);
}
}
return elems;
}