C++: выборка из дискретного распределения без возвращения

Question

C++: выборка из дискретного распределения без возвращения

Я хотел бы сделать выборку из дискретного распределения без возвращения (то есть без повторений).

С помощью класса std::discrete_distribution можно делать выборку с возвращением. На его основе я реализовал выборку без возвращения очень грубым способом:

#include <iostream>
#include <random>
#include <vector>
#include <array>

// Demo: draws `sampleSize` outcomes from a weighted discrete distribution,
// first with replacement and then without replacement, and prints a star
// histogram of how many times each outcome was drawn.
int main()
{
    const int sampleSize = 8;   // Size of the sample
    std::vector<double> weights = {2,2,1,1,2,2,1,1,2,2}; // 10 possible outcomes with different weights

    // The number of outcomes is derived from the weights instead of being
    // repeated as a literal, so the counters can never be indexed out of range.
    const int outcomeCount = static_cast<int>(weights.size());

    std::random_device rd;
    std::mt19937 generator(rd());

    // counts[k] = how many times outcome k has been drawn.
    std::vector<int> counts(weights.size(), 0);

    /// WITH REPLACEMENT

    std::discrete_distribution<int> withReplacement(weights.begin(), weights.end());
    for (int i = 0; i < sampleSize; ++i) {
        const int number = withReplacement(generator);
        ++counts[number];
    }

    std::cout << "Discrete_distribution with replacement:" << '\n';
    for (int i = 0; i < outcomeCount; ++i)
        std::cout << i << ": " << std::string(counts[i], '*') << '\n';

    /// WITHOUT REPLACEMENT

    counts.assign(weights.size(), 0);

    // Number of outcomes that can still be drawn. std::discrete_distribution
    // requires the sum of its weights to be positive, so sampling must stop
    // once every weight has been zeroed.
    int remaining = 0;
    for (double weight : weights) {
        if (weight > 0)
            ++remaining;
    }

    for (int i = 0; i < sampleSize && remaining > 0; ++i) {
        // The distribution is rebuilt on every draw because the weights change.
        std::discrete_distribution<int> withoutReplacement(weights.begin(), weights.end());
        const int number = withoutReplacement(generator);
        weights[number] = 0; // the sampled value can no longer be drawn
        --remaining;
        ++counts[number];
    }

    std::cout << "Discrete_distribution without replacement:" << '\n';
    for (int i = 0; i < outcomeCount; ++i)
        std::cout << i << ": " << std::string(counts[i], '*') << '\n';

    return 0;
}

Вы когда-нибудь реализовывали такую выборку без возвращения? Возможно, более оптимальным способом?

Спасибо.

С уважением,

T.A.

1

c++ distribution sampling

Решение

Другие решения

Других решений пока нет …

Источник

Accepted Answer

Это решение немного короче. К сожалению, оно требует создавать объект discrete_distribution<> на каждом шаге, что может оказаться слишком затратным при извлечении большого числа элементов выборки.

#include <iostream>
#include <boost/random/discrete_distribution.hpp>
#include <boost/random/mersenne_twister.hpp>

using namespace boost::random;

// Demo: weighted sampling without replacement by repeatedly drawing from a
// Boost discrete_distribution and zeroing the weight of each drawn index.
// Prints the drawn indices in the order they were sampled.
int main(int, char**) {
    std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 };
    int n = 10; // number of draws; must not exceed the number of positive weights
    boost::random::mt19937 gen;
    std::vector<int> samples;
    for (auto i = 0; i < n; i++) {
        // Build the distribution from the current weights right before it is
        // used. Rebuilding after the draw instead would construct one extra
        // distribution at the end, from a weight vector that is all zeros
        // once every index has been taken.
        discrete_distribution<> dist(w);
        samples.push_back(dist(gen));
        w[samples.back()] = 0; // exclude the drawn index from later draws
    }
    for (auto iter : samples) {
        std::cout << iter << " ";
    }

    return 0;
}

Улучшенный ответ:

Внимательно изучив аналогичный вопрос на этом сайте («Более быстрая взвешенная выборка без возвращения»), я нашёл поразительно простой алгоритм взвешенной выборки без возвращения; его лишь немного сложно реализовать на C++. Обратите внимание, что это не самый эффективный алгоритм, но он кажется мне самым простым в реализации.

Метод подробно описан в статье Efraimidis, Spirakis, «Weighted random sampling with a reservoir»: https://doi.org/10.1016/j.ipl.2005.11.003

Он особенно неэффективен, если размер выборки намного меньше размера генеральной совокупности.

#include <iostream>
#include <iterator>
#include <boost/random/uniform_01.hpp>
#include <boost/random/mersenne_twister.hpp>

using namespace boost::random;

// Demo: weighted random ordering of indices. Each index i gets the key
// u_i^(1/w_i), where u_i is a uniform draw from [0, 1); the indices are then
// printed in order of decreasing key.
int main(int, char**) {
    std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 10 };
    uniform_01<> dist;
    boost::random::mt19937 gen;

    // (key, index) pairs. The index type is `int` throughout so that it
    // matches `samples` and no implicit narrowing from size_t takes place.
    std::vector<std::pair<double, int>> keyedIndices;
    keyedIndices.reserve(w.size());
    const int count = static_cast<int>(w.size());
    for (int index = 0; index < count; ++index) {
        // One uniform draw per weight, taken in index order.
        const double key = std::pow(dist(gen), 1. / w[index]);
        keyedIndices.emplace_back(key, index);
    }

    // Largest key first.
    std::sort(keyedIndices.begin(), keyedIndices.end(),
              [](const std::pair<double, int>& lhs, const std::pair<double, int>& rhs) {
                  return lhs.first > rhs.first;
              });

    std::vector<int> samples;
    samples.reserve(keyedIndices.size());
    for (const auto& entry : keyedIndices) {
        samples.push_back(entry.second);
    }

    for (auto iter : samples) {
        std::cout << iter << " ";
    }

    return 0;
}

Более простой ответ

Я просто убрал некоторые алгоритмы STL и заменил их простыми циклами for.

#include <iostream>
#include <iterator>
#include <boost/random/uniform_01.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <algorithm>

using namespace boost::random;

// Demo: weighted sampling without replacement. Each index i gets the key
// u_i^(1/w_i), where u_i is a uniform draw from [0, 1); the `sampleSize`
// indices with the largest keys are printed, largest key first.
int main(int, char**) {
    std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 1000 };
    uniform_01<> dist;
    boost::random::mt19937 gen(342575235);

    // Keep each key together with its original index so the index survives
    // the reordering. size_t is used for the index to match `samples`.
    std::vector<std::pair<size_t, double>> valsWithIndices;
    valsWithIndices.reserve(w.size());
    for (size_t index = 0; index < w.size(); index++) {
        valsWithIndices.emplace_back(index, std::pow(dist(gen), 1. / w[index]));
    }

    // Never ask for more samples than there are candidates; otherwise the
    // loop below would read past the end of the vector.
    const size_t requestedSize = 8;
    const size_t sampleSize = std::min(requestedSize, valsWithIndices.size());

    // Only the `sampleSize` largest keys are needed, so order just that prefix
    // instead of sorting the whole vector.
    std::partial_sort(valsWithIndices.begin(),
                      valsWithIndices.begin() + sampleSize,
                      valsWithIndices.end(),
                      [](const std::pair<size_t, double>& lhs, const std::pair<size_t, double>& rhs) {
                          return lhs.second > rhs.second;
                      });

    std::vector<size_t> samples;
    samples.reserve(sampleSize);
    for (size_t rank = 0; rank < sampleSize; rank++) {
        samples.push_back(valsWithIndices[rank].first);
    }
    for (auto iter : samples) {
        std::cout << iter << " ";
    }

    return 0;
}

0