Вставьте boost :: dynamic_bitset & lt; & gt; в повышение :: BIMAP

Я пытаюсь вставить boost::dynamic_bitset<> в boost::bimap, Тем не менее, это очень медленно по сравнению со вставкой целых чисел или строк. Минимальный пример указан код, который показан ниже

// Example program
#include <iostream>
#include <string>
#include <boost/bimap.hpp>
#include <boost/dynamic_bitset.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>namespace std {
template <typename Block, typename Alloc> struct hash<boost::dynamic_bitset<Block, Alloc> > {
size_t operator()(boost::dynamic_bitset<Block, Alloc> const& bs) const {
size_t seed = boost::hash_value(bs.size());

std::vector<Block> blocks(bs.num_blocks());
boost::hash_range(seed, blocks.begin(), blocks.end());

return seed;
}
};
}namespace bimaps = boost::bimaps;
typedef boost::dynamic_bitset<> Bitset;
typedef boost::bimap<
bimaps::unordered_set_of<Bitset, std::hash<Bitset>>,
bimaps::unordered_multiset_of<Bitset, std::hash<Bitset> > > bimap_reference;
typedef bimap_reference::value_type position;
bimap_reference reference_index_vector;class bitsets{
public:
void encode_string(
std::string &sequence_content,
std::string &binary_sequence);

void encode_positions(
std::string &pos,
std::string &pos_binary_sequence);

};int main()
{
bitsets b;
std::string binary_sequence, decoded_string ;
std::string pos_binary_sequence, decoded_positions;
std::string sequence_content = "ADGFGGFFAAACGFGCAAFGCAAFGCAFGCAAFGCAFGCAAGGGCAGDDDCGGAFFGCA";

for(size_t i = 0; i < sequence_content.size(); i++){
std::string substr = sequence_content.substr(i,15);
b.encode_string(substr, binary_sequence);
boost::dynamic_bitset<> bits = boost::dynamic_bitset<> (binary_sequence);
std::string pos = std::to_string(i);
b.encode_positions(pos, pos_binary_sequence);
boost::dynamic_bitset<> pos_bits = boost::dynamic_bitset<> (pos_binary_sequence);
reference_index_vector.insert(position(pos_bits, bits));
binary_sequence.clear();
pos_binary_sequence.clear();
i += 14;
}
for( bimap_reference::const_iterator iter = reference_index_vector.begin(), iend = reference_index_vector.end();
iter != iend; ++iter ) {
std::cout << iter->left << " <--> "<< iter->right <<std::endl;
}
}void bitsets::encode_string(std::string &substr, std::string &binary_sequence){
for (size_t i = 0; i < substr.size(); ++i){
switch (substr[i]){
case 'A':
case 'a':
binary_sequence += "00";
break;
case 'C':
case 'c':
binary_sequence += "01";
break;
case 'D':
case 'd':
binary_sequence += "10";
break;
case 'G':
case 'g':
binary_sequence += "110";
break;
case 'F':
case 'f':
binary_sequence += "110";
break;
}
}
}

void bitsets::encode_positions(std::string &pos, std::string &pos_binary_sequence){
for(size_t i = 0; i < pos.size(); ++i){
switch (pos[i]){
case '0':
pos_binary_sequence += "1101";
break;
case '1':
pos_binary_sequence += "100";
break;
case '2':
pos_binary_sequence += "1110";
break;
case '3':
pos_binary_sequence += "1100";
break;
case '4':
pos_binary_sequence += "101";
break;
case '5':
pos_binary_sequence += "000";
break;
case '6':
pos_binary_sequence += "001";
break;
case '7':
pos_binary_sequence += "011";
break;
case '8':
pos_binary_sequence += "1111";
break;
case '9':
pos_binary_sequence += "010";
break;
}
}
}

Для строки длиной 5 миллионов символов требуется много времени (около 5 минут), а иногда целых несколько строк для вставки в bimap, Зачем boost::dynamic_bitset<> медленно и как я могу улучшить это.

0

Решение

Я немного реорганизовал, исправил хэш-функцию (я думаю — пожалуйста, проверьте ее) и запустил тест для последовательности из 5 000 000 случайных штуковин (jleles?)

С выпуском сборки тест проходил за 900 миллисекунд — менее одной секунды.

Вот код Я не претендую на правильность или эффективность алгоритма. Я подозреваю, что есть место для улучшения.

// Example program
#include <iostream>
#include <string>
#include <boost/bimap.hpp>
#include <boost/dynamic_bitset.hpp>
#include <boost/bimap/unordered_set_of.hpp>
#include <boost/bimap/unordered_multiset_of.hpp>
#include <random>
#include <chrono>
#include <thread>

namespace std {
template <typename Block, typename Alloc>
struct hash<boost::dynamic_bitset<Block, Alloc> > {

using bitset_type = boost::dynamic_bitset<Block, Alloc>;
using block_type = typename bitset_type::block_type ;

size_t operator()(boost::dynamic_bitset<Block, Alloc> const& bs) const
{
thread_local static std::vector<block_type> block_data;
auto blocks = bs.num_blocks();
block_data.assign(blocks, 0);
to_block_range(bs, block_data.begin());
return boost::hash<std::vector<block_type>>()(block_data);
}
};
}namespace bimaps = boost::bimaps;
typedef boost::dynamic_bitset<> Bitset;
typedef boost::bimap<
bimaps::unordered_set_of<Bitset, std::hash<Bitset>>,
bimaps::unordered_multiset_of<Bitset, std::hash<Bitset> > > bimap_reference;
typedef bimap_reference::value_type position;class bitsets{
public:
void encode_string(
std::string &sequence_content,
std::string &binary_sequence);

void encode_positions(
std::string &pos,
std::string &pos_binary_sequence);

};

bimap_reference test(const std::string& sequence_content)
{
bimap_reference reference_index_vector;

bitsets b;
std::string binary_sequence, decoded_string ;
std::string pos_binary_sequence, decoded_positions;

for(size_t i = 0; i < sequence_content.size(); i++){
std::string substr = sequence_content.substr(i,15);
b.encode_string(substr, binary_sequence);
boost::dynamic_bitset<> bits = boost::dynamic_bitset<> (binary_sequence);
std::string pos = std::to_string(i);
b.encode_positions(pos, pos_binary_sequence);
boost::dynamic_bitset<> pos_bits = boost::dynamic_bitset<> (pos_binary_sequence);
reference_index_vector.insert(position(pos_bits, bits));
binary_sequence.clear();
pos_binary_sequence.clear();
i += 14;
}

return reference_index_vector;
}int main()
{
static const std::string valid_chars = "ADGF";
std::random_device rd;
auto eng = std::default_random_engine(rd());
auto dist = std::uniform_int_distribution<int>(0, valid_chars.size() - 1);

static const auto sequence_size = std::size_t(5'000'000);

auto sequence_content = std::string(sequence_size, ' ');
std::generate(sequence_content.begin(), sequence_content.end(), [&]{ return valid_chars[dist(eng)]; });

auto start_time = std::chrono::system_clock::now();
auto reference_index_vector = test(sequence_content);

auto end_time = std::chrono::system_clock::now();

std::cout
<< "test took: "<< std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count()
<< " milliseconds\n";
std::this_thread::sleep_for(std::chrono::seconds(5));for( bimap_reference::const_iterator iter = reference_index_vector.begin(), iend = reference_index_vector.end();
iter != iend; ++iter ) {
std::cout << iter->left << " <--> "<< iter->right <<std::endl;
}
}void bitsets::encode_string(std::string &substr, std::string &binary_sequence){
for (size_t i = 0; i < substr.size(); ++i){
switch (substr[i]){
case 'A':
case 'a':
binary_sequence += "00";
break;
case 'C':
case 'c':
binary_sequence += "01";
break;
case 'D':
case 'd':
binary_sequence += "10";
break;
case 'G':
case 'g':
binary_sequence += "110";
break;
case 'F':
case 'f':
binary_sequence += "110";
break;
}
}
}

void bitsets::encode_positions(std::string &pos, std::string &pos_binary_sequence){
for(size_t i = 0; i < pos.size(); ++i){
switch (pos[i]){
case '0':
pos_binary_sequence += "1101";
break;
case '1':
pos_binary_sequence += "100";
break;
case '2':
pos_binary_sequence += "1110";
break;
case '3':
pos_binary_sequence += "1100";
break;
case '4':
pos_binary_sequence += "101";
break;
case '5':
pos_binary_sequence += "000";
break;
case '6':
pos_binary_sequence += "001";
break;
case '7':
pos_binary_sequence += "011";
break;
case '8':
pos_binary_sequence += "1111";
break;
case '9':
pos_binary_sequence += "010";
break;
}
}
}
2

Другие решения

Других решений пока нет …

По вопросам рекламы [email protected]