Save state of c++11 random generator without using iostream - c++11

What is the best way to store the state of a C++11 random generator without using the iostream interface. I would like to do like the first alternative listed here[1]? However, this approach requires that the object contains the PRNG state and only the PRNG state. In partucular, it fails if the implementation uses the pimpl pattern(at least this is likely to crash the application when reloading the state instead of loading it with bad data), or there are more state variables associated with the PRNG object that does not have to do with the generated sequence.
The size of the object is implementation defined:
g++ (tdm64-1) 4.7.1 gives sizeof(std::mt19937)==2504 but
Ideone http://ideone.com/41vY5j gives 2500
I am missing member functions like
size_t state_size();
const size_t* get_state() const;
void set_state(size_t n_elems,const size_t* state_new);
(1) shall return the size of the random generator state array
(2) shall return a pointer to the state array. The pointer is managed by the PRNG.
(3) shall copy the buffer std::min(n_elems,state_size()) from the buffer pointed to by state_new
This kind of interface allows more flexible state manipulation. Or are there any PRNG:s whose state cannot be represented as an array of unsigned integers?
[1]Faster alternative than using streams to save boost random generator state

I've written a simple (-ish) test for the approach I mentioned in the comments of the OP. It's obviously not battle-tested, but the idea is represented - you should be able to take it from here.
Since the amount of bytes read is so much smaller than if one were to serialize the entire engine, the performance of the two approaches might actually be comparable. Testing this hypothesis, as well as further optimization, are left as an exercise for the reader.
#include <iostream>
#include <random>
#include <chrono>
#include <cstdint>
#include <fstream>
using namespace std;
struct rng_wrap
{
// it would also be advisable to somehow
// store what kind of RNG this is,
// so we don't deserialize an mt19937
// as a linear congruential or something,
// but this example only covers mt19937
uint64_t seed;
uint64_t invoke_count;
mt19937 rng;
typedef mt19937::result_type result_type;
rng_wrap(uint64_t _seed) :
seed(_seed),
invoke_count(0),
rng(_seed)
{}
rng_wrap(istream& in) {
in.read(reinterpret_cast<char*>(&seed), sizeof(seed));
in.read(reinterpret_cast<char*>(&invoke_count), sizeof(invoke_count));
rng = mt19937(seed);
rng.discard(invoke_count);
}
void discard(unsigned long long z) {
rng.discard(z);
invoke_count += z;
}
result_type operator()() {
++invoke_count;
return rng();
}
static constexpr result_type min() {
return mt19937::min();
}
static constexpr result_type max() {
return mt19937::max();
}
};
ostream& operator<<(ostream& out, rng_wrap& wrap)
{
out.write(reinterpret_cast<char*>(&(wrap.seed)), sizeof(wrap.seed));
out.write(reinterpret_cast<char*>(&(wrap.invoke_count)), sizeof(wrap.invoke_count));
return out;
}
istream& operator>>(istream& in, rng_wrap& wrap)
{
wrap = rng_wrap(in);
return in;
}
void test(rng_wrap& rngw, int count, bool quiet=false)
{
uniform_int_distribution<int> integers(0, 9);
uniform_real_distribution<double> doubles(0, 1);
normal_distribution<double> stdnorm(0, 1);
if (quiet) {
for (int i = 0; i < count; ++i)
integers(rngw);
for (int i = 0; i < count; ++i)
doubles(rngw);
for (int i = 0; i < count; ++i)
stdnorm(rngw);
} else {
cout << "Integers:\n";
for (int i = 0; i < count; ++i)
cout << integers(rngw) << " ";
cout << "\n\nDoubles:\n";
for (int i = 0; i < count; ++i)
cout << doubles(rngw) << " ";
cout << "\n\nNormal variates:\n";
for (int i = 0; i < count; ++i)
cout << stdnorm(rngw) << " ";
cout << "\n\n\n";
}
}
int main(int argc, char** argv)
{
rng_wrap rngw(123456790ull);
test(rngw, 10, true); // this is just so we don't start with a "fresh" rng
uint64_t seed1 = rngw.seed;
uint64_t invoke_count1 = rngw.invoke_count;
ofstream outfile("rng", ios::binary);
outfile << rngw;
outfile.close();
cout << "Test 1:\n";
test(rngw, 10); // test 1
ifstream infile("rng", ios::binary);
infile >> rngw;
infile.close();
cout << "Test 2:\n";
test(rngw, 10); // test 2 - should be identical to 1
return 0;
}

Related

How to access map in vector for building trie

#include <string>
#include <iostream>
#include <vector>
#include <map>
using std::map;
using std::vector;
using std::string;
typedef map<char, int> edges;
typedef vector<edges> trie;
trie build_trie(vector<string> & patterns) {
trie t;
// write your code here
for(int j=0;j<patterns.size();j++){
//current NOde = root;
int currNI=0;
for(int i=0;i<patterns[j].size();i++){
char currS = patterns[j][i];
auto it = t[currNI].begin();
//auto mit = it->edges.begin();
if(t[currNI].edges.find(currS)!=t[currNI].edges.end()){
currNI = t[currNI].edges.find(currS)->second;
}else{
t.push_back(edges.insert(currS,t.size()));
t[currNI].edges.insert(currS,t.size());
currNI = t.size();
}
}
}
return t;
}
int main() {
size_t n;
std::cin >> n;
vector<string> patterns;
for (size_t i = 0; i < n; i++) {
string s;
std::cin >> s;
patterns.push_back(s);
}
trie t = build_trie(patterns);
for (size_t i = 0; i < t.size(); ++i) {
for (const auto & j : t[i]) {
std::cout << i << "->" << j.second << ":" << j.first << "\n";
}
}
return 0;
}
Hi I was trying to build trie using vector and map but I am not able to access elements of map.
I have also added image of pseudo code for better clarity.
I have used iterators and many other tricks that are already present on stackoverflow and on other platform but somehow I am not able to access what I want.
Thank You
To answer your questions: in your code, edges is a type, not an object.
t[currNI] is of type edges, thus is a map<char, int>.
You should try t[currNI].find(currS) and t[currNI].end() directly.
NB: after you fix this, there are still other errors in your code.

Why does the left shift on a unsigned int happens from the 16th bit?

I am trying to put the values from the vector into the int.
Given vector :'1','0','1','1','1','0','1','1','1','0','1','1','1','0','1','1' :
Expected output (binary representation for the variable out):
00000000000000001011101110111011.
However, I am getting the following output:
10111011101110110000000000000000
Notice: the insertion begun at the 16bit from right end instead of beginning from the leftmost bit
#include<vector>
#include<iostream>
int main() {
std::vector<unsigned char> test = {'1','0','1','1','1','0','1','1','1','0','1','1','1','0','1','1'};
std::vector<unsigned int> out(1);
int j = 0;
for (int i =0; i < test.size(); i++) {
out[j] = out[j] << 1;
if (test[i] == '1') {out[j] |=0x1;}
}
j++;
for (int p = 0; p < j; p++) {
for (int k = 0; k<32; k++ ) {
std::cout << !!((out[p]<<k)&0x8000);
}
std::cout << std::endl;
}
std::cout << "Size Of:" << sizeof(int);
return 0;
}
The reason why this happens is that you are using a wrong constant for the mask: 0x8000 has its 16-bit set, while you probably meant to use 0x80000000 with the 32-nd bit set. To avoid mistakes like that it's best to construct masks with shifts, for example
(1 << 31)
This expression is evaluated at compile time, so the result is the same as if you computed the constant yourself.
Note that both 0x8000 and 0x80000000 constants are system-dependent. Moreover, 0x80000000 assumes 32-bit int, which is not guaranteed.
A better approach would be shifting the number right instead of left, and masking with 1.
The block of code creating out[j] works just fine.
Your problem is in the output block, due to use of 0x8000. Whenever k >= 16, the low 16 bits will be zero, guaranteeing that 0x8000 is zero.
Your code seems overly complicated to me. Here's my version of a C program that transforms a string of 1's and 0's into an int and one going from int to string.
#include <stdlib.h>
#include <stdio.h>
int main(int argc, char **argv);
int main (int argc, char **argv) {
char str [] = "1010101010101010";
int x;
int out;
for (x=0;x<16;x++) {
if (str[x] == '1') {
out |= (1 << x);
}
}
printf("%d", out) ;
}
and
#include <stdlib.h>
#include <stdio.h>
int main(int argc, char **argv);
int main (int argc, char **argv) {
char str [] = "1010101010101010";
int in = 21845;
char out[17] = {0};
for (x=0;x<16;x++) {
if (in & (1<<x)) {
out[x] = '1';
}
else {
out[x] = '0';
}
}
printf("%s", out) ;
}

vector accessing non zero elements but output as zero

I' did this program what suppose save pairs of string ,int on one vector and print the strings of the maximum number on vector
but when i try to find this strings don't appears nothing so I try print all values of int's on vector and although was finding the maximum of 10 all values in the vector was printing as 0. Someone can explain was it occurred and how I can access the values , please.
#include <iostream>
#include <utility>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;
typedef vector<pair<string,int>> vsi;
bool paircmp(const pair<string,int>& firste,const pair<string,int>& seconde );
int main(int argc, char const *argv[]) {
vsi v(10);
string s;
int n,t;
cin>>t;
for (size_t i = 0;i < t;i++) {
for (size_t j = 0; j < 10; j++) {
cin>>s>>n;
v.push_back(make_pair(s,n));
}
sort(v.begin(),v.end(),paircmp);
int ma=v[v.size()-1].second;
cout<<ma<<endl;
for (size_t j = 0; j < 10; j++) {
cout << v.at(j).second <<endl;
if(v[j].second == ma)
cout<<v[j].first<<endl;
}
}
return 0;
}
bool paircmp(const pair<string,int>& firste,const pair<string,int>& seconde ){
return firste.second < seconde.second;
}
This line
vsi v(10);
creates you a std::vector filled with 10 default-constructed std::pair<std::string, int>s. That is, an empty string and zero.
You then push_back other values to your vector but they happen to be sorted after those ten initial elements, probably because they all have positive ints in them.
Therefore, printing the first member of the first ten elements prints ten empty strings.
This is all I can guess from what you have provided. I don't know what you are trying to accomplish with this code.
Try something like
for (const auto& item : v)
{
std::cout << "{ first: '" << item.first << "', "
<< "second: " << item.second << " }\n";
}
to print all elements of the vector v.

"Warning : Non-POD class type passed through ellipsis" for simple thrust program

In spite of reading many answers on the same kind of questions on SO I am not able to figure out solution in my case. I have written the following code to implement a thrust program. Program performs simple copy and display operation.
#include <stdio.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
int main(void)
{
// H has storage for 4 integers
thrust::host_vector<int> H(4);
H[0] = 14;
H[1] = 20;
H[2] = 38;
H[3] = 46;
// H.size() returns the size of vector H
printf("\nSize of vector : %d",H.size());
printf("\nVector Contents : ");
for (int i = 0; i < H.size(); ++i) {
printf("\t%d",H[i]);
}
thrust::device_vector<int> D = H;
printf("\nDevice Vector Contents : ");
for (int i = 0; i < D.size(); i++) {
printf("%d",D[i]); //This is where I get the warning.
}
return 0;
}
Thrust implements certain operations to facilitate using elements of a device_vector in host code, but this apparently isn't one of them.
There are many approaches to addressing this issue. The following code demonstrates 3 possible approaches:
explicitly copy D[i] to a host variable, and thrust has an appropriate method defined for that.
copy the thrust device_vector back to a host_vector before print-out.
use thrust::copy to directly copy the elements of the device_vector to a stream.
Code:
#include <stdio.h>
#include <iostream>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
int main(void)
{
// H has storage for 4 integers
thrust::host_vector<int> H(4);
H[0] = 14;
H[1] = 20;
H[2] = 38;
H[3] = 46;
// H.size() returns the size of vector H
printf("\nSize of vector : %d",H.size());
printf("\nVector Contents : ");
for (int i = 0; i < H.size(); ++i) {
printf("\t%d",H[i]);
}
thrust::device_vector<int> D = H;
printf("\nDevice Vector Contents : ");
//method 1
for (int i = 0; i < D.size(); i++) {
int q = D[i];
printf("\t%d",q);
}
printf("\n");
//method 2
thrust::host_vector<int> Hnew = D;
for (int i = 0; i < Hnew.size(); i++) {
printf("\t%d",Hnew[i]);
}
printf("\n");
//method 3
thrust::copy(D.begin(), D.end(), std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl;
return 0;
}
Note that for methods like these, thrust is generating various kinds of device-> host copy operations to facilitate the use of device_vector in host code. This has performance implications, so you might want to use the defined copy operations for large vectors.

Optimizing N-queen with openmp

I am learning OPENMP and wrote the following code to solve nqueens problem.
//Full Code: https://github.com/Shafaet/Codes/blob/master/OPENMP/Parallel%20N- Queen%20problem.cpp
int n;
int call(int col,int rowmask,int dia1,int dia2)
{
if(col==n)
{
return 1;
}
int row,ans=0;
for(row=0;row<n;row++)
{
if(!(rowmask & (1<<row)) & !(dia1 & (1<<(row+col))) & !(dia2 & (1<<((row+n-1)-col))))
{
ans+=call(col+1,rowmask|1<<row,dia1|(1<<(row+col)), dia2|(1<<((row+n-1)-col)));
}
}
return ans;
}
double parallel()
{
double st=omp_get_wtime();
int ans=0;
int i;
int rowmask=0,dia1=0,dia2=0;
#pragma omp parallel for reduction(+:ans) shared(i,rowmask)
for(i=0;i<n;i++)
{
rowmask=0;
dia1=0,dia2=0;
int col=0,row=i;
ans+=call(1,rowmask|1<<row,dia1|(1<<(row+col)), dia2|(1<<((row+n-1)-col)));
}
printf("Found %d configuration for n=%d\n",ans,n);
double en=omp_get_wtime();
printf("Time taken using openmp %lf\n",en-st);
return en-st;
}
double serial()
{
double st=omp_get_wtime();
int ans=0;
int i;
int rowmask=0,dia1=0,dia2=0;
for(i=0;i<n;i++)
{
rowmask=0;
dia1=0,dia2=0;
int col=0,row=i;
ans+=call(1,rowmask|1<<row,dia1|(1<<(row+col)), dia2|(1<<((row+n-1)-col)));
}
printf("Found %d configuration for n=%d\n",ans,n);
double en=omp_get_wtime();
printf("Time taken without openmp %lf\n",en-st);
return en-st;
}
int main()
{
double average=0;
int count=0;
for(int i=2;i<=13;i++)
{
count++;
n=i;
double stime=serial();
double ptime=parallel();
printf("OpenMP is %lf times faster for n=%d\n",stime/ptime,n);
average+=stime/ptime;
puts("===============");
}
printf("On average OpenMP is %lf times faster\n",average/count);
return 0;
}
Parallel code is already faster than normal one but i wonder how can i optimize it more using openmp pragmas. I want to know what i should do for better performance and what i should not do.
Thanks in advance.
(Please dont suggest any optimizations which are non-related to parallel programming)
Your code seems to use classic backtracking N-Queens recursive algorithm, which is not the fastest possible for N-Queens solving, but (due to simplicity) is the most vivid one in terms of practicing with parallelism basics.
That's being said: this is very simple, thus you don't expect it to naturally demonstrate lots of advanced OpenMP means except basic "parallel for" and reduction.
But, as far as you're looking for learning parallelism and probably for more clearness and better learning curve, there is one more (out of many possible) implementation available, which uses the same algorithm but tends to be more readable and vivid from educational perspective:
void setQueen(int queens[], int row, int col) {
//check all previously placed rows for attacks
for(int i=0; i<row; i++) {
// vertical attacks
if (queens[i]==col) {
return;
}
// diagonal attacks
if (abs(queens[i]-col) == (row-i) ) {
return;
}
}
// column is ok, set the queen
queens[row]=col;
if(row==size-1) {
#pragma omp atomic
nrOfSolutions++; //Placed final queen, found a solution
}
else {
// try to fill next row
for(int i=0; i<size; i++) {
setQueen(queens, row+1, i);
}
}
}
//Function to find all solutions for nQueens problem on size x size chessboard.
void solve() {
#pragma omp parallel for
for(int i=0; i<size; i++) {
// try all positions in first row
int * queens = new int[size]; //array representing queens placed on a chess board. Index is row position, value is column.
setQueen(queens, 0, i);
delete[](queens);
}
}
This given code is one of Intel Advisor XE samples (for both C++ and Fortran); the parallelization aspects for given sample are discussed in very detailed manner in Chapter 10 of given Parallel Programming Book (in fact, given chapter just uses N-Queens to demonstrate how to use tools in order to parallelize serial code in general).
Given Advisor n-queens sample uses essentially the same algorithm as yours, but it replaces explicit reduction with combination of simple parallel for + atomic. This code is expected to be less efficient, but more "procedural-style" and more "educational", since it demonstrates "hidden" data race. In case you upload given samplecode, you will actually find 4 equialent N-Queens parallel implementatons using TBB, Cilk Plus and OpenMP (OMP is for C++ and Fortran).
I know I am a little late for the party, but you can use task queueing for further optimization.(about 7-10% faster results).No idea why. Here's the code,that i am using :
#include <iostream> // std::cout, cin, cerr ...
#include <iomanip> // modify std::out
#include <omp.h>
using namespace std;
int nrOfSolutions=0;
int size=0;
void print(int queens[]) {
cerr << "Solution " << nrOfSolutions << endl;
for(int row=0; row<size; row++) {
for(int col=0; col<size; col++) {
if(queens[row]==col) {
cout << "Q";
}
else {
cout << "-";
}
}
cout << endl;
}
}
void setQueen(int queens[], int row, int col, int id) {
for(int i=0; i<row; i++) {
// vertical attacks
if (queens[i]==col) {
return;
}
// diagonal attacks
if (abs(queens[i]-col) == (row-i) ) {
return;
}
}
// column is ok, set the queen
queens[row]=col;
if(row==size-1) {
// only one thread should print allowed to print at a time
{
// increasing the solution counter is not atomic
#pragma omp critical
nrOfSolutions++;
#ifdef _DEBUG
#pragma omp critical
print(queens);
#endif
}
}
else {
// try to fill next row
for(int i=0; i<size; i++) {
setQueen(queens, row+1, i, id);
}
}
}
void solve() {
int myid=0 ;
#pragma omp parallel
#pragma omp single
{
for(int i=0; i<size; i++) {
/*
#ifdef _OMP //(???)
myid = omp_get_thread_num();
#endif
#ifdef _DEBUG
cout << "ThreadNum: " << myid << endl ;
#endif
*/
// try all positions in first row
// create separate array for each recursion
// started here
#pragma omp task
setQueen(new int[size], 0, i, myid);
}
}
}
int main(int argc, char*argv[]) {
if(argc !=2) {
cerr << "Usage: nq-openmp-taskq boardSize.\n";
return 0;
}
size = atoi(argv[1]);
cout << "Starting OpenMP Task Queue solver for size " << size << "...\n";
double st=omp_get_wtime();
solve();
double en=omp_get_wtime();
printf("Time taken using openmp %lf\n",en-st);
cout << "Number of solutions: " << nrOfSolutions << endl;
return 0;
}

Resources