How to make a strided chunk iterator with Thrust (CUDA) - parallel-processing

I need an iterator class like this one:
https://github.com/thrust/thrust/blob/master/examples/strided_range.cu
but the new iterator should produce the following sequence:
[k * size_stride, k * size_stride+1, ...,k * size_stride+size_chunk-1,...]
with
k = 0,1,...,N
Example:
size_stride = 8
size_chunk = 3
N = 3
then the sequence is
[0,1,2,8,9,10,16,17,18,24,25,26]
I don't know how to do this efficiently...

The strided range iterator is basically a carefully crafted permutation iterator with a functor that gives the appropriate indices for permutation.
Here is a modification to the strided range iterator example. The main changes were:
include the chunk size as an iterator parameter
modify the functor that provides the indices for the permutation iterator to spit out the desired sequence
adjust the definitions of .end() iterator to provide the appropriate length of sequence.
Worked example:
$ cat t1280.cu
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/functional.h>
#include <thrust/fill.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
#include <thrust/sequence.h>
#include <iostream>
#include <assert.h>
// strided_chunk_range: a view over [first, last) that visits the first
// `chunk` elements of every `stride`-sized window, i.e. source indices
//   k*stride, k*stride + 1, ..., k*stride + chunk - 1   for k = 0, 1, ...
// A trailing partial window contributes min(remaining, chunk) elements.
//
// examples (arguments are: range, stride, chunk):
// strided_chunk_range([0, 1, 2, 3, 4, 5, 6], 1,1) -> [0, 1, 2, 3, 4, 5, 6]
// strided_chunk_range([0, 1, 2, 3, 4, 5, 6], 2,1) -> [0, 2, 4, 6]
// strided_chunk_range([0, 1, 2, 3, 4, 5, 6], 3,2) -> [0, 1, 3, 4, 6]
//
// Requires 0 < chunk <= stride (checked with assert in the constructor).
template <typename Iterator>
class strided_chunk_range
{
public:
    typedef typename thrust::iterator_difference<Iterator>::type difference_type;

    // Maps an output position i to the source index it reads from.
    struct stride_functor : public thrust::unary_function<difference_type,difference_type>
    {
        difference_type stride;
        int chunk;

        stride_functor(difference_type stride, int chunk)
            : stride(stride), chunk(chunk) {}

        __host__ __device__
        difference_type operator()(const difference_type& i) const
        {
            // Keep the window number in difference_type: storing it in an int
            // would truncate the index for ranges longer than INT_MAX.
            const difference_type window = i / chunk;
            const difference_type offset = i - window * chunk;
            return window * stride + offset;
        }
    };

    typedef typename thrust::counting_iterator<difference_type> CountingIterator;
    typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
    typedef typename thrust::permutation_iterator<Iterator,TransformIterator> PermutationIterator;

    // type of the strided_chunk_range iterator
    typedef PermutationIterator iterator;

    // construct strided_chunk_range for the range [first,last)
    strided_chunk_range(Iterator first, Iterator last, difference_type stride, int chunk)
        : first(first), last(last), stride(stride), chunk(chunk)
    {
        // chunk == 0 would divide by zero in stride_functor; chunk > stride is
        // not supported by the index mapping.
        assert(chunk > 0);
        assert(chunk <= stride);
    }

    // Iterator to the first selected element.
    iterator begin(void) const
    {
        return PermutationIterator(first, TransformIterator(CountingIterator(0), stride_functor(stride, chunk)));
    }

    // Iterator one past the last selected element.
    iterator end(void) const
    {
        // All arithmetic stays in difference_type to avoid narrowing.
        const difference_type length       = last - first;
        const difference_type full_windows = length / stride;
        const difference_type remainder    = length - full_windows * stride;
        const difference_type chunk_len    = chunk;
        // The trailing partial window contributes at most `chunk` elements.
        const difference_type tail = (remainder < chunk_len) ? remainder : chunk_len;
        return begin() + (full_windows * chunk_len + tail);
    }

protected:
    Iterator first;
    Iterator last;
    difference_type stride;
    int chunk;
};
int main(void)
{
thrust::device_vector<int> data(50);
thrust::sequence(data.begin(), data.end());
typedef thrust::device_vector<int>::iterator Iterator;
// create strided_chunk_range
std::cout << "stride 3, chunk 2, length 7" << std::endl;
strided_chunk_range<Iterator> scr1(data.begin(), data.begin()+7, 3, 2);
thrust::copy(scr1.begin(), scr1.end(), std::ostream_iterator<int>(std::cout, " ")); std::cout << std::endl;
std::cout << "stride 8, chunk 3, length 50" << std::endl;
strided_chunk_range<Iterator> scr(data.begin(), data.end(), 8, 3);
thrust::copy(scr.begin(), scr.end(), std::ostream_iterator<int>(std::cout, " ")); std::cout << std::endl;
return 0;
}
$ nvcc -arch=sm_35 -o t1280 t1280.cu
$ ./t1280
stride 3, chunk 2, length 7
0 1 3 4 6
stride 8, chunk 3, length 50
0 1 2 8 9 10 16 17 18 24 25 26 32 33 34 40 41 42 48 49
$
This is probably not the most optimal implementation, in particular because we are doing division in the permutation functor, but it should get you started.
I assume (and test for) chunk<=stride, because this seemed reasonable to me, and simplified my thought process. I'm sure it could be modified, with an appropriate example of what sequence you would like to see, for the case where chunk>stride.

Related

How to assign variadic template arguments to std::array

I have an std::array and I have a variadic template function with the number of parameters that matches the size of the array. I need to assign the arguments to the elements of the array. In other words, in the code below I wish a to get values {1, 2, 3} and b to get values {1, 2, 3, 4, 5}
std::array<int, 3> a;
std::array<int, 5> b;
assign_values(a, 1, 2, 3);
assign_values(b, 1, 2, 3, 4, 5);
The question is how to implement the assign_values variadic template function.
I'm limited with the C++14 version.
Update:
The arguments can be of different types: assign_values(b, 1, 2u, 3., '4', 5l);
Something like this:
// Assigns the given values, in order, to the elements of `arr`.
//
// The number of values must equal N (checked at compile time). Each value is
// converted to T explicitly, so arguments of mixed arithmetic types such as
// assign_values(b, 1, 2u, 3., '4', 5l) are accepted; building a braced
// initializer_list of the common type would reject those as narrowing.
// The two-argument static_assert keeps this valid in C++11/14.
template<class T, std::size_t N, class ... Values>
void assign_values(std::array<T,N>& arr, Values... vals) {
    static_assert(N == sizeof...(vals),
                  "assign_values: number of values must match the array size");
    arr = std::array<T, N>{{ static_cast<T>(vals)... }};
}
Demo
I'm limited with the C++14 version
The good old trick of the unused array initialization (a pre-C++17 surrogate for fold expressions) should work (also in C++11):
// Assigns the given values, in order, to the elements of `arr` using the
// "unused array" pack-expansion trick (works in C++11/14, no fold expressions).
//
// The number of values must equal N (checked at compile time). Elements of a
// braced-init-list are evaluated left to right, so j advances in argument order.
template <typename T, std::size_t N, typename ... Values>
void assign_values (std::array<T,N> & arr, Values... vals)
{
   // Two-argument form: the message-less static_assert is C++17 only.
   static_assert(N == sizeof...(vals),
                 "assign_values: number of values must match the array size");

   using unused = int[];

   std::size_t j = 0;

   // The inner (void) cast guards against an overloaded comma operator on the
   // assignment result; static_cast makes the conversion to T explicit.
   (void)unused { 0, ((void)(arr[j++] = static_cast<T>(vals)), 0)... };
   (void)j; // silences "set but not used" when the pack is empty
}

Creating a C++ template function that allows multiple types of array containers

In modern C++ you can create arrays by three primary methods shown below.
// Traditional method: built-in (C-style) array, size deduced from the initializer
int array_one[] = {1, 2, 3, 4};
// Vector container: dynamically sized
std::vector<int> array_two = {1, 2, 3, 4};
// array container: fixed size known at compile time
std::array<int, 4> array_three = {1, 2, 3, 4};
While each array method contains the same data, they are inherently different containers. I am writing a very simple Unit Test class with template functions to make it easier to pass multiple data types. I have an example shown below for the .hpp and .cpp calling file. The one method shown in the file takes a std::vector and compares it to another std::vector index by index to ensure that each value is within a certain tolerance of the other.
// main.cpp
#include <iostream>
#include <string>
#include <vector>
#include <array>
#include "unit_test.hpp"
int main(int argc, const char * argv[]) {
int array_one[] = {1, 2, 3, 4};
std::vector<int> array_two = {1, 2, 3, 4};
std::vector<float> array_four = {0.99, 1.99, 2.99, 3.99};
std::array<int, 4> array_three {1, 2, 3, 4};
std::string c ("Vector Test");
UnitTest q;
double unc = 0.1;
q.vectors_are_close(array_two, array_four, unc, c);
return 0;
}
and
#ifndef unit_test_hpp
#define unit_test_hpp

#include <cmath>
#include <cstddef>
#include <iostream>
#include <string>
#include <typeinfo>
#include <vector>

/// Minimal unit-test helper that prints a dotted "label....PASSED/FAILED" line
/// to std::cout for each check.
class UnitTest
{
public:
    /// Compares two vectors element by element.
    ///
    /// Prints `str`, padded with '.' up to 50 characters, followed by
    /// "PASSED" when both vectors have the same size and every pair of
    /// elements is within relative tolerance `k`; prints "FAILED" otherwise.
    ///
    /// @param i   first vector
    /// @param j   second vector
    /// @param k   maximum allowed relative difference (see is_close)
    /// @param str label printed in front of the result
    template <class type1, class type2>
    void vectors_are_close(const std::vector<type1> &i, const std::vector<type2> &j,
                           double k, std::string str);
private:
    /// Throws the C string "Number not in Tolerance" when the difference of
    /// `i` and `j`, relative to their mean, exceeds `k`.
    template <class type1, class type2>
    void is_close(type1 &i, type2 &j, double k);
};

template <class type1, class type2>
void UnitTest::vectors_are_close(const std::vector<type1> &i, const std::vector<type2> &j,
                                 double k, std::string str)
{
    // Pad the label to a fixed column. Labels longer than the column get no
    // padding; an unsigned "50 - length" would wrap around to a huge count.
    const std::size_t column = 50;
    const std::size_t remain = str.length() < column ? column - str.length() : 0;
    const std::string prefix = str + std::string(remain, '.');

    if (i.size() != j.size()) {
        std::cout << prefix + std::string("FAILED") << std::endl;
        return;
    }

    try {
        for (std::size_t a = 0; a < i.size(); ++a) {
            is_close(i[a], j[a], k);
        }
        std::cout << prefix + std::string("PASSED") << std::endl;
    } catch (const char*) {
        // is_close signals an out-of-tolerance pair by throwing a C string.
        std::cout << prefix + std::string("FAILED") << std::endl;
    }
}

template <class type1, class type2>
void UnitTest::is_close(type1 &i, type2 &j, double k)
{
    // std::abs selects the floating-point overload; an unqualified abs can
    // bind to the C abs(int) and silently truncate the ratio to 0.
    const double percent_diff = std::abs((j - i) / ((i + j) / 2.0));
    if (percent_diff > k) {
        throw "Number not in Tolerance";
    }
}

#endif /* unit_test_hpp */
In this example the code compares two vectors; however, if I want to compare std::array containers I will have to create a whole new function to do that, and if I want to compare two generic arrays, I will have to create yet another function. In addition, if I want to compare data in a std::array container to a std::vector container, I will again have to create another function. I would like to create a single templated member function to which I can pass any type of container and have it compared against any other type of container. In other words, instead of:
void UnitTest::vectors_are_close(const std::vector<type1> &i, const std::vector<type2> & j);
I would like a simpler function such as;
void UnitTest::arrays_are_close(const type1, const type2);
where type1 and type2 do not just refer to the data in the container, but also to the type of the container. In this way I could pass a std::vector as type1 and a std::array as type2, or any other combination of traditional arrays, array containers and vector containers. Is there any way to facilitate this behavior?
With a few changes to your implementation it is possible to do that:
// Container-generic variant of vectors_are_close: accepts anything that
// std::size() and operator[] work on (std::vector, std::array, C-style arrays).
//
// Prints `str`, padded with '.' up to 50 characters, followed by "PASSED" when
// both containers have the same size and no is_close call throws; prints
// "FAILED" otherwise. Requires C++17 for std::size.
template <class container1, class container2>
void UnitTest::vectors_are_close(const container1 &i, const container2 &j,
                                 double k, std::string str)
{
    // Labels of 50 characters or more get no padding; the unsigned
    // subtraction "50 - length" would otherwise wrap to a huge count.
    const std::string::size_type remain =
        str.length() < 50 ? 50 - str.length() : 0;
    const std::string prefix = str + std::string(remain, '.');

    if (std::size(i) != std::size(j)) {
        std::cout << prefix + std::string("FAILED") << std::endl;
        return;
    }

    try {
        // Index with the size's own (unsigned) type to avoid a
        // signed/unsigned comparison.
        for (decltype(std::size(i)) a = 0; a < std::size(i); ++a) {
            is_close(i[a], j[a], k);
        }
        std::cout << prefix + std::string("PASSED") << std::endl;
    } catch (const char*) {
        // A thrown C string marks the comparison as failed.
        std::cout << prefix + std::string("FAILED") << std::endl;
    }
}
This function should work for std::vector, std::array and C-style arrays.

std::map operator< pointer address compare vs pointer value compare

I was investigating how map handles custom types and I came across some odd behavior.
I created a custom type ´ComplexType´ that has 1 member, a pointer to an int.
I first compared using the value of this int, which gave the expected behavior.
#include <iostream>
#include <map>
// Key type that owns a single heap-allocated int and is ordered by the
// VALUE of that int.
//
// `index` is never null: every constructor allocates it and the destructor
// releases it. Copies are deep (each object owns its own int).
struct ComplexType
{
    ComplexType(int i) : index(new int(i)) {}

    ComplexType(const ComplexType& cT) : index(new int(*cT.index)) {}

    // Copy assignment completes the Rule of Three; the implicit one would copy
    // the pointer and lead to a double delete. Both objects already own
    // storage, so copying the value is sufficient (and self-assignment safe).
    ComplexType& operator=(const ComplexType& cT)
    {
        *index = *cT.index;
        return *this;
    }

    ~ComplexType()
    {
        delete index;
    }

    // Taken by const reference: passing by value would allocate and free a
    // temporary int on every comparison.
    bool operator<(const ComplexType& cT) const
    {
        return *index < *cT.index;
    }

    int* index;
};
int main(){
int pi[] = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 8};
std::map< ComplexType , int > container;
for(int i = 0; i < 12; ++i){
container[ComplexType(i)] = pi[i];
}
std::cout << "Loop map, size: " << container.size() << std::endl;
for(auto it = container.begin();it != container.end(); it++){
std::cout << "Show index map, size: " << container.size() << std::endl;
std::cout << *it->first.index << std::endl;
}
return 0;
}
With the output:
Loop map, size: 12
Show index map, size: 12
0
Show index map, size: 12
1
Show index map, size: 12
2
Show index map, size: 12
3
Show index map, size: 12
4
Show index map, size: 12
5
Show index map, size: 12
6
Show index map, size: 12
7
Show index map, size: 12
8
Show index map, size: 12
9
Show index map, size: 12
10
Show index map, size: 12
11
Now I changed my compare function to compare on the address of the pointer.
#include <functional>
#include <iostream>
#include <map>
// Key type that owns a single heap-allocated int and is ordered by the
// ADDRESS of that int (identity ordering), not by its value.
//
// Because the copy constructor allocates a new int, a copy of a key is a
// different key: it has a different address and never compares equivalent
// to the object it was copied from.
struct ComplexType
{
    ComplexType(int i) : index(new int(i)) {}

    ComplexType(const ComplexType& cT) : index(new int(*cT.index)) {}

    // Copy assignment completes the Rule of Three; the implicit one would copy
    // the pointer and lead to a double delete. The address (identity) is kept.
    ComplexType& operator=(const ComplexType& cT)
    {
        *index = *cT.index;
        return *this;
    }

    ~ComplexType()
    {
        delete index;
    }

    // The argument MUST be taken by reference. A by-value parameter is
    // copy-constructed for every call, so the comparison would be made against
    // the address of a fresh temporary; the result is then inconsistent between
    // calls, which breaks the strict weak ordering std::map relies on.
    // std::less is used because the built-in < on unrelated pointers is not
    // guaranteed to give a total order.
    bool operator<(const ComplexType& cT) const
    {
        return std::less<const int*>()(index, cT.index);
    }

    int* index;
};
int main(){
int pi[] = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 8};
std::map< ComplexType , int > container;
for(int i = 0; i < 12; ++i){
container[ComplexType(i)] = pi[i];
}
std::cout << "Loop map, size: " << container.size() << std::endl;
for(auto it = container.begin();it != container.end(); it++){
std::cout << "Show index map, size: " << container.size() << std::endl;
std::cout << *it->first.index << std::endl;
}
return 0;
}
I expected this to result in a random order based on what addresses the pointer got on the heap. Instead I got the following:
Loop map, size: 12
Show index map, size: 12
1
Show index map, size: 12
0
I compiled using g++ (GCC) 5.3.0
\randomness map>g++ -std=c++11 -o mapConstructionComplexType mapConstructionComplexType.cpp
\randomness map>g++ -std=c++11 -o mapConstructionComplexTypePointerCmp mapConstructionComplexTypePointerCmp.cpp
Can anyone explain this odd behavior?

Replace pointer to pointer by initializer_list

#include <initializer_list>
#include <iostream>
#include <vector>
// Legacy C-style API (unintuitive: takes a pointer to an array of row pointers).
//
// Prints the first two ints of each row as "a, b" on its own line.
//   data - array of at least `rows` pointers, each pointing to at least two ints
//   rows - number of rows to print; defaults to 3, the count that used to be
//          hard-coded, so existing calls behave as before
void original(int const **data, std::size_t rows = 3)
{
    for(std::size_t i = 0; i != rows; ++i){
        int const *row = data[i];
        // Index explicitly: printing *row++ and *row in one expression has
        // unspecified evaluation order.
        std::cout<<row[0]<<", "<<row[1]<<std::endl;
    }
}
// Friendlier wrapper around original(): accepts the rows as nested braced
// lists, e.g. replace_original({ {0, 1}, {2, 3}, {4, 5} }).
//
// Precondition: `list` must provide as many rows, and as many ints per row,
// as original() reads.
void replace_original(std::initializer_list<std::initializer_list<int>> list)
{
    // Reserve instead of size-constructing: a vector built with list.size()
    // elements already holds that many null pointers, and push_back would
    // append the real pointers AFTER them, so original() would dereference null.
    std::vector<int const*> results;
    results.reserve(list.size());
    for(auto const &row : list){
        // Points into the backing array of the braced list, which lives for
        // the duration of this call.
        results.push_back(row.begin());
    }
    // data() is valid even for an empty vector, unlike &results[0].
    original(results.data());
}
// Prints the same three rows twice: once through the legacy pointer-to-pointer
// API and once through the initializer_list wrapper.
int main()
{
    int row_a[] = {0, 1};
    int row_b[] = {2, 3};
    int row_c[] = {4, 5};

    int const *table[3];
    table[0] = row_a;
    table[1] = row_b;
    table[2] = row_c;

    original(table);
    replace_original({ {0, 1}, {2, 3}, {4, 5} });

    return 0;
}
The results are
0, 1
2, 3
4, 5
expected results are
0, 1
2, 3
4, 5
0, 1
2, 3
4, 5
I want to encapsulate the api of original(old, c style api) by the api like replace_original
But can't figure out why #1 can't work.
Ah, stupid mistake, I should change the loop to
size_t const size = list.size();
std::vector<int const*> results(size);
for(size_t i = 0; i != size; ++i){
results[i] = std::begin( *(std::begin(list) + i) );
}
Do you have a better solution to encapsulate this kind of api?
After some searching, I found out that in C++14 the size() member of initializer_list
becomes constexpr, so we should be able to use std::array in place of std::vector.

boost relocate function, what is the effect?

What does relocate() mean in boost multi-index container?
I have read the manual from boost documentations, but I want to see a simple example and see the difference of using and not using the relocate function. The examples on the web are not simple though....
It merely relocates (moves) item(s) in a sequenced index:
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/sequenced_index.hpp>
#include <iostream>
using namespace boost::multi_index;
typedef multi_index_container<
int,
indexed_by<sequenced<> >
> Ints;
int main()
{
Ints ints;
ints.insert(ints.end(), 1);
ints.insert(ints.end(), 2);
ints.insert(ints.end(), 3);
ints.insert(ints.end(), 4);
std::for_each (ints.begin(), ints.end(), [&](int i) { std::cout << i << std::endl; }); // 1, 2, 3, 4
auto i = find(ints.begin(), ints.end(), 2);
ints.relocate(ints.end(), i);
std::for_each (ints.begin(), ints.end(), [&](int i) { std::cout << i << std::endl; }); // 1, 3, 4, 2
}

Resources