I've been trying to parallelize my class and its constructor using CUDA.
Below you can find both the serial version and the parallelized version of my class. The code compiles correctly, and I would like to know whether my parallelized code can be improved.
Serial Code Ray.h:
#pragma once
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <math.h>
#include <vector>
#include <algorithm>
#include <complex>
#include "arithmatic_operations.h"
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
// A single ray (serial version): origin, unit direction, incident field
// vectors and bookkeeping values, all stored on the host.
class Ray
{
public:
    // Three pre-allocated {x, y, z} rows each; the constructor fills row
    // (no_bounces + 1), i.e. row 0.
    std::vector<std::vector<double>>Point = { { 0,0,0 } ,{ 0,0,0 } ,{ 0,0,0 } };
    std::vector<std::vector<double>>Direction = { { 0,0,0 } ,{ 0,0,0 } ,{ 0,0,0 } };
    double no_bounces = -1;
    double length = -1;
    std::vector<double>E_thei = { 0,0,0 };
    std::vector<double>E_phii = { 0,0,0 };
    std::complex<double> Er_the_the = 0;
    std::complex<double> Er_phi_the = 0;
    std::complex<double> Er_the_phi = 0;
    std::complex<double> Er_phi_phi = 0;
    double Ai = 0;

    // OO    : origin, at least 3 components (x, y, z).
    // DD    : at least 3 rows of at least 3 components; row 0 is the
    //         (un-normalized) direction, row 1 is copied to E_thei and
    //         row 2 to E_phii.
    // delta : Ai is set to delta squared.
    //
    // The arguments are taken by const reference so the nested vectors are
    // not copied on every construction. No bounds checking is done, and a
    // zero-length direction still divides by zero, exactly as before.
    Ray(const std::vector<double>& OO, const std::vector<std::vector<double>>& DD, double delta)
    {
        // no_bounces starts at -1, so this selects row 0.
        const int row = static_cast<int>(no_bounces + 1);

        for (int k = 0; k < 3; ++k)
            this->Point[row][k] = OO[k];

        // Only the first three components take part in the norm. The vector
        // is left non-const because norm() is declared in a project header
        // and its parameter type is not visible here.
        std::vector<double> first_row_DD = { DD[0][0], DD[0][1], DD[0][2] };
        // Hoisted: the norm was previously recomputed for every component.
        const double direction_norm = norm(first_row_DD);

        for (int k = 0; k < 3; ++k)
        {
            this->Direction[row][k] = DD[0][k] / direction_norm;
            this->E_thei[k] = DD[1][k];
            this->E_phii[k] = DD[2][k];
        }

        this->Ai = delta * delta;
    }
};
and below there is the code I parallelized:
Ray.cuh:
#include <iostream>
#include <array>
#include <fstream>
#include <string>
#include <sstream>
#include <cstddef>
#include <utility>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/fill.h>
#include <thrust/copy.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/zip_function.h>
#include <thrust/execution_policy.h>
// Returns x * x. Takes its argument by value so temporaries and const values
// are accepted, is callable from both host and device code, and is `inline`
// because this definition lives in a header (.cuh) that may be included from
// several translation units.
__host__ __device__ inline double square(double x) { return x * x; }
// Unary functor for thrust::transform that squares its argument.
// The argument is taken by value (the previous non-const reference could not
// bind to temporaries or const elements), and the functor is usable with
// both host and device execution policies.
struct myPow
{
    __host__ __device__
    double operator()(double x) const { return x * x; }
};
// Device functor: maps an (x, y, z) tuple to its Euclidean length.
struct CalculateNormValues
{
    __device__
    double operator()(const thrust::tuple<double, double, double>& t) const
    {
        const double cx = thrust::get<0>(t);
        const double cy = thrust::get<1>(t);
        const double cz = thrust::get<2>(t);
        const double sum_of_squares = cx * cx + cy * cy + cz * cz;
        return sqrt(sum_of_squares);
    }
};
// Device functor: given (x, y, z, norm), returns (x / norm, y / norm, z / norm).
// No check is made for norm == 0.
struct Normalize
{
    __device__
    thrust::tuple<double, double, double> operator()(const thrust::tuple<double, double, double, double>& t) const
    {
        const double len = thrust::get<3>(t);
        const double nx = thrust::get<0>(t) / len;
        const double ny = thrust::get<1>(t) / len;
        const double nz = thrust::get<2>(t) / len;
        return thrust::make_tuple(nx, ny, nz);
    }
};
// Batch of rays stored as a structure of arrays in device memory: every
// scalar component is one thrust::device_vector with one entry per ray.
//
// Host code that indexes a device_vector (e.g. `Ai[0] = 5;`) executes on the
// CPU and performs a separate host<->device transfer per access, so avoid it
// in loops. To use the data in a hand-written __global__ kernel, pass
// thrust::raw_pointer_cast(vec.data()) and the element count; no extra copy
// or allocation is needed.
class Ray {
    static constexpr int n_dims = 3;
    static constexpr int cn_dims = 2;
    using Container = thrust::device_vector<double>;
    using Vectors = std::array<Container, n_dims>;
    using Matrices = std::array<Container, n_dims* n_dims>;
    using Complexes = std::array<Container, cn_dims>;
public:
    std::ptrdiff_t n_rays{};
    Vectors E_thei;
    Vectors E_phii;
    // Only the first three containers of Point and Direction are populated
    // by the constructor; the remaining six stay empty.
    Matrices Point;
    Matrices Direction;
    // The members below up to `length` are left default-constructed (empty),
    // except Ai, which receives delta squared.
    Complexes Er_the_the;
    Complexes Er_phi_the;
    Complexes Er_the_phi;
    Complexes Er_phi_phi;
    Container Ai;
    Container no_bounces;
    Container length;
    // Per-ray length of the direction vector before normalization.
    Container normValues;

    // Builds the batch from per-component device vectors.
    //
    // OO_k  : k-th component of each ray origin.
    // DD_0k : k-th component of each (un-normalized) direction.
    // DD_1k : k-th component of E_thei;  DD_2k : k-th component of E_phii.
    // delta : per-ray value; Ai becomes delta squared.
    //
    // NOTE: every argument is moved from, so the caller's vectors are left
    // in a moved-from state. All inputs are expected to have the same length
    // as OO_0 (not checked). A zero-length direction divides by zero.
    Ray(thrust::device_vector<double>& OO_0,
        thrust::device_vector<double>& OO_1,
        thrust::device_vector<double>& OO_2,
        thrust::device_vector<double>& DD_00,
        thrust::device_vector<double>& DD_01,
        thrust::device_vector<double>& DD_02,
        thrust::device_vector<double>& DD_10,
        thrust::device_vector<double>& DD_11,
        thrust::device_vector<double>& DD_12,
        thrust::device_vector<double>& DD_20,
        thrust::device_vector<double>& DD_21,
        thrust::device_vector<double>& DD_22,
        thrust::device_vector<double>& delta) :
        // Listed in declaration order, which is the order members are
        // actually initialized in. n_rays must read OO_0.size() before OO_0
        // is moved into Point.
        n_rays{ static_cast<std::ptrdiff_t>(OO_0.size()) },
        E_thei{ std::move(DD_10),
                std::move(DD_11),
                std::move(DD_12) },
        E_phii{ std::move(DD_20),
                std::move(DD_21),
                std::move(DD_22) },
        Point{ std::move(OO_0),
               std::move(OO_1),
               std::move(OO_2) },
        Direction{ std::move(DD_00), // normalized in the body
                   std::move(DD_01),
                   std::move(DD_02) },
        Ai{ std::move(delta) }       // squared in the body
    {
        // Ai = delta^2, element-wise, in place.
        thrust::transform(Ai.begin(), Ai.end(), Ai.begin(), myPow());

        // One norm per ray. This was resize(3), which made the transform
        // below write past the end of normValues whenever n_rays > 3.
        normValues.resize(n_rays);
        thrust::transform(
            thrust::make_zip_iterator(thrust::make_tuple(Direction[0].begin(), Direction[1].begin(), Direction[2].begin())),
            thrust::make_zip_iterator(thrust::make_tuple(Direction[0].end(), Direction[1].end(), Direction[2].end())),
            normValues.begin(),
            CalculateNormValues{});

        // Divide each direction component by its ray's norm, in place.
        thrust::transform(
            thrust::make_zip_iterator(
                thrust::make_tuple(Direction[0].begin(), Direction[1].begin(), Direction[2].begin(), normValues.begin())),
            thrust::make_zip_iterator(
                thrust::make_tuple(Direction[0].end(), Direction[1].end(), Direction[2].end(), normValues.end())),
            thrust::make_zip_iterator(
                thrust::make_tuple(Direction[0].begin(), Direction[1].begin(), Direction[2].begin())),
            Normalize{});
    }
};
The program compiles but I'd like to ask some questions.
When I use thrust::transform I know that the thrust library does the memory allocation and copying on the device for me. I wonder after the operation is done, does it copy back to the host? After
thrust::transform(Ai.begin(), Ai.end(), Ai.begin(), myPow());
if I write a line like this Ai[0]=5. Is this line executed on CPU or GPU?
My second question: can I write a device function for my parallelized class using __global__ and CUDA threads? If so — for example, after moving the device_vectors OO_1 and OO_2 into the member Point — and I want to do math on these device vectors inside a __global__ function, do I need to copy them to C arrays and allocate device memory myself, given that device_vector is a host-only type?
Related
Consider the following set implementation. Here I have ordered the set based on fScore parameter. What should I do If I want to search for an element of particular 'id' in 'NodeData'.
I know I can use 'find' to search for any element of 'fScore' in the set with O(logn).
Is there any efficient way to search for 'id' (less time) than a linear search (implemented below)?
#include<iostream>
#include<algorithm>
#include<iterator>
#include<set>
#include<stdlib.h>
#include<vector>
// Search node. Ordering (operator<) looks at fScore only, so two nodes with
// the same fScore compare equivalent regardless of their id.
struct NodeData{
    int id;
    int parent;
    double fScore;
    double gScore;
    double hScore;
    std::vector<double> nScores;   // starts out empty

    // Defaults: id 0, parent -1, all three scores 1.
    NodeData(const int& idIn = 0,
             const int& parentIn = -1,
             const double& fIn = 1,
             const double& gIn = 1,
             const double& hIn = 1)
        : id(idIn)
        , parent(parentIn)
        , fScore(fIn)
        , gScore(gIn)
        , hScore(hIn)
    {}

    // Strict weak ordering by fScore.
    bool operator<(const NodeData& rhs) const {
        return rhs.fScore > fScore;
    }
};
// Holder for the example's node collection. std::set orders its elements
// with NodeData::operator<, i.e. by fScore, so lookups by any other field
// (such as id) cannot use the tree structure.
class test
{
public:
std::set<NodeData> NodeList;
};
int main()
{
test q;
for(int i=1;i<=5;i++)
{
NodeData n1 = {i,1,i,1,1};
q.NodeList.insert(n1);
}
std::set<NodeData>::iterator it;
//search for node with fScore 1 - cost O(logn)
it = q.NodeList.find(1);
if(it != q.NodeList.end()){
std::cout<<"node with fScore 1 found. id = "<<it->id<<std::endl;
}
else{
std::cout<<"node not found = "<<std::endl;
}
//searching for id=3 - Linear search - cost O(n)
int searchId = 3;
std::set<NodeData>::iterator it1 = q.NodeList.begin();
while(it1 != q.NodeList.end())
{
if(it1->id == searchId)
{
std::cout <<"found node with id = "<<it1->id<<std::endl;
}
it1++;
}
}
Does your set change often? If not, you could consider building an "index" — an unordered_map<> from whatever field you need to the corresponding element of your set.
There is a cost to maintaining such an "index"; you should check whether it outweighs the benefit of the faster search.
You can't achieve this without using different/additional data structures. If you're using C++ and you're OK with using a library, you can find this functionality in the Boost multi-index containers library.
Adding to Falk's answer, here's an example of how the thing could be done with Boost.MultiIndex:
Live On Coliru
#include <iostream>
#include <algorithm>
#include <iterator>
#include <stdlib.h>
#include <vector>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/identity.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/ordered_index.hpp>
// Search node stored in the multi-index container. operator< compares
// fScore only; it is what the identity-keyed index sorts by.
struct NodeData{
    int id;
    int parent;
    double fScore;
    double gScore;
    double hScore;
    std::vector<double> nScores;   // starts out empty

    // Defaults: id 0, parent -1, all three scores 1.
    NodeData(const int& idIn = 0,
             const int& parentIn = -1,
             const double& fIn = 1,
             const double& gIn = 1,
             const double& hIn = 1)
        : id(idIn),
          parent(parentIn),
          fScore(fIn),
          gScore(gIn),
          hScore(hIn)
    {
    }

    // Orders nodes by ascending fScore.
    bool operator<(const NodeData& other) const {
        return other.fScore > fScore;
    }
};
// Holder for a node collection with two unique, ordered indices:
//   index 0 - keyed on the whole NodeData (ordered by its operator<),
//   index 1 - keyed on NodeData::id.
class test
{
    // Index 0: the element itself is the key.
    using ByElement = boost::multi_index::ordered_unique<
        boost::multi_index::identity<NodeData>
    >;
    // Index 1: the id member is the key.
    using ById = boost::multi_index::ordered_unique<
        boost::multi_index::member<NodeData, int, &NodeData::id>
    >;
public:
    using NodeListType = boost::multi_index_container<
        NodeData,
        boost::multi_index::indexed_by<ByElement, ById>
    >;
    NodeListType NodeList;
};
int main()
{
test q;
for(int i=1;i<=5;i++)
{
NodeData n1 = {i,1,double(i),1,1};
q.NodeList.insert(n1);
}
test::NodeListType::iterator it;
//search for node with fScore 1 - cost O(logn)
it = q.NodeList.find(1);
if(it != q.NodeList.end()){
std::cout<<"node with fScore 1 found. id = "<<it->id<<std::endl;
}
else{
std::cout<<"node not found = "<<std::endl;
}
//searching for id=3 on second index - cost O(logn)
int searchId = 3;
test::NodeListType::nth_index<1>::type::iterator it1 = q.NodeList.get<1>().find(searchId);
if(it1 != q.NodeList.get<1>().end()){
std::cout <<"found node with id = "<<it1->id<<std::endl;
}
}
If, instead of an ordered index, you use a hashed index for NodeData::id, lookup takes constant time on average.
I would like to be able to display in a Dicom image in a Qt project with the same render as a Dicom Viewer Program could give.
I was able to do it but with a very bad contrast. I heard you need to operate on the pixels but I'm not sure. Do you have a working example ?
EDIT: I add my code in case it helps you, I commented a lot of things because I noticed the result was exactly the same
#include "mainwindow.h"
#include "ui_mainwindow.h"
#include <iostream>
#undef UNICODE
#undef _UNICODE
#include <dcmtk/config/osconfig.h>
#include <dcmtk/dcmdata/dctk.h>
#include <dcmtk/dcmimgle/dcmimage.h>
#include <QPixmap>
#include <QLabel>
#include <QImageReader>
using namespace std;
// Builds the main window and shows one DICOM file in ui->graphicsView.
//
// Changes compared with the first version:
//  * the DicomImage is a local object, so it is no longer leaked;
//  * for monochrome images a min/max VOI window is applied before
//    rendering, so the full stored value range is mapped to 8 bits
//    (without a window the picture comes out with very low contrast);
//  * the QImage is given an explicit bytes-per-line value, because the
//    rendered buffer is tightly packed while QImage otherwise assumes
//    32-bit aligned rows;
//  * monochrome data gets a 256-entry gray palette, and colour data is
//    handed over as interleaved RGB instead of being treated as indexed.
MainWindow::MainWindow(QWidget *parent) :
    QMainWindow(parent),
    ui(new Ui::MainWindow)
{
    ui->setupUi(this);

    const char *file = "/home/x4rkz/project/Laura/QTImage/IMG00000";
    DicomImage image(file);
    if (image.getStatus() != EIS_Normal)
    {
        cerr << "Error: cannot load DICOM image (" << DicomImage::getString(image.getStatus()) << ")" << endl;
        return;
    }

    const bool monochrome = image.isMonochrome();
    if (monochrome)
    {
        // Window the grey values to the min/max actually present in the image.
        image.setMinMaxWindow();
    }

    // 8 bits per sample; the returned buffer is owned by `image` and stays
    // valid only while `image` is alive.
    const uchar *pixelData = static_cast<const uchar *>(image.getOutputData(8));
    if (pixelData == NULL)
    {
        return;
    }

    const int width = static_cast<int>(image.getWidth());
    const int height = static_cast<int>(image.getHeight());

    QImage img;
    if (monochrome)
    {
        img = QImage(pixelData, width, height, width, QImage::Format_Indexed8);
        // Gray palette: index i -> (i, i, i).
        img.setColorCount(256);
        for (int i = 0; i < 256; ++i)
        {
            img.setColor(i, qRgb(i, i, i));
        }
    }
    else
    {
        // Colour images are rendered colour-by-pixel: R, G, B per pixel.
        img = QImage(pixelData, width, height, 3 * width, QImage::Format_RGB888);
    }

    // fromImage() copies the pixels, so the scene does not depend on the
    // DicomImage buffer after this point.
    QGraphicsScene * graphic = new QGraphicsScene( this );
    graphic->addPixmap( QPixmap::fromImage( img ) );
    ui->graphicsView->setScene(graphic);
}
// Frees the Ui::MainWindow object that the constructor allocated with new.
MainWindow::~MainWindow()
{
delete ui;
}
#include "mainwindow.h"
#include <QApplication>
#include <iostream>
#undef UNICODE
#undef _UNICODE
#include <dcmtk/config/osconfig.h>
#include <dcmtk/dcmdata/dctk.h>
#include <dcmtk/dcmimgle/dcmimage.h>
#include <QPixmap>
#include <QLabel>
#include <QImageReader>
using namespace std;
int main(int argc, char *argv[])
{
QApplication a(argc, argv);
MainWindow w;
w.show();
return a.exec();
}
As you can see, the result has no contrast.
If the rendered image has such low contrast, you should try to set an appropriate VOI (Values of Interest) window, e.g. using image->setMinMaxWindow(). See the API documentation for details.
I've this driver:
#include <linux/acpi.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/gpio.h>
#include <linux/gpio/consumer.h>
#include <linux/kernel.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <sound/pcm.h>
#include <sound/soc.h>
#include <sound/soc-dai.h>
#include <sound/soc-dapm.h>
/*
 * DAPM routing table with a single entry: sink "Mono out", no control
 * (NULL), source "Mono Mixer".
 * NOTE(review): no DAPM widget table is visible in this file; confirm that
 * widgets with these two names are registered somewhere, otherwise the
 * route cannot be resolved.
 */
static const struct snd_soc_dapm_route max9880_dapm_routes[] = {
{"Mono out", NULL, "Mono Mixer"}
};
/*
 * Codec driver description passed to snd_soc_register_codec() in probe.
 * Only the DAPM routes are filled in; no widgets, controls or callbacks
 * are set here.
 */
static struct snd_soc_codec_driver soc_codec_dev_max9880 = {
.component_driver = {
.dapm_routes = max9880_dapm_routes,
.num_dapm_routes = ARRAY_SIZE(max9880_dapm_routes)
}
};
/*
 * The codec's single DAI: a playback stream named "Playback" with exactly
 * one channel. No capture stream is described.
 * NOTE(review): .rates and .formats are not set for the playback stream;
 * presumably they are needed before a PCM can be opened - confirm against
 * the hardware's supported sample rates and formats.
 */
static struct snd_soc_dai_driver max9880_dai = {
.name = "max9880",
.playback = {
.stream_name = "Playback",
.channels_min = 1,
.channels_max = 1
}
};
/*
 * Platform probe: registers the codec together with its single DAI.
 *
 * Returns 0 on success or the negative error code from
 * snd_soc_register_codec().
 *
 * Registering a codec does not by itself create a sound card.
 * NOTE(review): an entry in /proc/asound/cards presumably also needs a
 * machine driver that registers a snd_soc_card linking a CPU DAI to this
 * codec DAI - confirm for the target platform.
 */
static int max9880_platform_probe(struct platform_device *pdev)
{
	int ret;

	/*
	 * Trace message at info level, tied to the device and terminated by
	 * a newline (it used to be sent at KERN_ALERT without one).
	 */
	dev_info(&pdev->dev, "1. platform probe\n");

	ret = snd_soc_register_codec(&pdev->dev, &soc_codec_dev_max9880,
				     &max9880_dai, 1);
	if (ret)
		dev_err(&pdev->dev, "failed to register codec: %d\n", ret);

	return ret;
}
/*
 * Platform remove: unregisters the codec that probe registered.
 * Always returns 0.
 */
static int max9880_platform_remove(struct platform_device *pdev)
{
	/* Info-level, newline-terminated trace (was KERN_ALERT, no newline). */
	dev_info(&pdev->dev, "2. platform remove\n");
	snd_soc_unregister_codec(&pdev->dev);
	return 0;
}
/*
 * Device-tree match table: binds to nodes whose compatible string is
 * "max9880". The empty entry terminates the table.
 */
static const struct of_device_id max9880_device_id[] = {
{ .compatible = "max9880" },
{}
};
/* Export the table so the module can be matched and auto-loaded. */
MODULE_DEVICE_TABLE(of, max9880_device_id);
/*
 * Platform driver glue: driver name "max9880", the device-tree match table
 * above, and the probe/remove callbacks defined in this file.
 */
static struct platform_driver max9880_platform_driver = {
.driver = {
.name = "max9880",
.of_match_table = of_match_ptr(max9880_device_id),
},
.probe = &max9880_platform_probe,
.remove = &max9880_platform_remove,
};
/* Generates the module init/exit functions that (un)register the driver. */
module_platform_driver(max9880_platform_driver);
and I use insmod to load the module into the kernel. This all seems to work well, and I'm also able to do an rmmod without any problem. However, I don't get any entries in /proc/asound/cards, meaning that my module isn't recognized as a sound card. What am I missing?
Is it possible to insert range of struct directly into vector of the same type (same type of a member of struct).
Let's have a struct and vectors like this:
// Point record used in the question; the goal is to copy only the _type
// member of each element of pnts into pntType.
struct pnt {
char _name;
int _type;
bool _aux;
};
// Source range of records.
std::vector<pnt> pnts;
// Destination for the _type values.
std::vector<int> pntType;
The question is that how to insert a range of pnts into pntType using single standard line of C++98:
void insert (iterator position, InputIterator first, InputIterator last);
or even Boost library.
Since I am using this often in different parts of my code, I am trying to avoid doing this in a loop. The last option is defining a function for that.
EDIT:
I know the insert syntax. What I cannot do is how to insert from pnts (only _type of each member) into pntType
UPDATE: There is a better way than my first suggestion (see bottom), since we're already using Boost. The problem with std::transform and std::insert_iterator is that v2 is resized several times, which is wasteful considering that we know the width of the range in advance. Using boost::transform_iterator and boost::bind, it is possible to avoid the problem like this:
#include <boost/bind.hpp>
#include <boost/iterator/transform_iterator.hpp>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <vector>
// Minimal stand-in for the question's struct: x plays the role of the
// member that is to be extracted.
struct A {
int x;
};
int main() {
A arr[] = {
{ 0 }, { 1 }, { 2 }, { 3 }, { 4 }, { 5 }, { 6 }
};
std::vector<A> v1(arr, arr + 6);
std::vector<int> v2;
v2.insert(v2.begin(),
boost::make_transform_iterator(v1.begin() + 2, boost::bind(&A::x, _1)),
boost::make_transform_iterator(v1.begin() + 4, boost::bind(&A::x, _1)));
std::copy(v2.begin(), v2.end(), std::ostream_iterator<int>(std::cout, "\n"));
}
OLD SUGGESTION:
boost::bind works with data member pointers, so using C++98 and Boost, you could do something like this without changing your struct:
#include <boost/bind.hpp>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <vector>
// Minimal stand-in for the question's struct; x is the member that gets
// copied out.
struct A {
int x;
};
int main() {
A arr[] = {
{ 0 }, { 1 }, { 2 }, { 3 }, { 4 }, { 5 }, { 6 }
};
std::vector<A> v1(arr, arr + 6);
std::vector<int> v2;
// one-liner here:
std::transform(v1.begin() + 2,
v1.begin() + 4,
std::insert_iterator<std::vector<int> >(v2, v2.begin()),
boost::bind(&A::x, _1));
std::copy(v2.begin(), v2.end(), std::ostream_iterator<int>(std::cout, "\n"));
}
Using boost range:
boost::copy(pnts | transformed(std::mem_fn(&pnt::_type)), std::back_inserter(pntType));
Or even
boost::copy_range<std::vector<int>>(pnts | transformed(std::mem_fn(&pnt::_type)));
See it Live on Coliru
Note that you can use boost::bind(&pnt::_type, _1) instead of mem_fn if your compiler version does not support std::mem_fn.
Updated To show with specific first/last iterators, and compiling in c++03 mode:
Live On Coliru
#include <boost/range/algorithm.hpp>
#include <boost/range/adaptors.hpp>
#include <boost/range/iterator_range.hpp>
#include <boost/bind.hpp>
using namespace boost::adaptors;
using namespace boost;
// Point record from the question; only _type is projected out in main().
struct pnt {
char _name;
int _type;
bool _aux;
};
int main() {
std::vector<pnt> pnts(6);
std::vector<int> pntType;
boost::copy(
make_iterator_range(pnts.begin(), pnts.begin()+3) | transformed(bind(&pnt::_type, _1)),
std::back_inserter(pntType));
}
Inserting one container into the other works like this:
pntType.insert(pntType.begin(),pnts.begin(),pnts.end());
To be able to insert the correct type, you should add a conversion operator to int to your struct.
// Point record that converts implicitly to int by yielding its _type, so a
// range of pnt can be inserted directly into a std::vector<int>.
struct pnt {
    char _name;
    int _type;
    bool _aux;
    // const-qualified so that const objects and elements reached through
    // const iterators can be converted as well.
    operator int () const {
        return _type;
    }
};
I am having trouble in implementing segment tree with lazy propagation. I just read about segment trees and tried to do a simple question (http://www.codechef.com/problems/FLIPCOIN) using it but I am getting wrong answer. Please help me with the implementation. Here is my code(If you prefer ideone:http://ideone.com/SHVZ5y):
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <utility>
#include <map>
#include <vector>
#include <list>
#include <string>
#include <set>
#include <queue>
#define s(x) scanf("%d",&x)
#define sil(x) scanf("%llu",&x)
#define sd(x) scanf("%ld",&x)
#define FOR(i,a,b) for( typeof(a) i=(a); i<(b); ++i) // exclusive for
#define FORR(i,a,b) for( typeof(a) i=(a-1) ; i>=(b); --i)
#define REP(k,a,b) for(typeof(a) k=(a); k <= (b); ++k) // inclusive for
#define REPR(i,a,b) for( typeof(a) i=(a) ; i>=(b); --i)
#define ALL(c) (c).begin(), (c).end()
#define PB push_back
#define MP make_pair
#define SZ(x) ((int)((x).size()))
#define SRT(v) std::sort(ALL(v))
#define CTN(x) std::cout<<x<<'\n' //cout with newline
#define CTS(x) std::cout<<x<<" " //cout with space
#define CLR(x) std::memset(x,0,sizeof(x))
// Fills every element of the array x with n. x must be a real array (not a
// pointer). The count is the element count; sizeof(x) alone is a byte count
// and made fill_n write past the end of any array whose elements are wider
// than one byte.
#define FILL(x,n) std::fill_n((x),sizeof(x)/sizeof((x)[0]),(n))
#define DBGA(x,n) {FOR(i,0,n) cout<<x[i]<<" "; CTN(" ");}
//#define NL printf("\n")
typedef std::vector<int> VI;
typedef std::vector<long long int> VL;
typedef std::vector<std::string> VS;
typedef std::map<int,int> MI;
typedef std::pair<int,int> PII;
typedef unsigned long long ull;
typedef long long ll;
using namespace std;
// Segment tree over a row of coins with lazy range flips.
// Node n covers an inclusive index range [l, r]; its children are 2*n+1
// (left half) and 2*n+2 (right half).
struct node{
    int h; //number of head
    int t; //number of tail
    int lazy; // 1 if a flip of this node's whole range is still pending
    node() : h(0), t(0), lazy(0) {}
}tree[300000];

// Flips every coin under node n (covering [l, r]): swaps the head/tail
// counts and defers the flip to the children by TOGGLING their pending flag.
// Toggling matters: two pending flips cancel out, so a child that already
// has a pending flip must end up with none. Setting the flag to 1 here lost
// that cancellation and produced wrong answers.
static void apply_flip(int n, int l, int r)
{
    int tmp=tree[n].h;
    tree[n].h=tree[n].t;
    tree[n].t=tmp;
    if(r!=l)
    {
        tree[2*n+1].lazy^=1;
        tree[2*n+2].lazy^=1;
    }
}

// Applies node n's pending flip, if any, and clears the flag.
static void push_down(int n, int l, int r)
{
    if(tree[n].lazy!=0)
    {
        apply_flip(n,l,r);
        tree[n].lazy=0;
    }
}

// Initialises the subtree rooted at node n for coins a..b (inclusive):
// every coin starts as a tail and no flips are pending.
void build_tree(int n,int a,int b)
{
    if(a>b)
        return;
    tree[n].lazy=0;
    if(a==b)
    {
        tree[n].h=0;
        tree[n].t=1;
        return;
    }
    int mid=(a+b)/2;
    build_tree(2*n+1,a,mid);
    build_tree(2*n+2,mid+1,b);
    tree[n].h=tree[2*n+1].h+tree[2*n+2].h;
    tree[n].t=tree[2*n+1].t+tree[2*n+2].t;
}

// Returns the number of heads among coins ql..qr (inclusive).
// n is the current node and [l, r] the range it covers; call with
// (0, ql, qr, 0, size-1).
int query(int n,int ql,int qr,int l,int r)
{
    // Bring this node up to date before reading or descending.
    push_down(n,l,r);
    if(l>qr || r<ql)
        return 0;
    if(l>=ql && r<=qr)
        return tree[n].h;
    int mid=(l+r)/2;
    return query(2*n+1,ql,qr,l,mid)+query(2*n+2,ql,qr,mid+1,r);
}

// Flips coins ul..ur (inclusive).
// n is the current node and [l, r] the range it covers; call with
// (0, ul, ur, 0, size-1).
void update(int n,int ul,int ur,int l,int r)
{
    // Must happen even for nodes outside the update range, because the
    // parent re-sums both children after the recursive calls.
    push_down(n,l,r);
    if(l>ur || r<ul)
        return ;
    if(l>=ul && r<=ur)
    {
        apply_flip(n,l,r);
        return;
    }
    int mid=(l+r)/2;
    update(2*n+1,ul,ur,l,mid);
    update(2*n+2,ul,ur,mid+1,r);
    tree[n].h=tree[2*n+1].h+tree[2*n+2].h;
    tree[n].t=tree[2*n+1].t+tree[2*n+2].t;
}
// Reads the coin count and the number of commands, then processes each
// command "type l r": a non-zero type prints the number of heads in [l, r],
// type 0 flips the coins in [l, r].
int main()
{
    std::ios_base::sync_with_stdio(false);

    int coinCount;
    cin>>coinCount;
    build_tree(0,0,coinCount-1);

    int remaining;
    cin>>remaining;
    while(remaining-- != 0)
    {
        int type;
        int left,right;
        cin>>type;
        cin>>left>>right;

        if(type == 0)
        {
            update(0,left,right,0,coinCount-1);
            continue;
        }
        cout<<query(0,left,right,0,coinCount-1)<<'\n';
    }
}
There was a problem with the lazy propagation part: the children's pending-flip flag has to be toggled, not set. It should be (and likewise for 2*n+2):
tree[2*n+1].lazy=1-tree[2*n+1].lazy
and not
tree[2*n+1].lazy=1