OpenBlas parallelisation from OpenMP Thread

OpenBlas parallelisation from OpenMP Thread - parallel-processing

I tried to call an OpenBlas function from an OpenMP thread while the Blas parallelisation is set to a value unequal to one. I am using OpenBlas 0.3.9, after downloading the source I untared it and called
make USE_OPENMP=1
make PREFIX=/someFolder/ install
However I always get the following error message from my executeable
OpenBLAS Warning : Detect OpenMP Loop and this application may hang. Please rebuild the library with USE_OPENMP=1 option.
Does anyone know, why this is the case and how I can change it? Here is a minimal example of my code:
#include <complex>
#include <vector>
#include <random>
#include <iostream>
#include <algorithm>
#include <omp.h>
#include <cblas.h>
#include <lapacke.h>
int main(int, char**) {
int const blas_threads = 2,
omp_threads = 2,
matrix_size = 100;
openblas_set_num_threads(blas_threads);
omp_set_max_active_levels(2);
double alpha = 1.,
beta = 0.;
std::vector<std::vector<double>> as(omp_threads,
std::vector<double>(matrix_size*matrix_size));
std::vector<std::vector<double>> bs(omp_threads,
std::vector<double>(matrix_size*matrix_size));
std::vector<std::vector<double>> cs(omp_threads,
std::vector<double>(matrix_size*matrix_size));
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<double> dis;
for(int i = 0; i < omp_threads; ++i) {
std::generate(as[i].begin(),
as[i].end(),
[&dis,&gen]() { return dis(gen); });
std::generate(bs[i].begin(),
bs[i].end(),
[&dis,&gen]() { return dis(gen); });
}
// for(int i = 0; i < matrix_size*matrix_size; ++i) {
// std::cout << as[0][i] << " " << bs[0][i] << std::endl;
// }
#pragma omp parallel for num_threads(omp_threads), schedule(static, 1)
for(int i = 0; i < omp_threads; ++i) {
cblas_dgemm(CblasColMajor,
CblasNoTrans,
CblasNoTrans,
matrix_size,
matrix_size,
matrix_size,
alpha,
as[i].data(),
matrix_size,
bs[i].data(),
matrix_size,
beta,
cs[i].data(),
matrix_size);
}
// for(int i = 0; i < matrix_size*matrix_size; ++i) {
// std::cout << cs[0][i] << std::endl;
// }
return 0;
}

Related

<random> generates same number in Windows, but not in Linux

I do not why, but in Windows (with MinGW) this code generates for 3/4 time the same pseudo-random number.
I think that is because I set badly the seed, but I can not correct it.
Thank you for your help.
Here there is the code:
#include <iostream>
#include <random>
#include <chrono>
int main()
{
double Nprove = 50.0;
double p = 0.2;
const int Ncampioni = 100; // number of samples
int cappa = 0;
double sample[Ncampioni];
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
std::mt19937 gen(seed);
std::binomial_distribution<> d(Nprove, 0.9);
for(cappa = 0; cappa < Ncampioni; cappa = cappa +1){
sample[cappa] = d(gen);
std::cout << cappa << "," << sample[cappa] << std::endl;
}
}

wrong result boost gmp float

I need to compute 5^64 with boost multiprecision library which should yield 542101086242752217003726400434970855712890625 but boost::multiprecision::pow() takes mpfloat and gives 542101086242752217003726392492611895881105408.
However If I loop and repeatedly multiply using mpint I get correct result.
Is it a bug ? or I am using boost::multiprecision::pow() in a wrong way ? or I there is an alternative of using boost::multiprecision::pow() ?
#include <iostream>
#include <string>
#include <boost/multiprecision/gmp.hpp>
typedef boost::multiprecision::mpz_int mpint;
typedef boost::multiprecision::number<boost::multiprecision::gmp_float<4> > mpfloat;
int main(){
mpfloat p = boost::multiprecision::pow(mpfloat(5), mpfloat(64));
std::cout << p.template convert_to<mpint>() << std::endl;
mpint res(1);
for(int i = 0; i < 64; ++i){
res = res * 5;
}
std::cout << res << std::endl;
}

How to write a video?

I'm using opencv with visual studio 2010 in Windows 7 with 32 bit OS.... While running the sample program of People detection, it shows the output video playing in a window... But I'm unable to open the output video, stored in a particular location... Kindly help me... Thankyou...
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <opencv2/core/core.hpp>
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
using namespace cv;
using namespace std;
int main(int argc, char** argv)
{
Mat img; char _filename[1024];
HOGDescriptor hog;
hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
namedWindow("people detector", 1);
CvCapture *cap=cvCaptureFromFile("E:/Phase_I_output/2.walk.avi");
img=cvQueryFrame(cap);
for(;;)
{
img=cvQueryFrame(cap);
if(img.empty())
break;
fflush(stdout);
vector<Rect> found, found_filtered;
double t = (double)getTickCount();
int can = img.channels();
hog.detectMultiScale(img, found, 0, Size(8,8), Size(32,32), 1.05, 2);
t = (double)getTickCount() - t;
printf("tdetection time = %gms\n", t*1000./cv::getTickFrequency());
size_t i, j;
for( i = 0; i < found.size(); i++ )
{
Rect r = found[i];
for( j = 0; j < found.size(); j++ )
if( j != i && (r & found[j]) == r)
break;
if( j == found.size() ) found_filtered.push_back(r);
}
for( i = 0; i < found_filtered.size(); i++ )
{
Rect r = found_filtered[i];
r.x += cvRound(r.width*0.1);
r.width = cvRound(r.width*0.8);
r.y += cvRound(r.height*0.07);
r.height = cvRound(r.height*0.8);
rectangle(img, r.tl(), r.br(), cv::Scalar(0,255,0), 3);
}
Size size2 = Size(640,480);
int codec = CV_FOURCC('M', 'J', 'P', 'G');
VideoWriter writer2("E:/Phase_I_output/video_.avi",codec,50.0,size2,true);
writer2.open("E:/Phase_I_output/video_.avi",codec,15.0,size2,true);
writer2.write(img);
imshow("people detector", img);
if(waitKey(1) == 27)
break;
}
std::cout << "Completed" << std::endl ;
waitKey();
return 0;
}

You should initialize the videowriter before the infinite loop, and release the videowriter (not necessary with the C++ API) and the videocapture once there are no more frame to grab :
Size size2 = Size(640,480);
int codec = CV_FOURCC('M', 'J', 'P', 'G');
VideoWriter writer2("E:/Phase_I_output/video_.avi",codec,50.0,size2,true);
writer2.open("E:/Phase_I_output/video_.avi",codec,50.0,size2,true);
for(;;){
//do your stuff
//write the current frame
writer2.write(img);
}
cvReleaseVideoWriter( writer2 );
cvReleaseCapture( &cap );
You should also use the C++ API of openCV. I believe every function beginning with 'cv' is part of the C API (and no longer supported). Check the openCV documentation to find the corresponding C++ function.
For example :
img=cvQueryFrame(cap);
Will become :
cap >> img;
Edit
I corrected your code to use the C++ API of openCV, and it's working fine (the people detection seems to give false positive though). Here is the code :
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <opencv2/core/core.hpp>
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
using namespace cv;
using namespace std;
int main(int argc, char** argv)
{
Mat img;
string _filename;
_filename = "path/to/video.avi";
HOGDescriptor hog;
hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
namedWindow("people detector", WND_PROP_AUTOSIZE);
VideoCapture cap = VideoCapture(_filename);
if(!cap.isOpened()){
cout<<"error opening : "<<_filename<<endl;
return 1;
}
Size size2 = Size(640,480);
int codec = static_cast<int>(cap.get(CV_CAP_PROP_FOURCC));
double fps = cap.get(CV_CAP_PROP_FPS);
VideoWriter writer2("../outputVideo_.avi",codec,fps,size2,true);
for(;;)
{
cap >> img;
if(img.empty()){
cout<<"frame n° "<<cap.get(CV_CAP_PROP_FRAME_COUNT)<<endl;
break;
}
fflush(stdout);
vector<Rect> found, found_filtered;
double t = (double)getTickCount();
int can = img.channels();
hog.detectMultiScale(img, found, 0, Size(8,8), Size(32,32), 1.05, 2);
t = (double)getTickCount() - t;
printf("tdetection time = %gms\n", t*1000./cv::getTickFrequency());
size_t i, j;
for( i = 0; i < found.size(); i++ )
{
Rect r = found[i];
for( j = 0; j < found.size(); j++ )
if( j != i && (r & found[j]) == r)
break;
if( j == found.size() ) found_filtered.push_back(r);
}
for( i = 0; i < found_filtered.size(); i++ )
{
Rect r = found_filtered[i];
r.x += cvRound(r.width*0.1);
r.width = cvRound(r.width*0.8);
r.y += cvRound(r.height*0.07);
r.height = cvRound(r.height*0.8);
rectangle(img, r.tl(), r.br(), Scalar(0,255,0), 3);
}
//writer2.write(img);
writer2 << img;
imshow("people detector", img);
if(waitKey(1) == 27)
{
break;
}
}
writer2.release();
cap.release();
cout << "Completed" << endl ;
waitKey();
destroyAllWindows();
return 0;
}

Interleave random numbers

I would like to interleave a random number with some alphanumeric characters, for example: HELLO mixed with the random number 25635 → H2E5L6L3O5. I know %1d controls the spacing, although I'm not sure how to interleave text between the random numbers or how accomplish this.
Code:
int main(void) {
int i;
srand(time(NULL));
for (i = 1; i <= 10; i++) {
printf("%1d", 0 + (rand() % 10));
if (i % 5 == 0) {
printf("\n");
}
}
return 0;
}
btw - if my random number generator isn't very good i'm open to suggestions - thanks

If you're okay with using C++11, you could use something like this:
#include <iostream>
#include <random>
#include <string>
int main() {
std::random_device rd;
std::default_random_engine e1(rd());
std::uniform_int_distribution<int> uniform_dist(0, 9);
std::string word = "HELLO";
for (auto ch : word) {
std::cout << ch << uniform_dist(e1);
}
std::cout << '\n';
}
...which produces e.g.:
H3E6L6L1O5
If you're stuck with an older compiler, you could use rand and srand from the standard C library for your random numbers:
#include <iostream>
#include <cstdlib>
#include <ctime>
#include <string>
int main() {
std::srand(std::time(NULL));
std::string word = "HELLO";
for (int i = 0; i < word.size(); ++i) {
std::cout << word[i] << (rand() % 10);
}
std::cout << '\n';
}

OpenCV: Load multiple images

I am updating some older OpenCV code that was written in (I guess) an OpenCV 1.1 manner (i.e. using IplImages).
What I want to accomplish right now is to simply load a series of images (passed as command line arguments) as Mats. This is part of a larger task. The first code sample below is the old code's image loading method. It loads 5 images from the command line and displays them in sequence, pausing for a key hit after each, then exits.
The second code sample is my updated version using Mat. It works fine so far, but is this the best way to do this? I've used an array of Mats. Should I use an array of pointers to Mats instead? And is there a way to do this such that the number of images is determined at run time from argc and does not need to be set ahead of time with IMAGE_NUM.
Basically, I'd like to be able to pass any number (within reason) of images as command line arguments, and have them loaded into some convenient array or other similar storage for later reference.
Thanks.
Old code:
#include <iostream>
#include <cv.h>
#include <cxcore.h>
#include <highgui.h>
using namespace std;
using namespace cv;
// the number of input images
#define IMAGE_NUM 5
int main(int argc, char **argv)
{
uchar **imgdata;
IplImage **img;
int index = 0;
char *img_file[IMAGE_NUM];
cout << "Loading files" << endl;
while(++index < argc)
if (index <= IMAGE_NUM)
img_file[index-1] = argv[index];
// malloc memory for images
img = (IplImage **)malloc(IMAGE_NUM * sizeof(IplImage *)); // Allocates memory to store just an IplImage pointer for each image loaded
imgdata = (uchar **)malloc(IMAGE_NUM * sizeof(uchar *));
// load images. Note: cvLoadImage actually allocates the memory for the images
for (index = 0; index < IMAGE_NUM; index++) {
img[index] = cvLoadImage(img_file[index], 1);
if (!img[index]->imageData){
cout << "Image data not loaded properly" << endl;
return -1;
}
imgdata[index] = (uchar *)img[index]->imageData;
}
for (index = 0; index < IMAGE_NUM; index++){
imshow("myWin", img[index]);
waitKey(0);
}
cvDestroyWindow("myWin");
cvReleaseImage(img);
return 0;
}
New code:
#include <iostream>
#include <cv.h>
#include <cxcore.h>
#include <highgui.h>
#include <time.h>
using namespace std;
using namespace cv;
// the number of input images
#define IMAGE_NUM 5
int main(int argc, char **argv)
{
Mat img[IMAGE_NUM];
int index = 0;
for (index = 0; index < IMAGE_NUM; index++) {
img[index] = imread(argv[index+1]);
if (!img[index].data){
cout << "Image data not loaded properly" << endl;
cin.get();
return -1;
}
}
for (index = 0; index < IMAGE_NUM; index++) {
imshow("myWin", img[index]);
waitKey(0);
}
cvDestroyWindow("myWin");
return 0;
}

you can use a vector instead of an array:
for example
#include <iostream>
#include <cv.h>
#include <cxcore.h>
#include <highgui.h>
#include <time.h>
#include <vector>
using namespace std;
using namespace cv;
int main(int argc, char **argv)
{
vector<Mat> img;
//Mat img[IMAGE_NUM];
int index = 0;
for (index = 0; index < IMAGE_NUM; index++) {
//img[index] = imread(argv[index+1]);
img.push_back(imread(argy[index+1]));
if (!img[index].data){
cout << "Image data not loaded properly" << endl;
cin.get();
return -1;
}
}
vector<Mat>::iterator it;
for (it = img.begin(); it != img.end() ; it++) {
imshow("myWin", (*it));
waitKey(0);
}
cvDestroyWindow("myWin");
return 0;
}

It took me a while to get back around to this, but what I've ended up doing is as follows, which is probably functionally the same as Gootik's suggestion. This has worked well for me. Notice that for functions that take Mat& (i.e. a single cv::Mat), you can just de-ref the array of Mats and pass that, which is a notation I'm more comfortable with after doing a lot of image processing work in Matlab.
#include <iostream>
#include <cv.h>
#include <cxcore.h>
#include <highgui.h>
using namespace std;
using namespace cv;
int main(int argc, char **argv)
{
if (argc==1){
cout << "No images to load!" << endl;
cin.get();
return 0;
}
int index = 0;
int image_num = argc-1;
Mat *img = new Mat[image_num]; // allocates table on heap instead of stack
// Load the images from command line:
for (index = 0; index < image_num; index++) {
img[index] = imread(argv[index+1]);
if (!img[index].data){
cout << "Image data not loaded properly" << endl;
cin.get();
return -1;
}
}
for (index = 0; index < image_num; index++) {
imshow("myWin", img[index]);
waitKey(0);
}
cvDestroyWindow("myWin");
delete [] img; // notice the [] when deleting an array.
return 0;
}

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

OpenBlas parallelisation from OpenMP Thread - parallel-processing

Related

<random> generates same number in Windows, but not in Linux

wrong result boost gmp float

How to write a video?

Interleave random numbers

OpenCV: Load multiple images

Categories

Resources