Applgrid makefile (undefined reference) - compilation

I've a question that could be naive for the experienced users, but I'm kinda stuck. I'm trying to get ApplGrid code working (https://applgrid.hepforge.org/index.htm), so I wish to run the provided examples.
As far as I've understood, the dependencies are ROOT (root.cern.ch) and LHAPDF libraries (lhapdf.hepforge.org/index.html), which work well on my machine.
The aforementioned examples were build to work with an older version of LHAPDF, so I had to change the name of an invoked method.
When I try to make, I get an error regarding the undefined reference of that function, but if I isolate that instruction and execute with
g++ extract.cxx `lhapdf-config --cflags --ldflags`
it works like a charm.
The output of the config command is:
-I/home/matteo/software/gcc-4.7/lhapdf/install/include -I/home/matteo/software/gcc-4.7/boost/install/include -L/home/matteo/software/gcc-4.7/lhapdf/install/lib -lLHAPDF
If I remove the "LHAPDF-related commands" from the original cxx file I can run "make" without any problem.
As a final test, I've tried to isolate the "applgrid related code" and try to compile with:
g++ extract_2.cxx `applgrid-config --cxxflags --ldfflags`
but I get an undefined reference error.
The output of the config command is:
-I/home/matteo/software/gcc-4.7/applgrid/install/include -pthread -m64 -I/home/matteo/software/gcc-4.7/root/install/include -L/home/matteo/software/gcc-4.7/applgrid/install/lib -lfAPPLgrid -lAPPLgrid -m64 -L/home/matteo/software/gcc-4.7/root/install/lib -lCore -lCint -lRIO -lNet -lHist -lGraf -lGraf3d -lGpad -lTree -lRint -lPostscript -lMatrix -lPhysics -lMathCore -lThread -pthread -lm -ldl -rdynamic -L/home/matteo/software/gcc-4.7/hoppet/install/lib -lhoppet_v1
the content of the makefile is the following:
CXX = g++
F77 = gfortran
FFLAGS += -O3 -fPIC
CXXFLAGS += -O3 -fPIC
# root
ROOTINCS = $(shell root-config --cflags)
ROOTLIBS = $(shell root-config --glibs)
ROOTARCH = $(findstring -m64, $(ROOTINCS) )
#LHAPDF
LHAPDFINCS = $(shell lhapdf-config --cflags)
LHAPDFDIR = $(shell lhapdf-config --libdir)
LHAPDFLIBS = $(shell lhapdf-config --libs)
# applgrid
APPLCXXFLAGS = $(shell applgrid-config --cxxflags)
APPLCLIBS = $(shell applgrid-config --ldcflags)
APPLFLIBS = $(shell applgrid-config --ldflags)
# hoppet
HOPPETLIBS = $(shell hoppet-config --libs)
# get the fotran runtime library for linking fortran
FRTLLIB = $(shell gfortran $(CXXFLAGS) -print-file-name=libgfortran.a)
FRTLIB = -L$(subst /libgfortran.a, ,$(FRTLLIB) ) -lgfortran
# now set up the compile and link flags and libs
CXXFLAGS += $(ROOTARCH) $(ROOTINCS) $(APPLCXXFLAGS) $(LHAPDFINCS)
LDFLAGS += $(ROOTARCH)
FFLAGS += $(ROOTARCH)
CLIBS += $(ROOTLIBS) $(LHAPDFLIBS) $(HOPPETLIBS) $(APPLCLIBS)
FLIBS += $(ROOTLIBS) $(LHAPDFLIBS) $(HOPPETLIBS) $(APPLFLIBS) $(APPLCLIBS) $(FRTLIB)
install : all
all : stand fnlo fstand getdata
stand: stand.o
$(CXX) $(LDFLAGS) -o $# $< $(CLIBS)
fstand: fstand.o fmain.o
$(CXX) $(LDFLAGS) -o $# fstand.o fmain.o $(FLIBS)
% : %.o
$(CXX) $(LDFLAGS) -o $# $< $(CLIBS)
fnlo: fnmain.o
$(CXX) $(LDFLAGS) -o $# $< $(CLIBS)
.SUFFIXES : .cxx .o .f .c
.f.o :
$(F77) $(FFLAGS) -c $<
.cxx.o:
$(CXX) $(CXXFLAGS) -c $<
#########################################################
# just type make getdata and it will download all
# the required data for these examples
#########################################################
.PRECIOUS : fnlodata appldata
getdata : getfnlodata getappldata
getfnlodata: fnl0004.tab
fnl0004.tab:
# echo "\n************************\ndownloading fastnlo data\n************************\n"
(curl http://fastnlo.hepforge.org/code/src/fnl0004.tab.gz | gunzip > fnl0004.tab )
getappldata: atlas-incljets-arxiv-1009.5908v2
atlas-incljets-arxiv-1009.5908v2:
# echo "\n*************************************\n downloading atlas inclusive jet data\n***********************************\n"
(curl http://www.hepforge.org/archive/applgrid/atlas-incljets-arxiv-1009.5908v2.tgz | tar -xzf - )
clean:
rm -rf ./.libs ./.obj *.lo *.o *.la stand fnlo fstand *~
while the other files in the directory are the following:
fmain.cxx
extern "C" void fstand_();
int main() {
fstand_();
return 0
fnmain.cxx
#include <iostream>
#include <stdio.h>
#include "appl_grid/fastnlo.h"
#include "TFile.h"
#include "TH1D.h"
// lhapdf routines
#include "LHAPDF/LHAPDF.h"
extern "C" void evolvepdf_(const double& , const double& , double* );
extern "C" double alphaspdf_(const double& Q);
int main(int argc, char** argv) {
if ( argc<2 ) {
std::cerr << "usage: fnlo fastnlogrid.tab" << std::endl;
return -1;
}
std::cout << "set up lhapdf..." << std::endl;
const std::string _pdfname = "cteq6mE.LHgrid";
int iset = 0;
LHAPDF::initPDFSet( _pdfname, iset );
// don't need to hard code, can use a runtime parameter...
std::string gridname = argv[1];
// std::string gridname = "fnt1007midp.tab";
// std::string gridname = "fnt1008midp.tab";
// std::string gridname = "fnt2004.tab";
// std::string gridname = "fnl2004.tab";
fastnlo f( gridname );
std::vector<appl::grid*> g = f.grids();
// g.push_back( new appl::grid("atlas-incljets04-eta1.root") );
/// histograms and file for seeing the results
std::vector<TH1D*> hc(g.size());
std::string foutname = "appl.root";
TFile fout( foutname.c_str(),"recreate");
for ( int i=0 ; i<g.size() ; i++ ) {
// trim the grids (not actually needed, done in
// the fastnlo constructor
g[i]->trim();
char hname[64];
sprintf(hname, "hist%02d", i);
/// optionally print out the grid documentation
std::cout << "\n" << g[i]->getDocumentation() << std::endl;
/// perform the convolution
hc[i] = g[i]->convolute( evolvepdf_, alphaspdf_ );
hc[i]->SetName(hname);
hc[i]->SetDirectory(&fout);
hc[i]->Write();
/// print out the results
for ( int j=1 ; j<=hc[i]->GetNbinsX() ; j++ ) {
std::cout << "xsec(" << j-1 << ")=" << hc[i]->GetBinContent(j) << std::endl;
}
}
std::cout << "writing file " << foutname << std::endl;
fout.Close();
return 0;
}
}
fstand.f
C----------------------------------------------------------
C dummy routines that call the pdf and alphas routines
C this example, they just call the lhapdf routine
C for apdf fit, call your own routines
C----------------------------------------------------------
double precision function fnalphas(Q)
double precision Q
double precision alphaspdf
fnalphas = alphaspdf(Q)
return
end
subroutine fnpdf(x, Q, xf)
double precision x, Q
double precision xf(13)
call evolvePDF(x, Q, xf)
return
end
C----------------------------------------------------------
stand.cxx
#include <iostream>
#include <string>
#include <vector>
#include "appl_grid/appl_grid.h"
#include "appl_grid/appl_timer.h"
#include "TH1D.h"
#include "TPad.h"
// lhapdf routines
#include "LHAPDF/LHAPDF.h"
extern "C" void evolvepdf_(const double& , const double& , double* );
extern "C" double alphaspdf_(const double& Q);
int main(int argc, char** argv) {
// use a default atlas inclusive grid
std::string gridname = "atlas-incljets04-eta1.root";
if ( argc>1 ) gridname = argv[1];
std::cout << "reading grid " << gridname << std::endl;
// get name of grid from user and create from grid file
appl::grid g(gridname);
g.trim(); // trim away uneeded memory
/// print the grid documentation
std::cout << g.getDocumentation() << std::endl;
// initialise lhapdf
std::cout << "setting up lhapdf" << std::endl;
// const std::string _pdfname = "cteq6mE.LHgrid";
const std::string _pdfname = "CT10.LHgrid";
int iset = 0;
LHAPDF::initPDFSet( _pdfname, iset );
// initpdfset_(_pdfname.c_str());
// initpdf_( iset );
// do the convolution into a vector
std::cout << "doing standalone convolution" << std::endl;
// struct timeval atimer = appl_timer_start();
std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );
// double atime = appl_timer_stop(atimer);
// std::cout << "time " << atime << " ms" << std::endl;
for ( int i=0 ; i<xsec.size() ; i++ ) std::cout << "xsec[" << i << "]\t= " << xsec[i] << std::endl;
// do the convolution into a TH1D
// atimer = appl_timer_start();
TH1D* hxsec = g.convolute( evolvepdf_, alphaspdf_ );
// atime = appl_timer_stop(atimer);
// std::cout << "time " << atime << " ms" << std::endl;
hxsec->SetLineColor(kRed);
hxsec->SetMarkerColor(kRed);
hxsec->SetMarkerStyle(20);
hxsec->DrawCopy();
gPad->SetLogy(true);
gPad->Print("xsec.pdf");
return 0;
}
stand-full.cxx
#include <iostream>
#include <string>
#include <vector>
#include "appl_grid/appl_grid.h"
#include "appl_grid/appl_timer.h"
#include "TH1D.h"
#include "TPad.h"
// lhapdf routines
#include "LHAPDF/LHAPDF.h"
extern "C" void evolvepdf_(const double& , const double& , double* );
extern "C" double alphaspdf_(const double& Q);
int main(int argc, char** argv) {
// use a default atlas inclusive grid
std::string gridname = "atlas-incljets04-eta1.root";
if ( argc>1 ) gridname = argv[1];
std::cout << "reading grid " << gridname << std::endl;
// get name of grid from user and create from grid file
appl::grid g(gridname);
g.trim(); // trim away uneeded memory
std::cout << "setting up lhapdf" << std::endl;
// initialise lhapdf
// const std::string _pdfname = "cteq6mE.LHgrid";
const std::string _pdfname = "CT10.LHgrid";
int iset = 0;
LHAPDF::initPDFSet( _pdfname, iset );
// initpdfset_(_pdfname.c_str());
// initpdf_( iset );
// do the convolution into a vector
std::cout << "doing standalone convolution" << std::endl;
std::cout << g.getDocumentation() << std::endl;
struct timeval atimer = appl_timer_start();
std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );
double atime = appl_timer_stop(atimer);
const int Nsubproc = g.subProcesses();
std::cout << "time : " << atime << " ms\tNsubproc : " << Nsubproc << std::endl;
std::vector<std::vector<double> > xsb(Nsubproc);
for ( int i=0 ; i<Nsubproc ; i++ ) {
xsb[i] = g.vconvolute( i, evolvepdf_, alphaspdf_ );
}
// print results
for ( int i=0 ; i<xsec.size() ; i++ ) {
std::cout << "xsec(" << i << ")=" << xsec[i] << std::endl;
}
for ( int i=0 ; i<Nsubproc ; i++ ) {
std::cout << "subproc: " << i << std::endl;
for ( int j=0 ; j<xsec.size() ; j++ ) {
std::cout << "xsec(" << i << ")=" << xsb[i][j] << std::endl;
}
}
TH1D* hxsec = g.convolute( evolvepdf_, alphaspdf_ );
std::vector<TH1D*> hxsb(Nsubproc);
for ( int i=0 ; i<Nsubproc ; i++ ) {
hxsb[i] = g.convolute( i, evolvepdf_, alphaspdf_ );
}
hxsec->SetLineColor(kRed);
hxsec->SetMarkerColor(kRed);
hxsec->SetMarkerStyle(20);
hxsec->SetMinimum(0.000001);
hxsec->DrawCopy();
gPad->SetLogy(true);
for ( int i=0 ; i<Nsubproc ; i++ ) {
hxsb[i]->SetLineColor(kRed+i%5);
hxsb[i]->SetMarkerColor(kRed+i%5);
hxsb[i]->SetLineStyle(1+i%5);
hxsb[i]->SetMarkerStyle(20+i%5);
hxsb[i]->DrawCopy("same");
}
gPad->Print("xsec.pdf");
double escale = 7e3/8e3;
TH1D* hxsec8 = g.convolute( escale, evolvepdf_, alphaspdf_ );
escale = 7e3/2.76e3;
TH1D* hxsec276 = g.convolute( escale, evolvepdf_, alphaspdf_ );
hxsec->DrawCopy();
hxsec8->SetLineColor(kRed+1);
hxsec8->SetLineStyle(2);
hxsec8->DrawCopy("same");
gPad->Print("xsec8.pdf");
hxsec276->SetLineColor(kRed+2);
hxsec276->SetLineStyle(3);
hxsec276->DrawCopy("same");
gPad->Print("xsec276.pdf");
// or get into a histogram
// TH1D* hxsec = g.convolute( evolvepdf_, alphaspdf_ );
// hxsec->SetName("xsec");
return 0;
}
stand-save.cxx
#include <iostream>
#include <string>
#include <vector>
#include "appl_grid/appl_grid.h"
#include "appl_grid/appl_timer.h"
#include "TH1D.h"
#include "TPad.h"
// lhapdf routines
#include "LHAPDF/LHAPDF.h"
extern "C" void evolvepdf_(const double& , const double& , double* );
extern "C" double alphaspdf_(const double& Q);
int main(int argc, char** argv) {
// use a default atlas inclusive grid
std::string gridname = "atlas-incljets04-eta1.root";
if ( argc>1 ) gridname = argv[1];
std::cout << "reading grid " << gridname << std::endl;
// get name of grid from user and create from grid file
appl::grid g(gridname);
g.trim(); // trim away uneeded memory
/// print the grid documentation
std::cout << g.getDocumentation() << std::endl;
// initialise lhapdf
std::cout << "setting up lhapdf" << std::endl;
// const string _pdfname = "cteq6mE.LHgrid";
const string _pdfname = "CT10.LHgrid";
int iset = 0;
LHAPDF::initPDFSet( _pdfname, iset );
// initpdfset_(_pdfname.c_str());
// initpdf_( iset );
// do the convolution into a vector
std::cout << "doing standalone convolution" << std::endl;
// struct timeval atimer = appl_timer_start();
std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );
// double atime = appl_timer_stop(atimer);
// std::cout << "time " << atime << " ms" << std::endl;
for ( int i=0 ; i<xsec.size() ; i++ ) std::cout << "xsec[" << i << "]\t= " << xsec[i] << std::endl;
// do the convolution into a TH1D
// atimer = appl_timer_start();
TH1D* hxsec = g.convolute( evolvepdf_, alphaspdf_ );
// atime = appl_timer_stop(atimer);
// std::cout << "time " << atime << " ms" << std::endl;
hxsec->SetLineColor(kRed);
hxsec->SetMarkerColor(kRed);
hxsec->SetMarkerStyle(20);
hxsec->DrawCopy();
gPad->SetLogy(true);
gPad->Print("xsec.pdf");
return 0;
}
stand-tutorial.cxx
#include <iostream>
#include <string>
#include <vector>
#include "appl_grid/appl_grid.h"
#include "appl_grid/appl_timer.h"
#include "TH1D.h"
#include "TLatex.h"
#include "TFile.h"
#include "TH2D.h"
#include "TStyle.h"
#include "TCanvas.h"
#include "TPad.h"
#include "TStyle.h"
#include "TColor.h"
#include "TPaveStats.h"
// lhapdf routines
#include "LHAPDF/LHAPDF.h"
extern "C" void evolvepdf_(const double& x, const double& Q, double* xf);
extern "C" double alphaspdf_(const double& Q);
void setpalette()
{
const Int_t NRGBs = 7;
const Int_t NCont = 98;
Double_t stops[NRGBs] = { 0.00, 0.20, 0.40, 0.60, 0.70, 0.90, 1.00 };
Double_t red[NRGBs] = { 0.00, 0.00, 0.00, 0.60, 0.90, 1.00, 0.50 };
Double_t green[NRGBs] = { 0.00, 0.00, 0.40, 0.95, 1.00, 0.20, 0.00 };
Double_t blue[NRGBs] = { 0.00, 1.00, 1.00, 0.10, 0.00, 0.00, 0.00 };
TColor::CreateGradientColorTable(NRGBs, stops, red, green, blue, NCont);
gStyle->SetNumberContours(NCont);
}
TH2D* smooth(TH2D* h) {
int Nnbins = 10;
TH2D* hi = new TH2D("h2","",
h->GetXaxis()->GetNbins()*Nnbins,
h->GetXaxis()->GetBinLowEdge(1),
h->GetXaxis()->GetBinLowEdge(h->GetXaxis()->GetNbins()+1),
h->GetYaxis()->GetNbins()*Nnbins,
h->GetYaxis()->GetBinLowEdge(1),
h->GetYaxis()->GetBinLowEdge(h->GetYaxis()->GetNbins()+1) );
hi->GetXaxis()->SetTitle( h->GetXaxis()->GetTitle() );
hi->GetYaxis()->SetTitle( h->GetYaxis()->GetTitle() );
hi->GetYaxis()->SetTitleOffset(1.1);
for ( int i=1 ; i<=h->GetXaxis()->GetNbins() ; i++ ) {
for ( int j=1 ; j<=h->GetYaxis()->GetNbins() ; j++ ) {
double z1 = h->GetBinContent(i,j);
double z2 = h->GetBinContent(i+1,j);
double z3 = h->GetBinContent(i,j+1);
double z4 = h->GetBinContent(i+1,j+1);
if ( z2==0 ) z2 = z1;
if ( z3==0 ) z3 = z1;
if ( z4==0 ) z4 = z3;
for ( int ii=0 ; ii<Nnbins ; ii++ ) {
double fx = 1.0*ii/Nnbins;
for ( int jj=0 ; jj<Nnbins ; jj++ ) {
double fy = 1.0*jj/Nnbins;
double z = ( (z4-z3-z2+z1)*fx + z3 - z1 )*fy + (z2-z1)*fx + z1;
// std::cout << (i-1)*Nnbins+ii+1 << " " << (j-1)*Nnbins+jj+1 << "\t z : " << z << "\tz1-4: " << z1 << " " << z2 << " " << z3 << " " << z4 << std::endl;
hi->SetBinContent( (i-1)*Nnbins+ii+1, (j-1)*Nnbins+jj+1, z);
// hi->SetBinContent( (i-1)*Nnbins+ii+1, (j-1)*Nnbins+jj+1, h->GetBinContent(i,j) );
}
}
}
}
return hi;
}
int main(int argc, char** argv) {
gStyle->SetOptStat(0);
setpalette();
// use a default atlas inclusive grid
std::string gridname = "atlas-incljets-arxiv-1009.5908v2/r04/atlas-incljets-eta1.root";
if ( argc>1 ) gridname = argv[1];
std::cout << "reading grid " << gridname << std::endl;
appl::grid g(gridname);
// g.trim(); // trim away uneeded memory
/// print the grid documentation
std::cout << g.getDocumentation() << std::endl;
// initialise lhapdf
std::cout << "setting up lhapdf" << std::endl;
// const std::string _pdfname = "cteq6mE.LHgrid";
const std::string _pdfname = "CT10.LHgrid";
int iset = 0;
LHAPDF::initPDFSet( _pdfname, iset );
// initpdfset_(_pdfname.c_str());
// initpdf_( iset );
/// ----------------- example 1 --------------------
if ( true ) {
// do the convolution into a vector
std::cout << "example 1: standalone convolution" << std::endl;
struct timeval atimer = appl_timer_start();
std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );
double atime = appl_timer_stop(atimer);
std::cout << "time : " << atime << " ms" << std::endl;
for ( int i=0 ; i<xsec.size() ; i++ ) std::cout << "xsec[" << i << "]\t= " << xsec[i] << std::endl;
TH1D* hxsec = g.convolute( evolvepdf_, alphaspdf_ );
hxsec->SetTitle("");
hxsec->DrawCopy();
gPad->SetLogy(true);
gPad->Print("example-1.pdf");
}
/// ----------------- example 2 (corrections) --------------------
if ( false ) {
std::cout << "example 2: multiplicative corrections" << std::endl;
std::cout << g.getApplyCorrections() << std::endl;
g.setApplyCorrections(false);
TH1D* huncor = g.convolute( evolvepdf_, alphaspdf_ );
g.setApplyCorrections(true);
TH1D* hcor = g.convolute( evolvepdf_, alphaspdf_ );
huncor->SetLineStyle(2);
huncor->SetTitle("");
huncor->DrawCopy();
hcor->DrawCopy("same");
gPad->Print("example-2.pdf");
}
/// ----------------- example 3 (cms scaling) --------------------
if ( false ) {
std::cout << "example 3: centre-of-mass energy scaling " << std::endl;
TH1D* h7 = g.convolute( evolvepdf_, alphaspdf_ );
double Escale = 7/2.46;
TH1D* h246 = g.convolute( Escale, evolvepdf_, alphaspdf_ );
std::cout << "cms scale " << g.getCMSScale() << std::endl;
h7->SetTitle("");
h7->SetMinimum(0.001);
gStyle->SetOptStat(0);
h7->DrawCopy();
h246->SetLineColor(kRed);
h246->DrawCopy("same");
gPad->SetLogy(true);
gPad->Print("example-3.pdf");
}
/// ------------------ example 4 (scale variation) -----------------
if ( false ) {
std::cout << "example 4: scale variation " << std::endl;
int nloops = 1; // loop order
int Nbins = 30;
TH2D* hscale = new TH2D("scale", "", Nbins, 0.45, 2.05, Nbins, 0.45, 2.05 );
struct timeval atimer = appl_timer_start();
for ( int i=1 ; i<=hscale->GetNbinsX() ; i++ ) {
double fscale = hscale->GetXaxis()->GetBinCenter(i);
for ( int j=1 ; j<=hscale->GetNbinsY() ; j++ ) {
double rscale = hscale->GetYaxis()->GetBinCenter(j);
std::vector<double> xs = g.vconvolute( evolvepdf_, alphaspdf_, nloops, rscale, fscale );
double total = 0;
for ( int k=xs.size() ; k-- ; ) total += xs[k];
hscale->Fill( fscale, rscale, total );
}
}
double atime = appl_timer_stop(atimer);
std::cout << "time " << atime << " ms" << std::endl;
/// plot ....
gStyle->SetPalette(52);
gStyle->SetPadRightMargin(0.17);
TCanvas* tc = new TCanvas( "","", 650, 600 );
hscale->GetXaxis()->SetTitle("factorisation scale factor");
hscale->GetYaxis()->SetTitle("renormalisation scale factor");
hscale->GetYaxis()->SetTitleOffset(1.1);
TH2D* _hscale = smooth(hscale);
_hscale->DrawCopy("colz");
gPad->Print("example-4.png");
TFile fs("scale.root","recreate");
fs.cd();
hscale->Write();
fs.Write();
fs.Close();
}
/// ------------------ example 5 (subprocesses) -----------------
if ( false ) {
std::cout << "example 4: subprocess contributions " << std::endl;
std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_);
std::vector<TH1D*> xsec_sub(g.subProcesses());
for ( int i=0 ; i<g.subProcesses() ; i++ ) {
xsec_sub[i] = g.convolute_subproc( i, evolvepdf_, alphaspdf_);
for ( int j=0 ; j<xsec_sub[i]->GetNbinsX() ; j++ ) {
xsec_sub[i]->SetBinContent( j+1, xsec_sub[i]->GetBinContent(j+1)/xsec[j] );
}
}
/// plot ...
int col[7] = { kRed, kBlue, kMagenta, kCyan, kGreen, kBlack, kViolet };
xsec_sub[0]->SetMaximum(0.6);
xsec_sub[0]->SetMinimum(0);
xsec_sub[0]->GetXaxis()->SetTitle("P_{T} [GeV]");
xsec_sub[0]->GetYaxis()->SetTitle("subprocess fraction");
xsec_sub[0]->SetTitle("");
xsec_sub[0]->DrawCopy();
for ( int i=0 ; i<g.subProcesses() ; i++ ) {
xsec_sub[i]->SetLineColor(col[i]);
xsec_sub[i]->DrawCopy("same");
TLatex* tl = new TLatex(0,0,"");
tl->SetTextColor(col[i]);
tl->SetTextSize(0.04);
char label[64];
sprintf(label, "subprocess %d", i);
tl->DrawLatex(430, 0.55-0.035*i, label );
}
gPad->SetLogy(false);
gPad->Print("example-5.pdf");
}
/// -------------------- example 6 (pdf sets) -----------------------
if ( false ) {
// const std::string _pdfname = "cteq6mE.LHgrid";
const std::string _pdfname = "CT10.LHgrid";
int iset = 0;
LHAPDF::initPDFSet( _pdfname, iset );
std::vector<double> _xs = g.vconvolute( evolvepdf_, alphaspdf_ );
int Nset = LHAPDF::numberPDF();
std::vector<TH1D*> xs(Nset);
for ( int i=0 ; i<Nset ; i++ ) {
struct timeval atimer = appl_timer_start();
LHAPDF::initPDFSet( _pdfname, i );
double atime = appl_timer_stop(atimer);
std::cout << "set " << i << "\tinitPDF time " << atime << " ms" << std::endl;
atimer = appl_timer_start();
xs[i] = g.convolute( evolvepdf_, alphaspdf_ );
atime = appl_timer_stop(atimer);
std::cout << "convolution time " << atime << " ms" << std::endl;
for ( int j=_xs.size() ; j-- ; ) xs[i]->SetBinContent( j+1, xs[i]->GetBinContent(j+1)/_xs[j]);
}
xs[0]->SetMaximum(1.1);
xs[0]->SetMinimum(0.9);
gPad->SetLogy(false);
gPad->SetLogx(true);
xs[0]->DrawCopy("l");
for ( int i=0 ; i<Nset ; i++ ) xs[i]->DrawCopy("lsame");
gPad->Print("example-6.pdf");
gPad->SetLogx(false);
}
/// ------------------- example 7 (timers) ---------------------
if ( false ) {
gStyle->SetOptStat(2220);
std::cout << "example 7 : timings" << std::endl;
TH1D* htrim = new TH1D("htrim", "", 100, 38, 45);
// TH1D* htrim2 = new TH1D("htrim2", "", 100, 37, 45);
TH1D* huntrim = new TH1D("huntrim", "", 100, 38, 45);
g.trim();
std::cout << "grid size : " << g.size() << std::endl;
for ( int i=400 ; i-- ; ) {
struct timeval atimer = appl_timer_start();
std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );
double atime = appl_timer_stop( atimer );
htrim->Fill(atime);
}
g.untrim();
std::cout << "grid size : " << g.size() << std::endl;
for ( int i=400 ; i-- ; ) {
struct timeval atimer = appl_timer_start();
std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );
double atime = appl_timer_stop( atimer );
huntrim->Fill(atime);
}
if ( huntrim->GetMaximum()<htrim->GetMaximum() ) huntrim->SetMaximum(1.1*htrim->GetMaximum());
else htrim->SetMaximum(1.1*huntrim->GetMaximum());
huntrim->SetLineStyle(2);
huntrim->SetLineColor(kRed);
htrim->GetXaxis()->SetTitle("time [ms]");
huntrim->GetXaxis()->SetTitle("time [ms]");
huntrim->Draw();
htrim->Draw("same");
// htrim2->DrawCopy("same");
huntrim->Draw("same");
htrim->Draw("sames+");
huntrim->Draw("sames+");
gPad->Update();
TPaveStats* tp1 = (TPaveStats*)htrim->GetListOfFunctions()->FindObject("stats");
TPaveStats* tp2 = (TPaveStats*)huntrim->GetListOfFunctions()->FindObject("stats");
std::cout << tp1 << " " << tp2 << std::endl;
tp2->SetTextColor(kRed);
tp1->SetY1NDC(0.93-0.15);
tp1->SetY2NDC(0.93);
tp2->SetY1NDC(0.93-0.16-0.15);
tp2->SetY2NDC(0.93-0.16);
tp1->Draw();
tp2->Draw();
gPad->Update();
gPad->Print("example-7.pdf");
}
return 0;
}
any help will be greatly appreciated, thank you.

Related

Where did torch::jit::load go?

Namespace "torch::jit" is missing member "load"
The official reference says it is there, but I can't use it.
It's not just an intelligence problem, but when I run it, it throws an error saying that there is no "jit::load".
Why?
source code
#include <torch/torch.h>
#include <iostream>
#include <Windows.h>
#include <gdiplus.h>
#include <gdipluspixelformats.h> // PixelFormat24bppRGB
#include <vector>
#include <cstdlib>
#pragma comment(lib, "gdiplus.lib")
int main()
{
Gdiplus::GdiplusStartupInput input;
ULONG_PTR token;
Gdiplus::GdiplusStartup(&token, &input, NULL);
std::vector<uint8_t> pixels;
Gdiplus::BitmapData bmpData;
LARGE_INTEGER freq, start, end;
QueryPerformanceFrequency(&freq);
std::wstring path = L"C:\\Users\\baiji\\Documents\\triggerBot\\data2\\0.jpg";
std::wstring path2 = L"D:\\screenshot\\result\\output.bmp";
auto image = Gdiplus::Bitmap::FromFile(path2.c_str());
QueryPerformanceCounter(&start);
int bWidth = image->GetWidth();
int bHeight = image->GetHeight();
std::cout << bWidth << std::endl;
std::cout << bHeight << std::endl;
auto stride = 3 * bWidth;
pixels.resize(stride * bHeight);
Gdiplus::Rect rect(0, 0, bWidth, bHeight);
image->LockBits(&rect, Gdiplus::ImageLockModeRead, PixelFormat24bppRGB, &bmpData);
for (int y = 0; y < bHeight; ++y) {
memcpy(pixels.data() + y * stride, (byte*)bmpData.Scan0 + y * bmpData.Stride, stride);
}
image->UnlockBits(&bmpData);
Gdiplus::GdiplusShutdown(token);
uint8_t buf1, buf2;
for (int i = 2;i < pixels.size(); i += 3) {
buf1 = pixels[i - 2];
buf2 = pixels[i];
pixels[i-2] = buf2;
pixels[i] = buf1;
}
std::cout << "要素数: " << pixels.size() << "\n";
torch::Tensor tsr = torch::tensor(torch::ArrayRef<uint8_t>(pixels)).to(torch::kFloat64) / 256;
torch::Tensor input = torch::reshape(tsr, { bWidth,bHeight,3 });
torch::jit::script::Module module;
module = torch::jit::load("model to path/traced_model.pt");
QueryPerformanceCounter(&end);
double time = static_cast<double>(end.QuadPart - start.QuadPart) * 1000.0 / freq.QuadPart;
std::cout << time << "ms\n";
//system("PAUSE");
return 1;
}
How can I run torch::jit::load?

How to read chunk of the data from a hdf5 file in c++?

I want to read a chunk of data which is just one frame of many frames stored in one dataset. The shape of the whole dataset is (10, 11214,3), 10 frames each frame has 11214 rows and 4 columns. Here is the file. The chunk I want to read would have the shape (11214,3). I can print the predefined array using, but I'm not sure how can I read data from a hdf5 file. Here is my code,
#include <h5xx/h5xx.hpp>
#include <boost/multi_array.hpp>
#include <iostream>
#include <vector>
#include <cstdio>
typedef boost::multi_array<int, 2> array_2d_t;
const int NI=10;
const int NJ=NI;
void print_array(array_2d_t const& array)
{
for (unsigned int j = 0; j < array.shape()[1]; j++)
{
for (unsigned int i = 0; i < array.shape()[0]; i++)
{
printf("%2d ", array[j][i]);
}
printf("\n");
}
}
void write_int_data(std::string const& filename, array_2d_t const& array)
{
h5xx::file file(filename, h5xx::file::trunc);
std::string name;
{
// --- create dataset and fill it with the default array data (positive values)
name = "integer array";
h5xx::create_dataset(file, name, array);
h5xx::write_dataset(file, name, array);
// --- create a slice object (aka hyperslab) to specify the location in the dataset to be overwritten
std::vector<int> offset; int offset_raw[2] = {4,4}; offset.assign(offset_raw, offset_raw + 2);
std::vector<int> count; int count_raw[2] = {2,2}; count.assign(count_raw, count_raw + 2);
h5xx::slice slice(offset, count);
}
}
void read_int_data(std::string const& filename)
{
h5xx::file file(filename, h5xx::file::in);
std::string name = "integer array";
// read and print the full dataset
{
array_2d_t array;
// --- read the complete dataset into array, the array is resized and overwritten internally
h5xx::read_dataset(file, name, array);
printf("original integer array read from file, negative number patch was written using a slice\n");
print_array(array);
printf("\n");
}
}
int main(int argc, char** argv)
{
std::string filename = argv[0];
filename.append(".h5");
// --- do a few demos/tests using integers
{
array_2d_t array(boost::extents[NJ][NI]);
{
const int nelem = NI*NJ;
int data[nelem];
for (int i = 0; i < nelem; i++)
data[i] = i;
array.assign(data, data + nelem);
}
write_int_data(filename, array);
read_int_data(filename);
}
return 0;
}
I'm using the h5xx — a template-based C++ wrapper for the HDF5 library link and boost library.
The datasets are stored in particles/lipids/box/positions path. The dataset name value holds the frames.
argv[0] is not what you want (arguments start at 1, 0 is the program name). Consider bounds checking as well:
std::vector<std::string> const args(argv, argv + argc);
std::string const filename = args.at(1) + ".h5";
the initialization can be done directly, without a temporary array (what is multi_array for, otherwise?)
for (size_t i = 0; i < array.num_elements(); i++)
array.data()[i] = i;
Or indeed, make it an algorithm:
std::iota(array.data(), array.data() + array.num_elements(), 0);
same with vectors:
std::vector<int> offset; int offset_raw[2] = {4,4}; offset.assign(offset_raw, offset_raw + 2);
std::vector<int> count; int count_raw[2] = {2,2}; count.assign(count_raw, count_raw + 2);
besides being a formatting mess can be simply
std::vector offset{4,4}, count{2,2};
h5xx::slice slice(offset, count);
On To The Real Question
The code has no relevance to the file. At all. I created some debug/tracing code to dump the file contents:
void dump(h5xx::group const& g, std::string indent = "") {
auto dd = g.datasets();
auto gg = g.groups();
for (auto it = dd.begin(); it != dd.end(); ++it) {
std::cout << indent << " ds:" << it.get_name() << "\n";
}
for (auto it = gg.begin(); it != gg.end(); ++it) {
dump(*it, indent + "/" + it.get_name());
}
}
int main()
{
h5xx::file xaa("xaa.h5", h5xx::file::mode::in);
dump(xaa);
}
Prints
/particles/lipids/box/edges ds:box_size
/particles/lipids/box/edges ds:step
/particles/lipids/box/edges ds:time
/particles/lipids/box/edges ds:value
/particles/lipids/box/positions ds:step
/particles/lipids/box/positions ds:time
/particles/lipids/box/positions ds:value
Now we can drill down to the dataset. Let's see whether we can figure out the correct type. It certainly is NOT array_2d_t:
h5xx::dataset ds(xaa, "particles/lipids/box/positions/value");
array_2d_t a;
h5xx::datatype detect(a);
std::cout << "type: " << std::hex << ds.get_type() << std::dec << "\n";
std::cout << "detect: " << std::hex << detect.get_type_id() << std::dec << "\n";
Prints
type: 30000000000013b
detect: 30000000000000c
That's a type mismatch. I guess I'll have to learn to read that gibberish as well...
Let's add some diagnostics:
void diag_type(hid_t type)
{
std::cout << " Class " << ::H5Tget_class(type) << std::endl;
std::cout << " Size " << ::H5Tget_size(type) << std::endl;
std::cout << " Sign " << ::H5Tget_sign(type) << std::endl;
std::cout << " Order " << ::H5Tget_order(type) << std::endl;
std::cout << " Precision " << ::H5Tget_precision(type) << std::endl;
std::cout << " NDims " << ::H5Tget_array_ndims(type) << std::endl;
std::cout << " NMembers " << ::H5Tget_nmembers(type) << std::endl;
}
int main()
{
h5xx::file xaa("xaa.h5", h5xx::file::mode::in);
// dump(xaa);
{
h5xx::group g(xaa, "particles/lipids/box/positions");
h5xx::dataset ds(g, "value");
std::cout << "dataset: " << std::hex << ds.get_type() << std::dec << std::endl;
diag_type(ds.get_type());
}
{
array_2d_t a(boost::extents[NJ][NI]);
h5xx::datatype detect(a);
std::cout << "detect: " << std::hex << detect.get_type_id() << std::dec << std::endl;
diag_type(detect.get_type_id());
}
}
Prints
dataset: 30000000000013b
Class 1
Size 4
Sign -1
Order 0
Precision 32
NDims -1
NMembers -1
detect: 30000000000000c
Class 0
Size 4
Sign 1
Order 0
Precision 32
NDims -1
NMembers -1
At least we know that HST_FLOAT (class 1) is required. Let's modify array_2d_t:
using array_2d_t = boost::multi_array<float, 2>;
array_2d_t a(boost::extents[11214][3]);
This at least makes the data appear similarly. Let's ... naively try to read:
h5xx::read_dataset(ds, a);
Oops, that predictably throws
terminate called after throwing an instance of 'h5xx::error'
what(): /home/sehe/Projects/stackoverflow/deps/h5xx/h5xx/dataset/boost_multi_array.hpp:176:read_dataset(): dataset "/particles/lipi
ds/box/positions/value" and target array have mismatching dimensions
No worries, we can guess:
using array_3d_t = boost::multi_array<float, 3>;
array_3d_t a(boost::extents[10][11214][3]);
h5xx::read_dataset(ds, a);
At least this does work. Adapting the print function:
template <typename T> void print_array(T const& array) {
for (auto const& row : array) {
for (auto v : row) printf("%5f ", v);
printf("\n");
}
}
Now we can print the first frame:
h5xx::read_dataset(ds, a);
print_array(*a.begin()); // print the first frame
This prints:
80.480003 35.360001 4.250000
37.450001 3.920000 3.960000
18.530001 -9.690000 4.680000
55.389999 74.339996 4.600000
22.110001 68.709999 3.850000
-4.130000 24.040001 3.730000
40.160000 6.390000 4.730000
-5.400000 35.730000 4.850000
36.669998 22.450001 4.080000
-3.680000 -10.660000 4.180000
(...)
That checks out with h5ls -r -d xaa.h5/particles/lipids/box/positions/value:
particles/lipids/box/positions/value Dataset {75/Inf, 11214, 3}
Data:
(0,0,0) 80.48, 35.36, 4.25, 37.45, 3.92, 3.96, 18.53, -9.69, 4.68,
(0,3,0) 55.39, 74.34, 4.6, 22.11, 68.71, 3.85, -4.13, 24.04, 3.73,
(0,6,0) 40.16, 6.39, 4.73, -5.4, 35.73, 4.85, 36.67, 22.45, 4.08, -3.68,
(0,9,1) -10.66, 4.18, 35.95, 36.43, 5.15, 57.17, 3.88, 5.08, -23.64,
(0,12,1) 50.44, 4.32, 6.78, 8.24, 4.36, 21.34, 50.63, 5.21, 16.29,
(0,15,1) -1.34, 5.28, 22.26, 71.25, 5.4, 19.76, 10.38, 5.34, 78.62,
(0,18,1) 11.13, 5.69, 22.14, 59.7, 4.92, 15.65, 47.28, 5.22, 82.41,
(0,21,1) 2.09, 5.24, 16.87, -11.68, 5.35, 15.54, -0.63, 5.2, 81.25,
(...)
The Home Stretch: Adding The Slice
array_2d_t read_frame(int frame_no) {
h5xx::file xaa("xaa.h5", h5xx::file::mode::in);
h5xx::group g(xaa, "particles/lipids/box/positions");
h5xx::dataset ds(g, "value");
array_2d_t a(boost::extents[11214][3]);
std::vector offsets{frame_no, 0, 0}, counts{1, 11214, 3};
h5xx::slice slice(offsets, counts);
h5xx::read_dataset(ds, a, slice);
return a;
}
There you have it. Now we can print any frame:
print_array(read_frame(0));
Printing the same as before. Let's try the last frame:
print_array(read_frame(9));
Prints
79.040001 36.349998 3.990000
37.250000 3.470000 4.140000
18.600000 -9.270000 4.900000
55.669998 75.070000 5.370000
21.920000 67.709999 3.790000
-4.670000 24.770000 3.690000
40.000000 6.060000 5.240000
-5.340000 36.320000 5.410000
36.369999 22.490000 4.130000
-3.520000 -10.430000 4.280000
(...)
Checking again with h5ls -r -d xaa.h5/particles/lipids/box/positions/value |& grep '(9' | head confirms:
(9,0,0) 79.04, 36.35, 3.99, 37.25, 3.47, 4.14, 18.6, -9.27, 4.9, 55.67,
(9,3,1) 75.07, 5.37, 21.92, 67.71, 3.79, -4.67, 24.77, 3.69, 40, 6.06,
(9,6,2) 5.24, -5.34, 36.32, 5.41, 36.37, 22.49, 4.13, -3.52, -10.43,
(9,9,2) 4.28, 35.8, 36.43, 4.99, 56.6, 4.09, 5.04, -23.37, 49.42, 3.81,
(9,13,0) 6.31, 8.83, 4.56, 22.01, 50.38, 5.43, 16.3, -2.92, 5.4, 22.02,
(9,16,1) 70.09, 5.36, 20.23, 11.12, 5.66, 78.48, 11.34, 6.09, 20.26,
(9,19,1) 61.45, 5.35, 14.25, 48.32, 5.35, 79.95, 1.71, 5.38, 17.56,
(9,22,1) -11.61, 5.39, 15.64, -0.19, 5.06, 80.43, 71.77, 5.29, 75.54,
(9,25,1) 35.14, 5.26, 22.45, 56.86, 5.56, 16.47, 52.97, 6.16, 20.62,
(9,28,1) 65.12, 5.26, 19.68, 71.2, 5.52, 23.39, 49.84, 5.28, 22.7,
Full Listing
#include <boost/multi_array.hpp>
#include <h5xx/h5xx.hpp>
#include <iostream>
using array_2d_t = boost::multi_array<float, 2>;
template <typename T> void print_array(T const& array)
{
for (auto const& row : array) { for (auto v : row)
printf("%5f ", v);
printf("\n");
}
}
void dump(h5xx::group const& g, std::string indent = "") {
auto dd = g.datasets();
auto gg = g.groups();
for (auto it = dd.begin(); it != dd.end(); ++it) {
std::cout << indent << " ds:" << it.get_name() << std::endl;
}
for (auto it = gg.begin(); it != gg.end(); ++it) {
dump(*it, indent + "/" + it.get_name());
}
}
array_2d_t read_frame(int frame_no) {
h5xx::file xaa("xaa.h5", h5xx::file::mode::in);
h5xx::group g(xaa, "particles/lipids/box/positions");
h5xx::dataset ds(g, "value");
array_2d_t arr(boost::extents[11214][3]);
std::vector offsets{frame_no, 0, 0}, counts{1, 11214, 3};
h5xx::slice slice(offsets, counts);
h5xx::read_dataset(ds, arr, slice);
return arr;
}
int main()
{
print_array(read_frame(9));
}

I initialize char *str1 = nullptr and output to the screen... But when i want output to other variable i receive empty screen.. Why is it happening?

#include <iostream>
#include <cstring>
int main()
{
const char *str = "Hello world";
char *str1 = nullptr;
std::cout << str << std::endl; // when i output to screan i see hello world
std::cout << str1 << std::endl; // when i output to scream i sen nothing
str1 = new char[strlen(str) + 1];
strcpy(str1, str);
std::cout << str << std::endl; // when i output to scream again i sen nothing
std::cout << str1 << std::endl; // The same
}

Finding incorrect implementation of JudyArray

I'm trying to give a better error report (possible bug) for this case (about judySArray give incorrect result, but I don't know which key that give incorrect result).
The code here from this folder, note on this blog. Dependencies: judySArray.h and cedar.h
// judy.cpp
#include "deps/judySArray.h"
#include <string>
#include <iostream>
#include <cstdlib>
#include <cstring>
using namespace std;
typedef judySArray<double> MSD;
const int MAX_DATA = 12000000;
const char i2ch[] = {'0','1','2','3','4','5','6','7','8','9','a','B','c','D','e','F'};
int get_first_digit(double d) {
while(d > 10) d /= 10;
return d;
}
string to_rhex(int v) {
char hex[32];
int start = 0;
while(v>0) {
hex[start] = i2ch[v%16];
v /= 16;
++start;
}
hex[start] = 0;
return hex;
}
void add_or_inc(MSD &m, const string& key,double set, double inc, int& ctr) {
const char* cstr = key.c_str();
double it = m.find(cstr);
if(!it) {
m.insert(cstr,set);
return;
}
m.insert(cstr,it+inc);
++ctr;
}
int main() {
MSD m(64);
int dup1 = 0, dup2 = 0, dup3 = 0;
for(int z=MAX_DATA;z>0;--z) {
int val2 = MAX_DATA-z;
int val3 = MAX_DATA*2-z;
string key1 = to_string(z);
string key2 = to_string(val2);
string key3 = to_rhex(val3);
add_or_inc(m,key1,z,val2,dup1);
add_or_inc(m,key2,val2,val3,dup2);
add_or_inc(m,key3,val3,z,dup3);
}
cout << dup1 << ' ' << dup2 << ' ' << dup3 << endl;
int total = 0, verify = 0, count = 0;
for(auto &it = m.begin();m.success(); m.next()) {
total += get_first_digit(it.value);
verify += strlen((const char *) it.key);
count += 1;
}
cout << total << ' ' << verify << ' ' << count << endl;
}
other implementation (map, unordered_map, hat-trie and cedar) give correct result:
6009354 6009348 611297
36186112 159701682 23370001
but judy didn't:
6009354 6009348 611297
36186112 159701681 23370000
The problem is, which key that have incorrect result?
I've tried to build a code that insert those keys on another data structure (that is cedar), but that incorrect keys still not detected:
// judy.cpp
#include "deps/judySArray.h"
#include <string>
#include <iostream>
#include <cstdlib>
#include <cstring>
#include <vector>
using namespace std;
typedef judySArray<double> MSD;
const int MAX_DATA = 12000000;
const char i2ch[] = {'0','1','2','3','4','5','6','7','8','9','a','B','c','D','e','F'};
int get_first_digit(double d) {
while(d > 10) d /= 10;
return d;
}
string to_rhex(int v) {
char hex[32];
int start = 0;
while(v>0) {
hex[start] = i2ch[v%16];
v /= 16;
++start;
}
hex[start] = 0;
return hex;
}
void add_or_inc(MSD &m, const string& key,double set, double inc, int& ctr) {
const char* cstr = key.c_str();
double it = m.find(cstr);
if(!it) {
m.insert(cstr,set);
return;
}
m.insert(cstr,it+inc);
++ctr;
}
#include "deps/cedar.h"
class MSD2 {
public:
vector<double> data;
typedef cedar::da<int> CI;
CI da;
bool exists(const string& key,double &old) {
int idx = -1;
bool found = da.exactMatchExists(key.c_str(),key.size(),&idx);
if(found) old = data[idx];
return found;
}
void insert(const string& key,double val) {
da.update(key.c_str(),key.size(),data.size());
data.push_back(val);
}
void update(const string& key,double val) {
int idx = -1;
bool found = da.exactMatchExists(key.c_str(),key.size(),&idx);
if(found) {
data[idx] = val;
return;
}
insert(key,val);
}
};
void add_or_inc(MSD2 &m, const string& key,double set, double inc, int& ctr) {
double old;
if(!m.exists(key,old)) {
m.insert(key,set);
return;
}
m.update(key,old+inc);
++ctr;
}
int main() {
MSD m(64);
MSD2 m2;
int dup1 = 0, dup2 = 0, dup3 = 0;
int vup1 = 0, vup2 = 0, vup3 = 0;
for(int z=MAX_DATA;z>0;--z) {
int val2 = MAX_DATA-z;
int val3 = MAX_DATA*2-z;
string key1 = to_string(z);
string key2 = to_string(val2);
string key3 = to_rhex(val3);
add_or_inc(m,key1,z,val2,dup1);
add_or_inc(m,key2,val2,val3,dup2);
add_or_inc(m,key3,val3,z,dup3);
add_or_inc(m2,key1,z,val2,vup1);
add_or_inc(m2,key2,val2,val3,vup2);
add_or_inc(m2,key3,val3,z,vup3);
}
cout << dup1 << ' ' << dup2 << ' ' << dup3 << endl;
cout << vup1 << ' ' << vup2 << ' ' << vup3 << endl;
int total = 0, verify = 0, count = 0;
int xotal = 0, xerify = 0, xount = 0;
union { int i; int x; } b;
size_t from = 0, p = 0;
char key[256] = {0};
for (b.i = m2.da.begin(from, p); b.i != MSD2::CI::CEDAR_NO_PATH; b.i = m2.da.next(from, p)) {
double it2 = m2.data[b.x]; // <-- find cedar's
xotal += get_first_digit(it2);
m2.da.suffix(key,p,from);
xerify += strlen(key);
xount += 1;
double it = m.find(key); // <-- find judy's
if(it != it2) { // if value doesn't match, print:
cout << "mismatch value for " << key << " : " << it2 << " vs " << it << endl;
}
}
for(auto &it = m.begin();m.success(); m.next()) {
total += get_first_digit(it.value);
verify += strlen((const char *) it.key);
count += 1;
}
cout << total << ' ' << verify << ' ' << count << endl;
cout << xotal << ' ' << xerify << ' ' << xount << endl;
}
compile with: clang++ -std=c++11 judy-findbug.cpp (or g++ -std=c++11)
the output would be:
6009354 6009348 611297
6009354 6009348 611297
36186112 159701681 23370000 <-- judy's
36186112 159701682 23370001 <-- cedar's
cedar has one more value than judy's (that is correct), but it didn't detected by the code above..
How to find that incorrect key(s)?
The bug on the code is someone (me) uncomment the assert(value != 0).
The bug was Karl's Judy implementation should not store null values (0 value).
Solution: use Doug Baskins' Judy implementation.

cublas cublasDgetrfBatched() batched LU factorization doesn't work with matrices bigger than 32x32

I wrote a cuda function for Matlab to perform a LU factorization of a batch of matrices using cublasDgetrfBatched(). The toolkit documentation of this function is here.
It works fine for matrices up to size 32x32. But it fails with status code CUBLAS_STATUS_INVALID_VALUE for bigger matrices. Below is my source code (gpuBatchedLU.cu):
#include "mex.h"
#include "gpu/mxGPUArray.h"
/* Includes, cuda */
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <string>
#include <sstream>
static std::string cublasGetErrorString(cublasStatus_t error) {
switch (error) {
case CUBLAS_STATUS_SUCCESS:
return "CUBLAS_STATUS_SUCCESS";
case CUBLAS_STATUS_NOT_INITIALIZED:
return "CUBLAS_STATUS_NOT_INITIALIZED";
case CUBLAS_STATUS_ALLOC_FAILED:
return "CUBLAS_STATUS_ALLOC_FAILED";
case CUBLAS_STATUS_INVALID_VALUE:
return "CUBLAS_STATUS_INVALID_VALUE";
case CUBLAS_STATUS_ARCH_MISMATCH:
return "CUBLAS_STATUS_ARCH_MISMATCH";
case CUBLAS_STATUS_MAPPING_ERROR:
return "CUBLAS_STATUS_MAPPING_ERROR";
case CUBLAS_STATUS_EXECUTION_FAILED:
return "CUBLAS_STATUS_EXECUTION_FAILED";
case CUBLAS_STATUS_INTERNAL_ERROR:
return "CUBLAS_STATUS_INTERNAL_ERROR";
}
return "<unknown>";
}
inline bool cublasAssert(cublasStatus_t code, const char* file, int line) {
if (code != CUBLAS_STATUS_SUCCESS) {
std::stringstream ss;
ss << "cublasAssert: " << cublasGetErrorString(code) << " in "
<< std::string(file) << ", line " << line << ".";
mexErrMsgTxt(ss.str().c_str());
}
return code == CUBLAS_STATUS_SUCCESS;
}
inline bool cudaAssert(cudaError_t code, const char* file, int line) {
if (code != cudaSuccess) {
std::stringstream ss;
ss << "cudaAssert: " << cudaGetErrorString(code) << " in "
<< std::string(file) << ", line " << line << ".";
mexErrMsgTxt(ss.str().c_str());
}
return code == cudaSuccess;
}
inline bool mexGPUAssert(int code, const char* file, int line) {
if (code != MX_GPU_SUCCESS) {
std::stringstream ss;
ss << "mexGPUAssert: could not initialize the Mathworks GPU API in "
<< std::string(file) << ", line " << line << ".";
mexErrMsgTxt(ss.str().c_str());
}
return code == MX_GPU_SUCCESS;
}
#define cublasErrchk(ans) { cublasAssert((ans), __FILE__, __LINE__); }
#define cudaErrchk(ans) { cudaAssert((ans), __FILE__, __LINE__); }
#define mxGPUErrchk(ans) { mexGPUAssert((ans), __FILE__, __LINE__); }
void mexFunction(int nlhs, mxArray *plhs[], /* Output variables */int nrhs,
const mxArray *prhs[]) /* Input variables */{
if (nrhs != 1) { /* end if not one function arguments */
mexErrMsgTxt("This function requires one input argument.");
return;
}
if (nlhs > 3) { /* take three outputs */
mexErrMsgTxt("This function takes a maximum of three output variables.");
return;
}
mxGPUErrchk(mxInitGPU());
const mxGPUArray* in1_gpu = mxGPUCreateFromMxArray(prhs[0]);
size_t ndims = mxGPUGetNumberOfDimensions(in1_gpu);
const size_t* dim = (const size_t*) mxGPUGetDimensions(in1_gpu);
if (ndims != 3) { /* end if input arguments are of different dimensions */
mexErrMsgTxt("The input argument must be a 3-dimensional array.");
return;
}
cublasHandle_t handle;
cublasErrchk(cublasCreate(&handle));
int no_matrices = dim[2];
int nrow = dim[0];
int ncol = dim[1];
int matrix_size = nrow * ncol;
size_t i;
std::stringstream ss;
ss << "dim[2] = " << dim[2] << "\nno_matrices = " << no_matrices << "\nnrow = " << nrow << "\nmatrix_size = " << nrow << " x " << ncol << " = " << matrix_size << std::endl;
mexPrintf(ss.str().c_str());
mxGPUArray* gpu_array_inout = mxGPUCopyFromMxArray(prhs[0]);
double* inout_storage = (double*) mxGPUGetData(gpu_array_inout);
size_t info_dimensions[1] = { no_matrices };
mxGPUArray* gpu_array_info = mxGPUCreateGPUArray(1, (mwSize*) info_dimensions, mxINT32_CLASS, mxREAL,
MX_GPU_INITIALIZE_VALUES);
int* out_info = (int*) mxGPUGetData(gpu_array_info);
mexPrintf("after defining gpu_array_info\n");
size_t pivot_dimensions[2] = { nrow, no_matrices };
mxGPUArray* gpu_array_pivot = mxGPUCreateGPUArray(2, (mwSize*) pivot_dimensions, mxINT32_CLASS, mxREAL,
MX_GPU_DO_NOT_INITIALIZE);
int* out_pivot = (int*) mxGPUGetData(gpu_array_pivot);
mexPrintf("after defining gpu_array_pivot\n");
double** inout_pointers_CPU = (double**) malloc(no_matrices * sizeof(double*));
for (i = 0; i < no_matrices; i++) {
inout_pointers_CPU[i] = (double*) ((char*) inout_storage + i * ((size_t) matrix_size) * sizeof(double));
}
double** inout_pointers_GPU;
cudaErrchk(cudaMalloc((void** )&inout_pointers_GPU, no_matrices * sizeof(double*)));
cudaErrchk(
cudaMemcpy(inout_pointers_GPU, inout_pointers_CPU, no_matrices * sizeof(double*), cudaMemcpyHostToDevice));
free(inout_pointers_CPU);
ss.clear();
ss << "check again before calling cublasDgetrfBatched:\nnrow = " << nrow << "\nno_matrices = " << no_matrices << std::endl;
mexPrintf(ss.str().c_str());
cublasErrchk(cublasDgetrfBatched(handle, nrow, inout_pointers_GPU, nrow, out_pivot, out_info, no_matrices));
cublasErrchk(cublasDestroy(handle));
cudaErrchk(cudaFree(inout_pointers_GPU));
if (mxIsGPUArray(prhs[0])) {
plhs[0] = mxGPUCreateMxArrayOnGPU(gpu_array_inout);
if (nlhs > 1) {
plhs[1] = mxGPUCreateMxArrayOnGPU(gpu_array_pivot);
if (nlhs > 2) {
plhs[2] = mxGPUCreateMxArrayOnGPU(gpu_array_info);
}
}
} else {
plhs[0] = mxGPUCreateMxArrayOnCPU(gpu_array_inout);
if (nlhs > 1) {
plhs[1] = mxGPUCreateMxArrayOnCPU(gpu_array_pivot);
if (nlhs > 2) {
plhs[2] = mxGPUCreateMxArrayOnCPU(gpu_array_info);
}
}
}
mxGPUDestroyGPUArray(gpu_array_inout);
mxGPUDestroyGPUArray(gpu_array_pivot);
mxGPUDestroyGPUArray(gpu_array_info);
mxFree((void*) dim);
return;
}
I compile as follows:
mex -L/usr/local/cuda/lib64 -lcudart -lcublas gpuBatchedLU.cu
And I call from MATLAB:
[a1,b1,c1]=gpuBatchedLU(randn(32,32,5)); %no problem
[a2,b2,c2]=gpuBatchedLU(randn(33,33,5)); %produces CUBLAS_STATUS_INVALID_VALUE
I use Matlab R2013b with the parallel toolbox, Cuda 5.5, and a NVS 5200M graphics chip.
Can anyone replicate this problem? I would appreciate any suggestions on how to solve this problem.
The problem seems to be with Matlab R2013b using libcublas.so in version 5.0. The file link is in /MATLAB/R2013b/bin/glnxa64/. Once I changed the link to the libcublas.so of my Cuda 5.5 installation it worked fine.

Resources