Related
Note: I have tagged this with both programming and windows networking tags, so please don't shout, I'm just trying to expose this to as many people as may be able to help!
I am trying to set the receive and send buffers for a small client and server I have written, so that when I perform a network capture, I see the window size I have set in the TCP handshake.
For the programmers, please consider the following very simple code for a client and server.
For the non-programmers, please skip past this section to my image.
Client:
#include <WinSock2.h>
#include <mstcpip.h>
#include <Ws2tcpip.h>
#include <thread>
#include <iostream>
using namespace std;
// Reads the integer SOL_SOCKET option nType (e.g. SO_RCVBUF or SO_SNDBUF)
// from socket s. Returns the option value, or -1 when getsockopt() fails.
int OutputWindowSize(SOCKET s, unsigned int nType)
{
    int optionValue = 0;
    int optionLength = sizeof(optionValue);
    const int rc = getsockopt(s, SOL_SOCKET, nType, (char *)&optionValue, &optionLength);
    return (rc == 0) ? optionValue : -1;
}
// Requests nSize bytes for both the send (SO_SNDBUF) and the receive
// (SO_RCVBUF) buffer of socket s. Returns true only when both setsockopt()
// calls succeed; SO_RCVBUF is not attempted if SO_SNDBUF fails.
bool SetWindowSizeVal(SOCKET s, unsigned int nSize)
{
    if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&nSize, sizeof(nSize)) != 0)
        return false;
    return setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&nSize, sizeof(nSize)) == 0;
}
int main(int argc, char** argv)
{
if (argc != 3) { cout << "not enough args!\n"; return 0; }
const char* pszHost = argv[1];
const int nPort = atoi(argv[2]);
WSADATA wsaData;
DWORD Ret = 0;
if ((Ret = WSAStartup((2, 2), &wsaData)) != 0)
{
printf("WSAStartup() failed with error %d\n", Ret);
return 1;
}
struct sockaddr_in sockaddr_IPv4;
memset(&sockaddr_IPv4, 0, sizeof(struct sockaddr_in));
sockaddr_IPv4.sin_family = AF_INET;
sockaddr_IPv4.sin_port = htons(nPort);
if (!InetPtonA(AF_INET, pszHost, &sockaddr_IPv4.sin_addr)) { return 0; }
SOCKET clientSock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); // Create active socket: one which is passed to connect().
if (!SetWindowSizeVal(clientSock, 12345))
{
cout << "Failed to set window size " << endl;
return -1;
}
cout << "Set window size on client socket as: RECV" << OutputWindowSize(clientSock, SO_RCVBUF) <<
" SEND: " << OutputWindowSize(clientSock, SO_SNDBUF) << endl;
int nRet = connect(clientSock, (sockaddr*)&sockaddr_IPv4, sizeof(sockaddr_in));
if (nRet != 0) { return 0; }
char buf[100] = { 0 };
nRet = recv(clientSock, buf, 100, 0);
cout << "Received " << buf << " from the server!" << endl;
nRet = send(clientSock, "Hello from the client!\n", strlen("Hello from the client!\n"), 0);
closesocket(clientSock);
return 0;
}
Server:
#include <WinSock2.h>
#include <mstcpip.h>
#include <Ws2tcpip.h>
#include <iostream>
using namespace std;
// Returns the current value of the integer SOL_SOCKET option nType
// (SO_RCVBUF / SO_SNDBUF in this program) for socket s, or -1 if the
// getsockopt() call does not succeed.
int OutputWindowSize(SOCKET s, unsigned int nType)
{
    int reported = 0;
    int reportedLen = sizeof(reported);
    if (getsockopt(s, SOL_SOCKET, nType, (char *)&reported, &reportedLen) != 0)
        return -1;
    return reported;
}
// Sets the send buffer and then the receive buffer of socket s to nSize bytes.
// The result is true only if both options were accepted; a failure on
// SO_SNDBUF short-circuits and SO_RCVBUF is left untouched.
bool SetWindowSizeVal(SOCKET s, unsigned int nSize)
{
    const bool sendOk =
        setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&nSize, sizeof(nSize)) == 0;
    if (!sendOk)
        return false;
    const bool recvOk =
        setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&nSize, sizeof(nSize)) == 0;
    return recvOk;
}
int main()
{
WSADATA wsaData;
DWORD Ret = 0;
if ((Ret = WSAStartup((2, 2), &wsaData)) != 0)
{
printf("WSAStartup() failed with error %d\n", Ret);
return 1;
}
struct sockaddr_in sockaddr_IPv4;
memset(&sockaddr_IPv4, 0, sizeof(struct sockaddr_in));
sockaddr_IPv4.sin_family = AF_INET;
sockaddr_IPv4.sin_port = htons(19982);
int y = InetPton(AF_INET, L"127.0.0.1", &sockaddr_IPv4.sin_addr);
if (y != 1) return 0;
socklen_t addrlen = sizeof(sockaddr_IPv4);
SOCKET sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (!SetWindowSizeVal(sock, 12345))
{
cout << "Failed to set window size " << endl;
return -1;
}
cout << "Set window size on listen socket as: RECV" << OutputWindowSize(sock, SO_RCVBUF) <<
" SEND: " << OutputWindowSize(sock, SO_SNDBUF) << endl;
if (bind(sock, (sockaddr*)&sockaddr_IPv4, sizeof(sockaddr_IPv4)) != 0) { /* error */ }
if (listen(sock, SOMAXCONN) != 0) { return 0; }
while (1)
{
SOCKET sockAccept = accept(sock, (struct sockaddr *) &sockaddr_IPv4, &addrlen);
if (!SetWindowSizeVal(sockAccept, 12345))
{
cout << "Failed to set window size " << endl;
return -1;
}
cout << "Set window size as on accepted socket as: RECV" << OutputWindowSize(sock, SO_RCVBUF) <<
" SEND: " << OutputWindowSize(sock, SO_SNDBUF) << endl;
if (sockAccept == -1) return 0;
int nRet = send(sockAccept, "Hello from the server!\n", strlen("Hello from the server!\n"), 0);
if (!nRet) return 0;
char buf[100] = { 0 };
nRet = recv(sockAccept, buf, 100, 0);
cout << "Received " << buf << " from the client!" << endl;
if (nRet == 0) { cout << "client disonnected!" << endl; }
closesocket(sockAccept);
}
return 0;
}
The output from my program states that the window sizes have been set successfully:
Set window size on listen socket as: RECV12345 SEND: 12345
Set window size as on accepted socket as: RECV12345 SEND: 12345
for the server, and for the client:
Set window size on listen socket as: RECV12345 SEND: 12345
However, when I capture the traffic using RawCap, I see that the client window size is set fine, but server's window size is not what I set it to be, it is 8192:
Now, I have read this MS link and it says to add a registry value; I did this, adding the value 0x00001234, but it still made no difference.
The interesting thing is, the same code works fine on a Windows 10 machine, which makes me think it is Windows 7 specific. However, I'm not 100% sure on my code, there might be some errors in it.
Can anyone suggest how I can get Windows to honour my requested parameters please?
These are not 'window sizes'. They are send and receive buffer sizes.
There is no such thing as 'output window size'. There is a receive window and a congestion window, and the latter is not relevant to your question.
The send buffer size has exactly nothing to do with the receive window size, and the receive buffer size only determines the maximum receive window size.
The actual receive window size is adjusted dynamically by the protocol. It is the actual size that you are seeing in Wireshark.
The platform is entitled by the specification to adjust the supplied values for the send and receive buffers up or down, and the documentation advises you to get the corresponding values if you want to be sure what they really are.
There is no problem here to solve.
NB You don't have to set the receive window size on an accepted socket if you already set it on the listening socket. It is inherited.
I'm trying to get the original destination of a UDP packet using IP_ORIGDSTADDR. On kernels older than mine it seems to work (I'm running the current Debian testing kernel, 4.7.0-1-amd64).
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#define DEFAULT_ADDR "127.0.0.1"
#define DEFAULT_PORT 6666
/*
 * UDP listener used to inspect IP-level ancillary data.
 *
 * Binds a datagram socket to av[1]:av[2] (DEFAULT_ADDR:DEFAULT_PORT when the
 * arguments are absent), enables IP_RECVORIGDSTADDR, IP_RECVOPTS and
 * IP_PKTINFO on it, then loops forever printing every datagram together with
 * the SOL_IP control messages delivered with it. A warning is printed for each
 * datagram that does not carry exactly one IP_ORIGDSTADDR message.
 *
 * The receive loop has no exit; the function only returns (always 1) when the
 * setup phase fails.
 */
int main(int ac, char **av)
{
    int sock;
    struct sockaddr_in sin;

    memset(&sin, 0, sizeof sin);
    sin.sin_family = AF_INET;
    /* inet_aton() reports an invalid address with 0, never a negative value. */
    if (inet_aton(ac >= 2 ? av[1] : DEFAULT_ADDR, &sin.sin_addr) == 0) {
        fprintf(stderr, "Invalid address\n");
        goto err;
    }
    sin.sin_port = htons(ac >= 3 ? atoi(av[2]) : DEFAULT_PORT);

    sock = socket(AF_INET, SOCK_DGRAM, 0);
    if (sock < 0) {
        perror("socket");
        goto err;
    }
    if (bind(sock, (struct sockaddr *) &sin, sizeof sin) < 0) {
        perror("bind");
        goto close_err;
    }

/*
 * SOCK_OPT(level, name, value): set an int socket option, read it back and
 * jump to close_err unless both calls succeed and the value read back equals
 * the value requested.
 */
#define SOCK_OPT(l, n, v) do { \
    int _v = v; \
    socklen_t _s; \
    if (setsockopt(sock, l, n, &_v, sizeof _v) < 0) { \
        perror("setsockopt "# l "/" # n); \
        goto close_err; \
    } \
    \
    _s = sizeof (_v); \
    if (getsockopt(sock, l, n, &_v, &_s) < 0) { \
        perror("getsockopt "# l "/" # n); \
        goto close_err; \
    } \
    \
    if (_v != v) { \
        fprintf(stderr, "Unexpected sockopt (expected %d, found %d)\n", v, _v); \
        goto close_err; \
    } \
    \
    printf(#l "/" #n " is set to %d\n", _v); \
    \
} while (0)
    SOCK_OPT(SOL_IP, IP_RECVORIGDSTADDR, 1);
    SOCK_OPT(SOL_IP, IP_RECVOPTS, 1);
    SOCK_OPT(SOL_IP, IP_PKTINFO, 1);
#undef SOCK_OPT

    printf("Reading on %s:%d\n", inet_ntoa(sin.sin_addr), ntohs(sin.sin_port));

    for (;;) {
        ssize_t n;
        char buf[1024];
        char tmp[80];
        struct iovec iovec[] = {
            {
                .iov_base = buf,
                .iov_len = sizeof buf - 1,  /* keep room for the terminator */
            }
        };
        struct msghdr msghdr;
        struct cmsghdr *cmsg_ptr;
        struct sockaddr_storage from = { 0 };
        int port = 0;   /* stays 0 when the sender's family is not recognised */
        union cmsg_data {
            struct sockaddr_in sin;
            struct in_pktinfo pktinfo;
        };
        char msg_control[CMSG_SPACE(sizeof(union cmsg_data)) * 10] = { 0 };
        int found;

        /* msg_flags is left at 0: recvmsg() only writes it, never reads it. */
        memset(&msghdr, 0, sizeof msghdr);
        msghdr.msg_name = &from;
        msghdr.msg_namelen = sizeof from;
        msghdr.msg_iov = iovec;
        msghdr.msg_iovlen = sizeof iovec / sizeof iovec[0];
        msghdr.msg_control = msg_control;
        msghdr.msg_controllen = sizeof msg_control;

        /* Plain receive: MSG_OOB has no meaning for a datagram socket. */
        n = recvmsg(sock, &msghdr, 0);
        if (n < 0) {
            perror("recvmsg");
            continue;
        }
        if (msghdr.msg_flags & MSG_CTRUNC)
            fprintf(stderr, "*** Warning: ancillary data was truncated\n");

        /* A zero-length datagram is valid; do not look at buf[-1] for it. */
        if (n > 0 && buf[n - 1] == '\n')
            n--;
        buf[n] = 0;

        switch (from.ss_family) {
        default:
            tmp[0] = 0;
            break;
        case AF_INET:
            inet_ntop(AF_INET, &((struct sockaddr_in *) &from)->sin_addr, tmp, sizeof tmp);
            port = ntohs(((struct sockaddr_in *) &from)->sin_port);
            break;
        case AF_INET6:
            inet_ntop(AF_INET6, &((struct sockaddr_in6 *) &from)->sin6_addr, tmp, sizeof tmp);
            port = ntohs(((struct sockaddr_in6 *) &from)->sin6_port);
            break;
        }
        printf("%s:%d Rx %zdb: %.*s, msg_control = %zub\n", tmp, port, n, (int) n, buf, sizeof msg_control);

        found = 0;
        for (cmsg_ptr = CMSG_FIRSTHDR(&msghdr); cmsg_ptr != NULL; cmsg_ptr = CMSG_NXTHDR(&msghdr, cmsg_ptr)) {
            union cmsg_data *cmsg_data = (union cmsg_data *) CMSG_DATA(cmsg_ptr);

            switch (cmsg_ptr->cmsg_level) {
            default:
                fprintf(stderr, "Unexecpted level : %d\n", cmsg_ptr->cmsg_level);
                break;
            case SOL_IP:
                switch (cmsg_ptr->cmsg_type) {
                default:
                    fprintf(stderr, "Unexecpted type : %d\n", cmsg_ptr->cmsg_type);
                    break;
                case IP_ORIGDSTADDR:
                    printf("IP_ORIGDSTADDR: sin_addr = %s, sin_port = %d\n", inet_ntoa(cmsg_data->sin.sin_addr), ntohs(cmsg_data->sin.sin_port));
                    found++;
                    break;
                case IP_PKTINFO:
                    /* inet_ntoa() reuses one static buffer, so copy the first
                     * result out before formatting the second address. */
                    snprintf(tmp, sizeof tmp, "%s", inet_ntoa(cmsg_data->pktinfo.ipi_spec_dst));
                    printf("IP_PKTINFO: ifindex = %u, spec_dst = %s, addr = %s\n", cmsg_data->pktinfo.ipi_ifindex, tmp, inet_ntoa(cmsg_data->pktinfo.ipi_addr));
                    break;
                }
            }
        }
        if (found != 1)
            fprintf(stderr, "*** Warning: No SOL_IP / IP_ORIGDSTADDR found\n");
    }

close_err:
    close(sock);
err:
    return 1;
}
When I run this code (e.g. sending packets with netcat), I don't receive any IP_ORIGDSTADDR message, only IP_PKTINFO. I need the UDP port, and only IP_ORIGDSTADDR can provide it.
Has anyone encountered this strange behaviour?
I've a question that could be naive for the experienced users, but I'm kinda stuck. I'm trying to get ApplGrid code working (https://applgrid.hepforge.org/index.htm), so I wish to run the provided examples.
As far as I've understood, the dependencies are ROOT (root.cern.ch) and LHAPDF libraries (lhapdf.hepforge.org/index.html), which work well on my machine.
The aforementioned examples were built to work with an older version of LHAPDF, so I had to change the name of an invoked method.
When I try to make, I get an error regarding the undefined reference of that function, but if I isolate that instruction and execute with
g++ extract.cxx `lhapdf-config --cflags --ldflags`
it works like a charm.
The output of the config command is:
-I/home/matteo/software/gcc-4.7/lhapdf/install/include -I/home/matteo/software/gcc-4.7/boost/install/include -L/home/matteo/software/gcc-4.7/lhapdf/install/lib -lLHAPDF
If I remove the "LHAPDF-related commands" from the original cxx file I can run "make" without any problem.
As a final test, I've tried to isolate the "applgrid related code" and try to compile with:
g++ extract_2.cxx `applgrid-config --cxxflags --ldfflags`
but I get an undefined reference error.
The output of the config command is:
-I/home/matteo/software/gcc-4.7/applgrid/install/include -pthread -m64 -I/home/matteo/software/gcc-4.7/root/install/include -L/home/matteo/software/gcc-4.7/applgrid/install/lib -lfAPPLgrid -lAPPLgrid -m64 -L/home/matteo/software/gcc-4.7/root/install/lib -lCore -lCint -lRIO -lNet -lHist -lGraf -lGraf3d -lGpad -lTree -lRint -lPostscript -lMatrix -lPhysics -lMathCore -lThread -pthread -lm -ldl -rdynamic -L/home/matteo/software/gcc-4.7/hoppet/install/lib -lhoppet_v1
the content of the makefile is the following:
# Builds the APPLgrid example programs (stand, fstand, fnlo) and downloads the
# data files they read. Compiler and linker flags are taken from the
# root-config, lhapdf-config, applgrid-config and hoppet-config helper scripts.
CXX = g++
F77 = gfortran
FFLAGS += -O3 -fPIC
CXXFLAGS += -O3 -fPIC
# root
ROOTINCS = $(shell root-config --cflags)
ROOTLIBS = $(shell root-config --glibs)
ROOTARCH = $(findstring -m64, $(ROOTINCS) )
#LHAPDF
LHAPDFINCS = $(shell lhapdf-config --cflags)
LHAPDFDIR = $(shell lhapdf-config --libdir)
LHAPDFLIBS = $(shell lhapdf-config --libs)
# applgrid
APPLCXXFLAGS = $(shell applgrid-config --cxxflags)
APPLCLIBS = $(shell applgrid-config --ldcflags)
APPLFLIBS = $(shell applgrid-config --ldflags)
# hoppet
HOPPETLIBS = $(shell hoppet-config --libs)
# get the fortran runtime library for linking fortran
FRTLLIB = $(shell gfortran $(CXXFLAGS) -print-file-name=libgfortran.a)
FRTLIB = -L$(subst /libgfortran.a, ,$(FRTLLIB) ) -lgfortran
# now set up the compile and link flags and libs
CXXFLAGS += $(ROOTARCH) $(ROOTINCS) $(APPLCXXFLAGS) $(LHAPDFINCS)
LDFLAGS += $(ROOTARCH)
FFLAGS += $(ROOTARCH)
CLIBS += $(ROOTLIBS) $(LHAPDFLIBS) $(HOPPETLIBS) $(APPLCLIBS)
FLIBS += $(ROOTLIBS) $(LHAPDFLIBS) $(HOPPETLIBS) $(APPLFLIBS) $(APPLCLIBS) $(FRTLIB)

# These targets name actions, not files.
.PHONY : install all getdata getfnlodata getappldata clean

install : all
all : stand fnlo fstand getdata

# $@ is the target being linked, $< its first prerequisite. Libraries are
# listed after the object files so the linker can resolve their references.
stand: stand.o
	$(CXX) $(LDFLAGS) -o $@ $< $(CLIBS)
fstand: fstand.o fmain.o
	$(CXX) $(LDFLAGS) -o $@ fstand.o fmain.o $(FLIBS)
% : %.o
	$(CXX) $(LDFLAGS) -o $@ $< $(CLIBS)
fnlo: fnmain.o
	$(CXX) $(LDFLAGS) -o $@ $< $(CLIBS)

.SUFFIXES : .cxx .o .f .c
.f.o :
	$(F77) $(FFLAGS) -c $<
.cxx.o:
	$(CXX) $(CXXFLAGS) -c $<

#########################################################
# just type make getdata and it will download all
# the required data for these examples
#########################################################
.PRECIOUS : fnlodata appldata
getdata : getfnlodata getappldata
getfnlodata: fnl0004.tab
fnl0004.tab:
	@ echo "\n************************\ndownloading fastnlo data\n************************\n"
	(curl http://fastnlo.hepforge.org/code/src/fnl0004.tab.gz | gunzip > fnl0004.tab )
getappldata: atlas-incljets-arxiv-1009.5908v2
atlas-incljets-arxiv-1009.5908v2:
	@ echo "\n*************************************\n downloading atlas inclusive jet data\n***********************************\n"
	(curl http://www.hepforge.org/archive/applgrid/atlas-incljets-arxiv-1009.5908v2.tgz | tar -xzf - )

clean:
	rm -rf ./.libs ./.obj *.lo *.o *.la stand fnlo fstand *~
while the other files in the directory are the following:
fmain.cxx
extern "C" void fstand_();
// C++ entry point of the fstand program: hands control to fstand_() (declared
// above with C linkage) and reports success once it returns.
int main() {
    fstand_();
    return 0;
}
fnmain.cxx
#include <iostream>
#include <stdio.h>
#include "appl_grid/fastnlo.h"
#include "TFile.h"
#include "TH1D.h"
// lhapdf routines
#include "LHAPDF/LHAPDF.h"
extern "C" void evolvepdf_(const double& , const double& , double* );
extern "C" double alphaspdf_(const double& Q);
int main(int argc, char** argv) {
if ( argc<2 ) {
std::cerr << "usage: fnlo fastnlogrid.tab" << std::endl;
return -1;
}
std::cout << "set up lhapdf..." << std::endl;
const std::string _pdfname = "cteq6mE.LHgrid";
int iset = 0;
LHAPDF::initPDFSet( _pdfname, iset );
// don't need to hard code, can use a runtime parameter...
std::string gridname = argv[1];
// std::string gridname = "fnt1007midp.tab";
// std::string gridname = "fnt1008midp.tab";
// std::string gridname = "fnt2004.tab";
// std::string gridname = "fnl2004.tab";
fastnlo f( gridname );
std::vector<appl::grid*> g = f.grids();
// g.push_back( new appl::grid("atlas-incljets04-eta1.root") );
/// histograms and file for seeing the results
std::vector<TH1D*> hc(g.size());
std::string foutname = "appl.root";
TFile fout( foutname.c_str(),"recreate");
for ( int i=0 ; i<g.size() ; i++ ) {
// trim the grids (not actually needed, done in
// the fastnlo constructor
g[i]->trim();
char hname[64];
sprintf(hname, "hist%02d", i);
/// optionally print out the grid documentation
std::cout << "\n" << g[i]->getDocumentation() << std::endl;
/// perform the convolution
hc[i] = g[i]->convolute( evolvepdf_, alphaspdf_ );
hc[i]->SetName(hname);
hc[i]->SetDirectory(&fout);
hc[i]->Write();
/// print out the results
for ( int j=1 ; j<=hc[i]->GetNbinsX() ; j++ ) {
std::cout << "xsec(" << j-1 << ")=" << hc[i]->GetBinContent(j) << std::endl;
}
}
std::cout << "writing file " << foutname << std::endl;
fout.Close();
return 0;
}
}
fstand.f
C----------------------------------------------------------
C     Wrapper routines for the pdf and alphas calls.
C     In this example they simply forward to the lhapdf
C     routines; for a pdf fit, call your own routines.
C
C     Fixed-form source: statements start in column 7.
C----------------------------------------------------------
C     fnalphas: returns alphaspdf(Q) for the scale Q.
      double precision function fnalphas(Q)
      double precision Q
      double precision alphaspdf
      fnalphas = alphaspdf(Q)
      return
      end

C     fnpdf: fills xf(1:13) by calling evolvePDF(x, Q, xf).
      subroutine fnpdf(x, Q, xf)
      double precision x, Q
      double precision xf(13)
      call evolvePDF(x, Q, xf)
      return
      end
C----------------------------------------------------------
stand.cxx
#include <iostream>
#include <string>
#include <vector>
#include "appl_grid/appl_grid.h"
#include "appl_grid/appl_timer.h"
#include "TH1D.h"
#include "TPad.h"
// lhapdf routines
#include "LHAPDF/LHAPDF.h"
extern "C" void evolvepdf_(const double& , const double& , double* );
extern "C" double alphaspdf_(const double& Q);
// Standalone APPLgrid convolution example.
// Reads a grid (argv[1], default "atlas-incljets04-eta1.root"), convolutes it
// with the LHAPDF set "CT10.LHgrid" (member 0), prints the cross section per
// bin and saves a log-scale plot to "xsec.pdf". Always returns 0.
int main(int argc, char** argv) {
    // use a default atlas inclusive grid unless one is named on the command line
    std::string gridname = "atlas-incljets04-eta1.root";
    if ( argc>1 ) gridname = argv[1];
    std::cout << "reading grid " << gridname << std::endl;

    // create the grid from the grid file
    appl::grid g(gridname);
    g.trim(); // trim away unneeded memory

    /// print the grid documentation
    std::cout << g.getDocumentation() << std::endl;

    // initialise lhapdf
    std::cout << "setting up lhapdf" << std::endl;
    const std::string _pdfname = "CT10.LHgrid";
    int iset = 0;
    LHAPDF::initPDFSet( _pdfname, iset );

    // do the convolution into a vector
    std::cout << "doing standalone convolution" << std::endl;
    std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );

    // std::size_t index: matches the unsigned type returned by xsec.size()
    for ( std::size_t i=0 ; i<xsec.size() ; i++ ) std::cout << "xsec[" << i << "]\t= " << xsec[i] << std::endl;

    // do the convolution into a TH1D
    TH1D* hxsec = g.convolute( evolvepdf_, alphaspdf_ );
    hxsec->SetLineColor(kRed);
    hxsec->SetMarkerColor(kRed);
    hxsec->SetMarkerStyle(20);
    hxsec->DrawCopy();
    gPad->SetLogy(true);
    gPad->Print("xsec.pdf");
    return 0;
}
stand-full.cxx
#include <iostream>
#include <string>
#include <vector>
#include "appl_grid/appl_grid.h"
#include "appl_grid/appl_timer.h"
#include "TH1D.h"
#include "TPad.h"
// lhapdf routines
#include "LHAPDF/LHAPDF.h"
extern "C" void evolvepdf_(const double& , const double& , double* );
extern "C" double alphaspdf_(const double& Q);
// Extended APPLgrid convolution example.
// Reads a grid (argv[1], default "atlas-incljets04-eta1.root"), convolutes it
// with "CT10.LHgrid" (member 0) in total and per subprocess, prints all
// results and writes three plots: "xsec.pdf" (total plus subprocesses),
// "xsec8.pdf" and "xsec276.pdf" (convolutions with an energy-scale factor).
// Always returns 0.
int main(int argc, char** argv) {
    // use a default atlas inclusive grid unless one is named on the command line
    std::string gridname = "atlas-incljets04-eta1.root";
    if ( argc>1 ) gridname = argv[1];
    std::cout << "reading grid " << gridname << std::endl;

    // create the grid from the grid file
    appl::grid g(gridname);
    g.trim(); // trim away unneeded memory

    // initialise lhapdf
    std::cout << "setting up lhapdf" << std::endl;
    const std::string _pdfname = "CT10.LHgrid";
    int iset = 0;
    LHAPDF::initPDFSet( _pdfname, iset );

    // do the convolution into a vector, timing it
    std::cout << "doing standalone convolution" << std::endl;
    std::cout << g.getDocumentation() << std::endl;
    struct timeval atimer = appl_timer_start();
    std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );
    double atime = appl_timer_stop(atimer);

    const int Nsubproc = g.subProcesses();
    std::cout << "time : " << atime << " ms\tNsubproc : " << Nsubproc << std::endl;

    // one result vector per subprocess
    std::vector<std::vector<double> > xsb(Nsubproc);
    for ( int i=0 ; i<Nsubproc ; i++ ) {
        xsb[i] = g.vconvolute( i, evolvepdf_, alphaspdf_ );
    }

    // print results; std::size_t indices match the type of xsec.size()
    for ( std::size_t i=0 ; i<xsec.size() ; i++ ) {
        std::cout << "xsec(" << i << ")=" << xsec[i] << std::endl;
    }
    for ( int i=0 ; i<Nsubproc ; i++ ) {
        std::cout << "subproc: " << i << std::endl;
        for ( std::size_t j=0 ; j<xsec.size() ; j++ ) {
            // label each value with its bin index j; the subprocess index is
            // already given by the "subproc:" line above
            std::cout << "xsec(" << j << ")=" << xsb[i][j] << std::endl;
        }
    }

    // the same convolutions again, this time into histograms
    TH1D* hxsec = g.convolute( evolvepdf_, alphaspdf_ );
    std::vector<TH1D*> hxsb(Nsubproc);
    for ( int i=0 ; i<Nsubproc ; i++ ) {
        hxsb[i] = g.convolute( i, evolvepdf_, alphaspdf_ );
    }

    hxsec->SetLineColor(kRed);
    hxsec->SetMarkerColor(kRed);
    hxsec->SetMarkerStyle(20);
    hxsec->SetMinimum(0.000001);
    hxsec->DrawCopy();
    gPad->SetLogy(true);

    // cycle through five colour/style variations for the subprocess curves
    for ( int i=0 ; i<Nsubproc ; i++ ) {
        hxsb[i]->SetLineColor(kRed+i%5);
        hxsb[i]->SetMarkerColor(kRed+i%5);
        hxsb[i]->SetLineStyle(1+i%5);
        hxsb[i]->SetMarkerStyle(20+i%5);
        hxsb[i]->DrawCopy("same");
    }
    gPad->Print("xsec.pdf");

    // convolutions with an energy-scale factor of 7e3/8e3 and 7e3/2.76e3
    double escale = 7e3/8e3;
    TH1D* hxsec8 = g.convolute( escale, evolvepdf_, alphaspdf_ );
    escale = 7e3/2.76e3;
    TH1D* hxsec276 = g.convolute( escale, evolvepdf_, alphaspdf_ );

    hxsec->DrawCopy();
    hxsec8->SetLineColor(kRed+1);
    hxsec8->SetLineStyle(2);
    hxsec8->DrawCopy("same");
    gPad->Print("xsec8.pdf");

    hxsec276->SetLineColor(kRed+2);
    hxsec276->SetLineStyle(3);
    hxsec276->DrawCopy("same");
    gPad->Print("xsec276.pdf");
    return 0;
}
stand-save.cxx
#include <iostream>
#include <string>
#include <vector>
#include "appl_grid/appl_grid.h"
#include "appl_grid/appl_timer.h"
#include "TH1D.h"
#include "TPad.h"
// lhapdf routines
#include "LHAPDF/LHAPDF.h"
extern "C" void evolvepdf_(const double& , const double& , double* );
extern "C" double alphaspdf_(const double& Q);
// Standalone APPLgrid convolution example (stand-save variant).
// Reads a grid (argv[1], default "atlas-incljets04-eta1.root"), convolutes it
// with the LHAPDF set "CT10.LHgrid" (member 0), prints the cross section per
// bin and saves a log-scale plot to "xsec.pdf". Always returns 0.
int main(int argc, char** argv) {
    // use a default atlas inclusive grid unless one is named on the command line
    std::string gridname = "atlas-incljets04-eta1.root";
    if ( argc>1 ) gridname = argv[1];
    std::cout << "reading grid " << gridname << std::endl;

    // create the grid from the grid file
    appl::grid g(gridname);
    g.trim(); // trim away unneeded memory

    /// print the grid documentation
    std::cout << g.getDocumentation() << std::endl;

    // initialise lhapdf
    std::cout << "setting up lhapdf" << std::endl;
    // std::string is spelled out: this file has no using-declaration for it
    const std::string _pdfname = "CT10.LHgrid";
    int iset = 0;
    LHAPDF::initPDFSet( _pdfname, iset );

    // do the convolution into a vector
    std::cout << "doing standalone convolution" << std::endl;
    std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );

    // std::size_t index: matches the unsigned type returned by xsec.size()
    for ( std::size_t i=0 ; i<xsec.size() ; i++ ) std::cout << "xsec[" << i << "]\t= " << xsec[i] << std::endl;

    // do the convolution into a TH1D
    TH1D* hxsec = g.convolute( evolvepdf_, alphaspdf_ );
    hxsec->SetLineColor(kRed);
    hxsec->SetMarkerColor(kRed);
    hxsec->SetMarkerStyle(20);
    hxsec->DrawCopy();
    gPad->SetLogy(true);
    gPad->Print("xsec.pdf");
    return 0;
}
stand-tutorial.cxx
#include <iostream>
#include <string>
#include <vector>
#include "appl_grid/appl_grid.h"
#include "appl_grid/appl_timer.h"
#include "TH1D.h"
#include "TLatex.h"
#include "TFile.h"
#include "TH2D.h"
#include "TStyle.h"
#include "TCanvas.h"
#include "TPad.h"
#include "TStyle.h"
#include "TColor.h"
#include "TPaveStats.h"
// lhapdf routines
#include "LHAPDF/LHAPDF.h"
extern "C" void evolvepdf_(const double& x, const double& Q, double* xf);
extern "C" double alphaspdf_(const double& Q);
void setpalette()
{
const Int_t NRGBs = 7;
const Int_t NCont = 98;
Double_t stops[NRGBs] = { 0.00, 0.20, 0.40, 0.60, 0.70, 0.90, 1.00 };
Double_t red[NRGBs] = { 0.00, 0.00, 0.00, 0.60, 0.90, 1.00, 0.50 };
Double_t green[NRGBs] = { 0.00, 0.00, 0.40, 0.95, 1.00, 0.20, 0.00 };
Double_t blue[NRGBs] = { 0.00, 1.00, 1.00, 0.10, 0.00, 0.00, 0.00 };
TColor::CreateGradientColorTable(NRGBs, stops, red, green, blue, NCont);
gStyle->SetNumberContours(NCont);
}
// Returns a new histogram named "h2" covering the same axis ranges as h but
// with ten times as many bins per axis. Each source bin (i,j) is expanded into
// a 10x10 patch whose values are interpolated between that bin and its
// neighbours (i+1,j), (i,j+1) and (i+1,j+1); a neighbour whose content is
// zero is replaced by a fallback value before interpolating.
// Axis titles are copied from h. The caller receives a heap-allocated object.
TH2D* smooth(TH2D* h) {
    int subdiv = 10;
    const int nx = h->GetXaxis()->GetNbins();
    const int ny = h->GetYaxis()->GetNbins();

    TH2D* fine = new TH2D("h2","",
                          nx*subdiv,
                          h->GetXaxis()->GetBinLowEdge(1),
                          h->GetXaxis()->GetBinLowEdge(nx+1),
                          ny*subdiv,
                          h->GetYaxis()->GetBinLowEdge(1),
                          h->GetYaxis()->GetBinLowEdge(ny+1) );
    fine->GetXaxis()->SetTitle( h->GetXaxis()->GetTitle() );
    fine->GetYaxis()->SetTitle( h->GetYaxis()->GetTitle() );
    fine->GetYaxis()->SetTitleOffset(1.1);

    for ( int ix=1 ; ix<=nx ; ix++ ) {
        for ( int iy=1 ; iy<=ny ; iy++ ) {
            // corner values of the cell starting at (ix,iy)
            double c00 = h->GetBinContent(ix,iy);
            double c10 = h->GetBinContent(ix+1,iy);
            double c01 = h->GetBinContent(ix,iy+1);
            double c11 = h->GetBinContent(ix+1,iy+1);
            // empty neighbours fall back to an adjacent corner
            if ( c10==0 ) c10 = c00;
            if ( c01==0 ) c01 = c00;
            if ( c11==0 ) c11 = c01;

            for ( int sx=0 ; sx<subdiv ; sx++ ) {
                double tx = 1.0*sx/subdiv;
                for ( int sy=0 ; sy<subdiv ; sy++ ) {
                    double ty = 1.0*sy/subdiv;
                    double value = ( (c11-c01-c10+c00)*tx + c01 - c00 )*ty + (c10-c00)*tx + c00;
                    fine->SetBinContent( (ix-1)*subdiv+sx+1, (iy-1)*subdiv+sy+1, value);
                }
            }
        }
    }
    return fine;
}
// APPLgrid tutorial driver.
// Reads a grid (argv[1], or the default ATLAS inclusive-jet grid), sets up the
// LHAPDF set "CT10.LHgrid" (member 0) and runs the examples whose guard is
// `if ( true )`. Only example 1 is enabled; flip a guard to run another one.
// Each example writes its own plot file (example-N.pdf / .png).
// Always returns 0.
int main(int argc, char** argv) {
    gStyle->SetOptStat(0);
    setpalette();

    // use a default atlas inclusive grid unless one is named on the command line
    std::string gridname = "atlas-incljets-arxiv-1009.5908v2/r04/atlas-incljets-eta1.root";
    if ( argc>1 ) gridname = argv[1];
    std::cout << "reading grid " << gridname << std::endl;
    appl::grid g(gridname);
    // g.trim(); // trim away unneeded memory

    /// print the grid documentation
    std::cout << g.getDocumentation() << std::endl;

    // initialise lhapdf
    std::cout << "setting up lhapdf" << std::endl;
    const std::string _pdfname = "CT10.LHgrid";
    int iset = 0;
    LHAPDF::initPDFSet( _pdfname, iset );

    /// ----------------- example 1 --------------------
    if ( true ) {
        // do the convolution into a vector
        std::cout << "example 1: standalone convolution" << std::endl;
        struct timeval atimer = appl_timer_start();
        std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );
        double atime = appl_timer_stop(atimer);
        std::cout << "time : " << atime << " ms" << std::endl;
        // std::size_t index: matches the unsigned type returned by xsec.size()
        for ( std::size_t i=0 ; i<xsec.size() ; i++ ) std::cout << "xsec[" << i << "]\t= " << xsec[i] << std::endl;
        TH1D* hxsec = g.convolute( evolvepdf_, alphaspdf_ );
        hxsec->SetTitle("");
        hxsec->DrawCopy();
        gPad->SetLogy(true);
        gPad->Print("example-1.pdf");
    }

    /// ----------------- example 2 (corrections) --------------------
    if ( false ) {
        std::cout << "example 2: multiplicative corrections" << std::endl;
        std::cout << g.getApplyCorrections() << std::endl;
        g.setApplyCorrections(false);
        TH1D* huncor = g.convolute( evolvepdf_, alphaspdf_ );
        g.setApplyCorrections(true);
        TH1D* hcor = g.convolute( evolvepdf_, alphaspdf_ );
        huncor->SetLineStyle(2);
        huncor->SetTitle("");
        huncor->DrawCopy();
        hcor->DrawCopy("same");
        gPad->Print("example-2.pdf");
    }

    /// ----------------- example 3 (cms scaling) --------------------
    if ( false ) {
        std::cout << "example 3: centre-of-mass energy scaling " << std::endl;
        TH1D* h7 = g.convolute( evolvepdf_, alphaspdf_ );
        double Escale = 7/2.46;
        TH1D* h246 = g.convolute( Escale, evolvepdf_, alphaspdf_ );
        std::cout << "cms scale " << g.getCMSScale() << std::endl;
        h7->SetTitle("");
        h7->SetMinimum(0.001);
        gStyle->SetOptStat(0);
        h7->DrawCopy();
        h246->SetLineColor(kRed);
        h246->DrawCopy("same");
        gPad->SetLogy(true);
        gPad->Print("example-3.pdf");
    }

    /// ------------------ example 4 (scale variation) -----------------
    if ( false ) {
        std::cout << "example 4: scale variation " << std::endl;
        int nloops = 1; // loop order
        int Nbins = 30;
        TH2D* hscale = new TH2D("scale", "", Nbins, 0.45, 2.05, Nbins, 0.45, 2.05 );
        struct timeval atimer = appl_timer_start();
        // scan the (factorisation, renormalisation) scale-factor plane and
        // store the summed cross section at each point
        for ( int i=1 ; i<=hscale->GetNbinsX() ; i++ ) {
            double fscale = hscale->GetXaxis()->GetBinCenter(i);
            for ( int j=1 ; j<=hscale->GetNbinsY() ; j++ ) {
                double rscale = hscale->GetYaxis()->GetBinCenter(j);
                std::vector<double> xs = g.vconvolute( evolvepdf_, alphaspdf_, nloops, rscale, fscale );
                double total = 0;
                for ( int k=xs.size() ; k-- ; ) total += xs[k];
                hscale->Fill( fscale, rscale, total );
            }
        }
        double atime = appl_timer_stop(atimer);
        std::cout << "time " << atime << " ms" << std::endl;

        /// plot ....
        gStyle->SetPalette(52);
        gStyle->SetPadRightMargin(0.17);
        TCanvas* tc = new TCanvas( "","", 650, 600 );
        hscale->GetXaxis()->SetTitle("factorisation scale factor");
        hscale->GetYaxis()->SetTitle("renormalisation scale factor");
        hscale->GetYaxis()->SetTitleOffset(1.1);
        TH2D* _hscale = smooth(hscale);
        _hscale->DrawCopy("colz");
        gPad->Print("example-4.png");
        TFile fs("scale.root","recreate");
        fs.cd();
        hscale->Write();
        fs.Write();
        fs.Close();
    }

    /// ------------------ example 5 (subprocesses) -----------------
    if ( false ) {
        // this banner used to announce itself as "example 4"
        std::cout << "example 5: subprocess contributions " << std::endl;
        std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_);
        std::vector<TH1D*> xsec_sub(g.subProcesses());
        // turn every subprocess histogram into a fraction of the total
        for ( int i=0 ; i<g.subProcesses() ; i++ ) {
            xsec_sub[i] = g.convolute_subproc( i, evolvepdf_, alphaspdf_);
            for ( int j=0 ; j<xsec_sub[i]->GetNbinsX() ; j++ ) {
                xsec_sub[i]->SetBinContent( j+1, xsec_sub[i]->GetBinContent(j+1)/xsec[j] );
            }
        }

        /// plot ...
        const int Ncol = 7;
        int col[Ncol] = { kRed, kBlue, kMagenta, kCyan, kGreen, kBlack, kViolet };
        xsec_sub[0]->SetMaximum(0.6);
        xsec_sub[0]->SetMinimum(0);
        xsec_sub[0]->GetXaxis()->SetTitle("P_{T} [GeV]");
        xsec_sub[0]->GetYaxis()->SetTitle("subprocess fraction");
        xsec_sub[0]->SetTitle("");
        xsec_sub[0]->DrawCopy();
        for ( int i=0 ; i<g.subProcesses() ; i++ ) {
            // wrap the colour index so more than Ncol subprocesses cannot
            // read past the end of col[]
            const int colour = col[i%Ncol];
            xsec_sub[i]->SetLineColor(colour);
            xsec_sub[i]->DrawCopy("same");
            TLatex* tl = new TLatex(0,0,"");
            tl->SetTextColor(colour);
            tl->SetTextSize(0.04);
            char label[64];
            snprintf(label, sizeof label, "subprocess %d", i);
            tl->DrawLatex(430, 0.55-0.035*i, label );
        }
        gPad->SetLogy(false);
        gPad->Print("example-5.pdf");
    }

    /// -------------------- example 6 (pdf sets) -----------------------
    if ( false ) {
        const std::string _pdfname = "CT10.LHgrid";
        int iset = 0;
        LHAPDF::initPDFSet( _pdfname, iset );
        // reference cross section from member 0, used as the denominator below
        std::vector<double> _xs = g.vconvolute( evolvepdf_, alphaspdf_ );
        int Nset = LHAPDF::numberPDF();
        std::vector<TH1D*> xs(Nset);
        for ( int i=0 ; i<Nset ; i++ ) {
            struct timeval atimer = appl_timer_start();
            LHAPDF::initPDFSet( _pdfname, i );
            double atime = appl_timer_stop(atimer);
            std::cout << "set " << i << "\tinitPDF time " << atime << " ms" << std::endl;
            atimer = appl_timer_start();
            xs[i] = g.convolute( evolvepdf_, alphaspdf_ );
            atime = appl_timer_stop(atimer);
            std::cout << "convolution time " << atime << " ms" << std::endl;
            for ( int j=_xs.size() ; j-- ; ) xs[i]->SetBinContent( j+1, xs[i]->GetBinContent(j+1)/_xs[j]);
        }
        xs[0]->SetMaximum(1.1);
        xs[0]->SetMinimum(0.9);
        gPad->SetLogy(false);
        gPad->SetLogx(true);
        xs[0]->DrawCopy("l");
        for ( int i=0 ; i<Nset ; i++ ) xs[i]->DrawCopy("lsame");
        gPad->Print("example-6.pdf");
        gPad->SetLogx(false);
    }

    /// ------------------- example 7 (timers) ---------------------
    if ( false ) {
        gStyle->SetOptStat(2220);
        std::cout << "example 7 : timings" << std::endl;
        TH1D* htrim = new TH1D("htrim", "", 100, 38, 45);
        TH1D* huntrim = new TH1D("huntrim", "", 100, 38, 45);

        // time 400 convolutions on the trimmed grid ...
        g.trim();
        std::cout << "grid size : " << g.size() << std::endl;
        for ( int i=400 ; i-- ; ) {
            struct timeval atimer = appl_timer_start();
            std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );
            double atime = appl_timer_stop( atimer );
            htrim->Fill(atime);
        }

        // ... and 400 on the untrimmed grid
        g.untrim();
        std::cout << "grid size : " << g.size() << std::endl;
        for ( int i=400 ; i-- ; ) {
            struct timeval atimer = appl_timer_start();
            std::vector<double> xsec = g.vconvolute( evolvepdf_, alphaspdf_ );
            double atime = appl_timer_stop( atimer );
            huntrim->Fill(atime);
        }

        if ( huntrim->GetMaximum()<htrim->GetMaximum() ) huntrim->SetMaximum(1.1*htrim->GetMaximum());
        else htrim->SetMaximum(1.1*huntrim->GetMaximum());
        huntrim->SetLineStyle(2);
        huntrim->SetLineColor(kRed);
        htrim->GetXaxis()->SetTitle("time [ms]");
        huntrim->GetXaxis()->SetTitle("time [ms]");
        huntrim->Draw();
        htrim->Draw("same");
        huntrim->Draw("same");
        htrim->Draw("sames+");
        huntrim->Draw("sames+");
        gPad->Update();

        TPaveStats* tp1 = (TPaveStats*)htrim->GetListOfFunctions()->FindObject("stats");
        TPaveStats* tp2 = (TPaveStats*)huntrim->GetListOfFunctions()->FindObject("stats");
        std::cout << tp1 << " " << tp2 << std::endl;
        // the lookups can come back null; only reposition boxes that exist
        if ( tp2 ) tp2->SetTextColor(kRed);
        if ( tp1 ) {
            tp1->SetY1NDC(0.93-0.15);
            tp1->SetY2NDC(0.93);
        }
        if ( tp2 ) {
            tp2->SetY1NDC(0.93-0.16-0.15);
            tp2->SetY2NDC(0.93-0.16);
        }
        if ( tp1 ) tp1->Draw();
        if ( tp2 ) tp2->Draw();
        gPad->Update();
        gPad->Print("example-7.pdf");
    }
    return 0;
}
any help will be greatly appreciated, thank you.
Working on WinXP SP3.
Visual Studio 2005.
Trying to read memory of another process.
std::cout<<"Reading Process Memory\n";
const DWORD pid = 3476;
HANDLE handle = OpenProcess(PROCESS_VM_READ,FALSE,pid);
if(handle == NULL) {std::cout<<"Failed to open process\n";return 0;}
char* buffer1 = new char[256];
char* buffer2 = new char[256];
memset(buffer1,0,256*sizeof(char));
memset(buffer2,0,256*sizeof(char));
DWORD nbr = 0;
int address = 0x400000;
BOOL result = ReadProcessMemory(handle,&address,buffer1,32,&nbr);
if(result!=1) std::cout<<"Failed to read memory\n";
address = 0x400000+0x1000;
result = ReadProcessMemory(handle,&address,buffer2,32,&nbr);
if(result!=1) std::cout<<"Failed to read memory\n";
int i = 0;
while(i++<10)
{
if(buffer1[i]!=buffer2[i]) {std::cout<<"Buffers are different\n";break;}
}
delete[] buffer1;
delete[] buffer2;
CloseHandle(handle);
std::cin>>i;
return 0;
The problem is that both buffers are getting the same values. ReadProcessMemory returns 1 and the number of bytes read is the same as requested.
Your calls to ReadProcessMemory are incorrect. You should be using address directly, not &address. You may need to cast it to a const void *.
result = ReadProcessMemory(handle, reinterpret_cast<const void *>(address), buffer, 32, &nbr);
And you should probably declare address as a type large enough to hold a pointer, like std::intptr_t or INT_PTR.
INT_PTR address = 0x400000;
The buffer can't be a char; it has to be an int. Here is a working example:
#include <windows.h>
#include <iostream>
#include <string.h>
using namespace std;
int main()
{
int point1=0;
int i=0;
int d=0;
char* value[4];
SIZE_T stBytes = 0;
HWND hwnd;
HANDLE phandle;
DWORD pid;
hwnd = FindWindow(NULL, "calc"); // calc is the name of the windows process
if (hwnd != 0) {
GetWindowThreadProcessId(hwnd, &pid);
phandle = OpenProcess(PROCESS_ALL_ACCESS, 0, pid);
} else {
cout << "process is not executing";
cin.get();
return 0;
}
if (phandle != 0) {
for(i=0;i<4;i++) // 4 or wathever
{
cout << "The pointer is 0x1001000" << endl; //Print the pointer
ReadProcessMemory(phandle, (LPVOID)0x1001000+i, &point1, 4, &stBytes); //Get the content from 0x1001000 and store it in point1
cout << "decimal content point1 " << point1 << " (DEC)" << endl; //Print the decimal content of point1
printf("%x \n",point1); // print hexadecimal content of point1
char *p=(char*)&point1; // point point1 buffer
for(d=0;d<4;d++)
printf("%x",(unsigned int)(unsigned char) *(p+d)); // print backwards (because the buffer is like a LIFO) and see the dbg debugger
}
ReadProcessMemory(phandle, (LPVOID)point1, &value, 6, &stBytes); //Get the value that is in the address pointed by the pointer
cout << "The value in the non-static address is " << (char*)value << endl << endl; //Print the value
cout << "Press ENTER to exit." << endl;
cin.get();
} else {
cout << "Couldn't get a handle";
cin.get();
// address 0x1001000 content hex 5278DA77
}
}
I feel a bit bad making a forum thread when there are already 10 with the same name, but after checking them all, along with most of the guides around, I still can't figure out the problem.
I have a char array [40090][11], and I want to perform a custom operation on each possible combination of two of its elements (I consider the whole 11-byte bunch as an element). I understand that this is a kind of matrix multiplication, the matrices being one-column and one-row.
Following the SDK manual I am thinking of having 1 thread per output element. Since 40090=19*2110, I am using:
// 19 x 19 = 361 threads in each block.
dim3 threadsperblock(19,19);
// 2110 x 2110 blocks; 2110 * 19 = 40090 threads along each grid dimension.
dim3 blocksingrid(2110,2110);
// Launch xkernel with that configuration, passing the device buffer.
xkernel<<<blocksingrid, threadsperblock>>>(dev_b2);
Question 1: Is this fine?
Alright, then, I THINK I am following the SDK manual's example faithfully (not the one using shared memory). Whenever I dare to perform a portion of my intended operations on the data, though, I get a massively unhelpful error 30 returned: Unknown error. So, Question 2: What am I doing wrong? Note: Disregard the kernel's not saving anything anywhere.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <ctime>
#include <stdio.h>
using namespace std;
// Forward declarations: host-side wrapper, the kernel, and the kernel's device helper.
cudaError_t cudafunct(void);
__global__ void xkernel(char * dev_b2);
__device__ unsigned char typecheck(unsigned char type1,unsigned char type2);
// Number of 11-byte records in block2.
#define b2c 40090
// Host-side record table: filled from "2.blk" in main() and copied to the GPU in cudafunct().
unsigned char block2[b2c][11];//
//unsigned int i,b1,b2,counter=0;//Block(2),Piece,Rotation,Type(of block2),InterconnectinTriangle
//unsigned char *block4,type=0;
// Output stream; in this listing it is only referenced from commented-out code.
ofstream ofile;
// Loads the block2 table from "2.blk", runs the CUDA pass over it via
// cudafunct(), and pauses before exiting.
// Returns 1 when the input file cannot be read completely or the CUDA pass
// fails, 0 otherwise.
int main()
{
ifstream block2file("2.blk",ios::binary);
if (!block2file.is_open()) {
fprintf(stderr, "Could not open 2.blk!\n");
system("PAUSE");
return 1;
}
block2file.read((char*)(&block2),b2c*11);
// A short read would otherwise leave part of the table zeroed and go unnoticed.
if (block2file.gcount() != b2c*11) {
fprintf(stderr, "2.blk is too short: expected %d bytes, got %ld\n", b2c*11, (long)block2file.gcount());
block2file.close();
system("PAUSE");
return 1;
}
block2file.close();
//block4=new unsigned char[200000000];//200MB will do, better than doing constant reallocs
cudaError_t cudaStatus = cudafunct();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudafunct failed!");
system("PAUSE");
return 1;
}
/*
// cudaDeviceReset must be called before exiting in order for profiling and
// tracing tools such as Nsight and Visual Profiler to show complete traces.
cudaStatus = cudaDeviceReset();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaDeviceReset failed!");
return 1;
}*/
cout<<"Sequence end. Saving to file...\n";
//ofile.open("blk4.et2",ios::binary);
//ofile.write((char*)block4,17*counter);
//ofile.close();
// Only consumed by the commented-out timing report below.
int t=clock();
(void)t;
//cout<<"\nFound a total of "<<counter<<" block4s.\nTime elapsed: "<<t<<" clocks / "<<(double)t/(double)CLOCKS_PER_SEC<<" seconds\n";
system("PAUSE");
return 0;
}
// Copies the host table block2 to the GPU, launches xkernel with 19x19 threads
// per block on a 2110x2110 grid (40090 threads along each dimension), and waits
// for the kernel to finish.
// Returns cudaSuccess, or the first CUDA error encountered; each failure is
// also reported on stderr. The device buffer is freed on every path.
cudaError_t cudafunct(void)
{
char *dev_b2 = 0;
cudaError_t cudaStatus;
// Declared ahead of the first `goto Error` so that no jump skips their initialisation.
dim3 threadsperblock(19,19);
dim3 blocksingrid(2110,2110);
cudaStatus = cudaMalloc((void**)&dev_b2, sizeof(block2));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_b2, block2, sizeof(block2), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
xkernel<<<blocksingrid, threadsperblock>>>(dev_b2);
// The launch statement returns nothing; launch errors (e.g. an invalid
// configuration) have to be fetched explicitly.
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "xkernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
goto Error;
}
// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered while it was running.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching xkernel!\n", cudaStatus);
goto Error;
}
Error:
// cudaFree accepts a null pointer, so this is safe even when cudaMalloc failed.
cudaFree(dev_b2);
return cudaStatus;
}
// Kernel run by one thread per (i, j) pair of records in dev_b2.
// dev_b2: device copy of block2, i.e. b2c consecutive records of 11 bytes each.
// In its current state the kernel only reads the first byte of record i; the
// pairing logic is still commented out.
__global__ void xkernel(char *dev_b2)
{
const int rowBytes = 11; // bytes per record (second dimension of block2)
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
// Threads that fall outside the table must not touch memory.
if (i >= b2c || j >= b2c) return;
// Record k starts at byte k*rowBytes. Using k*b2c as the offset would index
// far beyond the b2c*11 bytes that were allocated for dev_b2.
const char *rec_i = dev_b2 + i*rowBytes;
const char *rec_j = dev_b2 + j*rowBytes;
(void)rec_j; // only used by the commented-out logic below
/*for(int k=0;k<11;k++)
{
lb2[0][k]=rec_i[k];
lb2[1][k]=rec_j[k];
}*/
int b00;
b00=rec_i[0];
(void)b00;
//int type=typecheck(rec_i[4],rec_j[4]);
//if(!j && !(i % 100))cout<<setw(6)<<i<<" / "<<jc<<" ("<<setw(10)<<(float)100*i/jc<<" % )"<<endl;
/*if(
(rec_i[7]!=rec_j[9])||//SW~NW
(rec_i[6]!=rec_j[10])//SE~NE
) return;
if( (type=typecheck(rec_i[4],rec_j[4]) ) ==255) return;*/
/*if(
(rec_i[0]==rec_j[0])||//1st=3rd
(rec_i[0]==rec_j[2])||//1st=4th
(rec_i[2]==rec_j[0])||//2nd=3rd
(rec_i[2]==rec_j[2])//2nd=4th
) return;*/
/*
*(block4+counter*17+0)=b2[i][0];//1st piece
*(block4+counter*17+1)=b2[i][1];//1st rotation
*(block4+counter*17+2)=b2[i][2];//2nd piece
*(block4+counter*17+3)=b2[i][3];//2nd rotation
*(block4+counter*17+4)=b2[j][0];//3rd piece
*(block4+counter*17+5)=b2[j][1];//3rd rotation
*(block4+counter*17+6)=b2[j][2];//4th piece
*(block4+counter*17+7)=b2[j][3];//4th rotation
*(block4+counter*17+8)=type;
*(block4+counter*17+9)=b2[i][5];//Right frame colours, down->up
*(block4+counter*17+10)=b2[j][5];
*(block4+counter*17+11)=b2[j][6];//Up frame colours, right->left
*(block4+counter*17+12)=b2[j][7];
*(block4+counter*17+13)=b2[j][8];//Left frame colours, up->down
*(block4+counter*17+14)=b2[i][8];
*(block4+counter*17+15)=b2[i][9];//Down frame colours, left->right
*(block4+counter++*17+16)=b2[i][10];*/
}
// Maps an ordered pair of type codes to a result code.
// Returns 0..13 for the fourteen recognised (type1, type2) pairs and 255 for
// every other pair.
// Author's warning kept from the original: "Previous error! First parenthesis
// is t*2* = upper piece!" -- NOTE(review): which argument is the upper piece
// should be confirmed against the callers.
__device__ unsigned char typecheck(unsigned char type1,unsigned char type2)
{
    // Fold both codes into one key so the whole table becomes a single switch.
    const int pairKey = (type1 << 8) | type2;
    switch (pairKey) {
    case (4  << 8) | 0:  return 0;
    case (6  << 8) | 1:  return 1;
    case (2  << 8) | 6:  return 2;
    case (3  << 8) | 4:  return 3;
    case (4  << 8) | 4:  return 4;
    case (8  << 8) | 5:  return 5;
    case (6  << 8) | 6:  return 6;
    case (7  << 8) | 8:  return 7;
    case (8  << 8) | 8:  return 8;
    case (9  << 8) | 8:  return 9;
    case (10 << 8) | 8:  return 10;
    case (8  << 8) | 11: return 11;
    case (8  << 8) | 12: return 12;
    case (8  << 8) | 13: return 13;
    default:             return 255;
    }
}
I have a feeling you read out-of-bounds from your dev_b2 array.
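blockIdx.x is in the range [0..2109] and threadIdx.x is in the range [0..18], so the variable i is in the range [0..40089]. But then you multiply it by b2c.
As a result, the highest offset you read from will be b2c*40089 = 1607168010.
But dev_b2 only has a size of b2c*11 = 440990 bytes. Each element is 11 bytes long, so the offset should be i*11 rather than i*b2c.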