Win32/x86 SEH Bug on EXCEPTION_FLT_INVALID_OPERATION? - winapi

I just experimented with Win32 structured exception handling.
I generated a signalling NaN through a 0.0 / 0.0 division.
I wrote the following test-code:
#include <windows.h>
#include <cfloat>
#include <iostream>
// Unmasks the x87/SSE "invalid operation" exception, performs 0.0 / 0.0 inside
// an SEH __try block and reports whether the filter accepted the exception.
// NOTE(review): on 32-bit builds SSE faults are presumably delivered with a
// different code (e.g. STATUS_FLOAT_MULTIPLE_TRAPS), which this filter would
// pass on via EXCEPTION_CONTINUE_SEARCH - confirm against the platform docs.
int main()
{
    double numerator = 0.0;
    double denominator = 0.0;
    double quotient;

    // Read the current control word first, then clear only the invalid-operation mask bit.
    unsigned int const currentControl = _controlfp( 0, 0 );
    _controlfp( currentControl & ~_EM_INVALID, _MCW_EM );

    __try
    {
        quotient = numerator / denominator;
    }
    __except( GetExceptionCode() != EXCEPTION_FLT_INVALID_OPERATION
        ? EXCEPTION_CONTINUE_SEARCH : EXCEPTION_EXECUTE_HANDLER )
    {
        std::cout << "exception caught";
    }
    return 0;
}
I compiled the code both for Win32 x86 and Win32 x64.
In both cases, SSE2 code is generated.
For x64, the exception is caught properly. But for x86, I get an uncaught exception.
When I change the __except-line to
__except( EXCEPTION_EXECUTE_HANDLER )
and compile the code for x86, the exception is caught.
Is this a Windows-bug?
[EDIT1]
I extended my program, here it is:
#include <windows.h>
#include <intrin.h>
#include <cfloat>
#include <limits>
#include <iostream>
using namespace std;
void PrintSseExceptionMask();
void ClearSseExceptionFlags();
LONG CALLBACK Handler( PEXCEPTION_POINTERS ExceptionInfo );
// Enables only the invalid-operation floating point exception, prints the
// resulting MXCSR mask state, installs Handler as a vectored exception handler,
// clears the sticky MXCSR flags and then triggers the exception with 0.0 / 0.0.
int main()
{
    double numerator = 0.0;
    double denominator = 0.0;
    double quotient;

    // Every mask bit set except _EM_INVALID, i.e. only "invalid operation" traps.
    unsigned int const maskAllButInvalid = ~_EM_INVALID & _MCW_EM;
    _controlfp( maskAllButInvalid, _MCW_EM );
    PrintSseExceptionMask();

    AddVectoredExceptionHandler( 0, Handler );
    ClearSseExceptionFlags();

    quotient = numerator / denominator;
    return 0;
}
void PrintSseExceptionFlags( unsigned mxcsr );
// Vectored exception handler: reports which floating point status code was
// raised, dumps the MXCSR exception flags read on entry, and always lets the
// search for a handler continue.
LONG CALLBACK Handler( PEXCEPTION_POINTERS pep )
{
    // Snapshot MXCSR before any further floating point or stream work is done.
    unsigned const mxcsrOnEntry = _mm_getcsr();

    switch( pep->ExceptionRecord->ExceptionCode )
    {
    case STATUS_FLOAT_INVALID_OPERATION:
        cout << "float invalid operation caught" << endl;
        break;
    case STATUS_FLOAT_MULTIPLE_TRAPS:
        cout << "multiple float traps caught" << endl;
        break;
    default:
        break;
    }

    PrintSseExceptionFlags( mxcsrOnEntry );
    return EXCEPTION_CONTINUE_SEARCH;
}
// MXCSR exception status flags: one bit each in bits 0..5.
constexpr unsigned MXCSR_INVALID_OPERATION_FLAG = 1u << 0;
constexpr unsigned MXCSR_DENORMAL_FLAG = 1u << 1;
constexpr unsigned MXCSR_DIVIDE_BY_ZERO_FLAG = 1u << 2;
constexpr unsigned MXCSR_OVERFLOW_FLAG = 1u << 3;
constexpr unsigned MXCSR_UNDERFLOW_FLAG = 1u << 4;
constexpr unsigned MXCSR_PRECISION_FLAG = 1u << 5;
// All six status flags combined (0x003F).
constexpr unsigned MXCSR_EXCEPTION_FLAGS =
    MXCSR_INVALID_OPERATION_FLAG | MXCSR_DENORMAL_FLAG | MXCSR_DIVIDE_BY_ZERO_FLAG |
    MXCSR_OVERFLOW_FLAG | MXCSR_UNDERFLOW_FLAG | MXCSR_PRECISION_FLAG;

// MXCSR exception mask bits: one bit each in bits 7..12.
constexpr unsigned MXCSR_INVALID_OPERATION_MASK = 1u << 7;
constexpr unsigned MXCSR_DENORMAL_OPERATION_MASK = 1u << 8;
constexpr unsigned MXCSR_DIVIDE_BY_ZERO_MASK = 1u << 9;
constexpr unsigned MXCSR_OVERFLOW_MASK = 1u << 10;
constexpr unsigned MXCSR_UNDERFLOW_MASK = 1u << 11;
constexpr unsigned MXCSR_PRECISION_MASK = 1u << 12;
// All six mask bits combined (0x1F80).
constexpr unsigned MXCSR_EXCEPTION_MASK =
    MXCSR_INVALID_OPERATION_MASK | MXCSR_DENORMAL_OPERATION_MASK | MXCSR_DIVIDE_BY_ZERO_MASK |
    MXCSR_OVERFLOW_MASK | MXCSR_UNDERFLOW_MASK | MXCSR_PRECISION_MASK;
// Prints one line per exception status flag that is set in `mxcsr`, or
// "no exception flags set" when none of the six status bits is set.
//
// mxcsr: a value previously read from the MXCSR register.
void PrintSseExceptionFlags( unsigned mxcsr )
{
    // The names are string literals, so the pointer must be to const char
    // (binding a literal to `char *` is ill-formed since C++11).
    struct FlagName
    {
        unsigned flag;
        char const *name;
    };
    static FlagName const flagNames[] =
    {
        { MXCSR_INVALID_OPERATION_FLAG, "invalid operation flag" },
        { MXCSR_DENORMAL_FLAG, "denormal flag" },
        { MXCSR_DIVIDE_BY_ZERO_FLAG, "divide by zero flag" },
        { MXCSR_OVERFLOW_FLAG, "overflow flag" },
        { MXCSR_UNDERFLOW_FLAG, "underflow flag" },
        { MXCSR_PRECISION_FLAG, "precision flag" }
    };

    unsigned const exceptionFlags = mxcsr & MXCSR_EXCEPTION_FLAGS;
    if( exceptionFlags == 0 )
    {
        std::cout << "no exception flags set" << std::endl;
        return;
    }
    // Range-for over the array replaces the former sentinel-terminated scan.
    for( FlagName const &entry : flagNames )
        if( exceptionFlags & entry.flag )
            std::cout << entry.name << " set" << std::endl;
}
// Reads the current MXCSR register and prints one line per exception whose
// mask bit is clear (i.e. the exception is enabled), or a single line when all
// six exceptions are masked.
void PrintSseExceptionMask()
{
    // The names are string literals, so the pointer must be to const char
    // (binding a literal to `char *` is ill-formed since C++11).
    struct MaskName
    {
        unsigned mask;
        char const *name;
    };
    static MaskName const maskNames[] =
    {
        { MXCSR_INVALID_OPERATION_MASK, "invalid operation" },
        { MXCSR_DENORMAL_OPERATION_MASK, "denormal operation" },
        { MXCSR_DIVIDE_BY_ZERO_MASK, "divide by zero" },
        { MXCSR_OVERFLOW_MASK, "overflow" },
        { MXCSR_UNDERFLOW_MASK, "underflow" },
        { MXCSR_PRECISION_MASK, "precision" }
    };

    unsigned const exceptionMasks = _mm_getcsr() & MXCSR_EXCEPTION_MASK;
    if( exceptionMasks == MXCSR_EXCEPTION_MASK )
    {
        // Spelling of the message corrected ("excpetions" -> "exceptions").
        std::cout << "all exceptions masked" << std::endl;
        return;
    }
    // A cleared mask bit means the corresponding exception is delivered.
    for( MaskName const &entry : maskNames )
        if( !(exceptionMasks & entry.mask) )
            std::cout << entry.name << " exception enabled" << std::endl;
}
// Clears the six exception status bits in MXCSR and leaves every other bit
// of the register as it was.
void ClearSseExceptionFlags()
{
    unsigned const currentMxcsr = _mm_getcsr();
    unsigned const withoutFlags = currentMxcsr & ~MXCSR_EXCEPTION_FLAGS;
    _mm_setcsr( withoutFlags );
}
When the exception is caught in Handler(), it reports that only the invalid operation flag of the MXCSR is set, and no other flag.
So it seems that the FPU does not behave differently in x86 mode; rather, this looks like a Windows bug.

Related

Why does ReadProcessMemory fail so often with ERROR_PARTIAL_COPY?

The following program tries to scan read/write pages of a foreign application with ReadProcessMemory():
#include <Windows.h>
#include <iostream>
#include <vector>
#include <charconv>
#include <cstring>
#include <vector>
#include <stdexcept>
#include <sstream>
#include <cctype>
#include <fstream>
#include <cmath>
using namespace std;
vector<vector<MEMORY_BASIC_INFORMATION>> pageTree( HANDLE hProcess, DWORD dwMask );
// Owning wrapper for a Win32 HANDLE: closes the handle on destruction unless it
// is null or INVALID_HANDLE_VALUE.
using XHANDLE = unique_ptr<void, decltype([]( HANDLE h )
    {
        if( h != nullptr && h != INVALID_HANDLE_VALUE )
            CloseHandle( h );
    })>;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
try
{
DWORD dwProcessId = [&]() -> DWORD
{
DWORD dwRet;
if( from_chars_result fcr = from_chars( argv[1], argv[1] + strlen( argv[1] ), dwRet ); fcr.ec != errc() || *fcr.ptr )
throw invalid_argument( "process-id unparseable" );
return dwRet;
}();
XHANDLE hProcess( [&]() -> HANDLE
{
HANDLE hRet = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, dwProcessId );
if( !hRet )
throw system_error( (int)GetLastError(), system_category(), "can't open process" );
return hRet;
}() );
vector<vector<MEMORY_BASIC_INFORMATION>> vvmbi = pageTree( hProcess.get(), PAGE_READWRITE );
vector<char> processRegion;
size_t
succs = 0, partialErrs = 0, errs = 0,
total = 0, read = 0, skipped = 0;
for( vector<MEMORY_BASIC_INFORMATION> const &vmbi : vvmbi )
for( MEMORY_BASIC_INFORMATION const &vmbi : vmbi )
{
processRegion.resize( vmbi.RegionSize );
size_t actuallyRead;
bool succ = ReadProcessMemory( hProcess.get(), vmbi.BaseAddress, to_address( processRegion.begin() ), vmbi.RegionSize, &actuallyRead );
succs += succ;
partialErrs += !succ && GetLastError() == ERROR_PARTIAL_COPY;
errs += !succ;
bool bytesCopied = succ || GetLastError() == ERROR_PARTIAL_COPY;
actuallyRead = bytesCopied ? actuallyRead : 0;
total += processRegion.size(),
read += actuallyRead;
skipped += bytesCopied ? processRegion.size() - actuallyRead : processRegion.size();
}
cout << "successes: " << succs << endl;
cout << "partial errs: " << partialErrs << endl;
cout << "errs: " << errs << endl;
cout << "read: " << read << endl;
cout << "skipped: " << skipped;
auto pct = []( double a, double b ) -> double { return trunc( a / b * 1000.0 + 0.5 ) / 10.0; };
cout << " (" << pct( (double)(ptrdiff_t)skipped, (double)(ptrdiff_t)total ) << "%)" << endl;
}
catch( exception const &exc )
{
cout << exc.what() << endl;
}
}
template<typename Fn>
requires requires( Fn fn, MEMORY_BASIC_INFORMATION &mbi ) { { fn( mbi ) } -> std::convertible_to<bool>; }
void enumProcessMemory( HANDLE hProcess, Fn fn );
// Enumerates the memory regions of `hProcess` and returns those whose
// AllocationProtect shares at least one bit with `dwMask`, grouped so that each
// inner vector holds the consecutive regions of one allocation.
//
// hProcess: process handle passed through to enumProcessMemory().
// dwMask:   protection bits a region's AllocationProtect must intersect.
vector<vector<MEMORY_BASIC_INFORMATION>> pageTree( HANDLE hProcess, DWORD dwMask )
{
    vector<vector<MEMORY_BASIC_INFORMATION>> vvmbis;
    enumProcessMemory( hProcess, [&]( MEMORY_BASIC_INFORMATION &mbi ) -> bool
    {
        if( !(mbi.AllocationProtect & dwMask) )
            return true;
        // Start a new group when the allocation changes. Comparing BaseAddress
        // (as before) differs for every region, so every group had exactly one
        // entry; AllocationBase is what regions of one allocation share.
        if( vvmbis.empty() || vvmbis.back().back().AllocationBase != mbi.AllocationBase )
            vvmbis.emplace_back();
        vvmbis.back().emplace_back( mbi );
        return true;
    } );
    return vvmbis;
}
// Walks the address space of `hProcess` with VirtualQueryEx(), starting at
// address 0, and hands every region to `fn`. Stops when `fn` returns false or
// when the query fails with ERROR_INVALID_PARAMETER; any other query failure
// is thrown as a system_error.
template<typename Fn>
    requires requires( Fn fn, MEMORY_BASIC_INFORMATION &mbi ) { { fn( mbi ) } -> std::convertible_to<bool>; }
void enumProcessMemory( HANDLE hProcess, Fn fn )
{
    MEMORY_BASIC_INFORMATION mbi;
    char *queryAddress = nullptr;
    while( true )
    {
        size_t const written = VirtualQueryEx( hProcess, queryAddress, &mbi, sizeof mbi );
        if( written != sizeof mbi )
        {
            DWORD const dwErr = GetLastError();
            if( dwErr == ERROR_INVALID_PARAMETER )
                return;
            throw system_error( (int)dwErr, system_category(), "can't query process pages" );
        }
        if( !fn( mbi ) )
            return;
        // The next query starts directly behind the region just reported.
        queryAddress = (char *)mbi.BaseAddress + mbi.RegionSize;
    }
}
This is the result from scanning explorer.exe:
successes: 316
partial errs: 282
errs: 282
read: 139862016
skipped: 4452511744 (97%)
I.e. 316 reads from the foreign address space succeed and 282 fail; all 282 failures are partial reads (ERROR_PARTIAL_COPY). The byte counts read and skipped are shown above; 97% of the total memory was skipped.
Why does ReadProcessMemory() fail so often, or what am I doing wrong here?
Remy was mostly right. Here's the corrected code with a filter-callback on pageTree instead of a protection mask.
#include <Windows.h>
#include <iostream>
#include <vector>
#include <charconv>
#include <cstring>
#include <vector>
#include <stdexcept>
#include <sstream>
#include <cctype>
#include <fstream>
#include <cmath>
using namespace std;
template<typename FilterFn>
requires requires( FilterFn fn, MEMORY_BASIC_INFORMATION &mbi ) { { fn( mbi ) } -> std::convertible_to<bool>; }
vector<vector<MEMORY_BASIC_INFORMATION>> pageTree( HANDLE hProcess, FilterFn filterFn );
// Owning wrapper for a Win32 HANDLE: closes the handle on destruction unless it
// is null or INVALID_HANDLE_VALUE.
using XHANDLE = unique_ptr<void, decltype([]( HANDLE h )
    {
        if( h != nullptr && h != INVALID_HANDLE_VALUE )
            CloseHandle( h );
    })>;
int main( int argc, char **argv )
{
if( argc < 2 )
return EXIT_FAILURE;
try
{
DWORD dwProcessId = [&]() -> DWORD
{
DWORD dwRet;
if( from_chars_result fcr = from_chars( argv[1], argv[1] + strlen( argv[1] ), dwRet ); fcr.ec != errc() || *fcr.ptr )
throw invalid_argument( "process-id unparseable" );
return dwRet;
}();
XHANDLE hProcess( [&]() -> HANDLE
{
HANDLE hRet = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, dwProcessId );
if( !hRet )
throw system_error( (int)GetLastError(), system_category(), "can't open process" );
return hRet;
}() );
vector<vector<MEMORY_BASIC_INFORMATION>> vvmbi = pageTree( hProcess.get(),
[]( MEMORY_BASIC_INFORMATION &mbi ) -> bool
{
return mbi.State == MEM_COMMIT;
} );
vector<char> processRegion;
size_t
succs = 0, partialErrs = 0, errs = 0,
total = 0, read = 0, skipped = 0;
for( vector<MEMORY_BASIC_INFORMATION> const &vmbi : vvmbi )
for( MEMORY_BASIC_INFORMATION const &vmbi : vmbi )
{
processRegion.resize( vmbi.RegionSize );
size_t actuallyRead;
bool succ = ReadProcessMemory( hProcess.get(), vmbi.BaseAddress, to_address( processRegion.begin() ), vmbi.RegionSize, &actuallyRead );
succs += succ;
partialErrs += !succ && GetLastError() == ERROR_PARTIAL_COPY;
errs += !succ;
bool bytesCopied = succ || GetLastError() == ERROR_PARTIAL_COPY;
actuallyRead = bytesCopied ? actuallyRead : 0;
total += processRegion.size(),
read += actuallyRead;
skipped += bytesCopied ? processRegion.size() - actuallyRead : processRegion.size();
}
cout << "successes: " << succs << endl;
cout << "partial errs: " << partialErrs << endl;
cout << "errs: " << errs << endl;
cout << "read: " << read << endl;
cout << "skipped: " << skipped;
auto pct = []( double a, double b ) -> double { return trunc( a / b * 1000.0 + 0.5 ) / 10.0; };
cout << " (" << pct( (double)(ptrdiff_t)skipped, (double)(ptrdiff_t)total ) << "%)" << endl;
}
catch( exception const &exc )
{
cout << exc.what() << endl;
}
}
template<typename Fn>
requires requires( Fn fn, MEMORY_BASIC_INFORMATION &mbi ) { { fn( mbi ) } -> std::convertible_to<bool>; }
void enumProcessMemory( HANDLE hProcess, Fn fn );
// Enumerates the memory regions of `hProcess` and returns those accepted by
// `filterFn`, grouped so that each inner vector holds the consecutive accepted
// regions of one allocation.
//
// hProcess: process handle passed through to enumProcessMemory().
// filterFn: predicate called with each region; regions it rejects are skipped.
template<typename FilterFn>
    requires requires( FilterFn fn, MEMORY_BASIC_INFORMATION &mbi ) { { fn( mbi ) } -> std::convertible_to<bool>; }
vector<vector<MEMORY_BASIC_INFORMATION>> pageTree( HANDLE hProcess, FilterFn filterFn )
{
    vector<vector<MEMORY_BASIC_INFORMATION>> vvmbis;
    enumProcessMemory( hProcess, [&]( MEMORY_BASIC_INFORMATION &mbi ) -> bool
    {
        if( !filterFn( mbi ) )
            return true;
        // Start a new group when the allocation changes. Comparing BaseAddress
        // (as before) differs for every region, so every group had exactly one
        // entry; AllocationBase is what regions of one allocation share.
        if( vvmbis.empty() || vvmbis.back().back().AllocationBase != mbi.AllocationBase )
            vvmbis.emplace_back();
        vvmbis.back().emplace_back( mbi );
        return true;
    } );
    return vvmbis;
}
// Walks the address space of `hProcess` with VirtualQueryEx(), starting at
// address 0, and hands every region to `fn`. Stops when `fn` returns false or
// when the query fails with ERROR_INVALID_PARAMETER; any other query failure
// is thrown as a system_error.
template<typename Fn>
    requires requires( Fn fn, MEMORY_BASIC_INFORMATION &mbi ) { { fn( mbi ) } -> std::convertible_to<bool>; }
void enumProcessMemory( HANDLE hProcess, Fn fn )
{
    MEMORY_BASIC_INFORMATION mbi;
    char *queryAddress = nullptr;
    while( true )
    {
        size_t const written = VirtualQueryEx( hProcess, queryAddress, &mbi, sizeof mbi );
        if( written != sizeof mbi )
        {
            DWORD const dwErr = GetLastError();
            if( dwErr == ERROR_INVALID_PARAMETER )
                return;
            throw system_error( (int)dwErr, system_category(), "can't query process pages" );
        }
        if( !fn( mbi ) )
            return;
        // The next query starts directly behind the region just reported.
        queryAddress = (char *)mbi.BaseAddress + mbi.RegionSize;
    }
}
Unfortunately I still get about 6% skipped memory:
successes: 2159
partial errs: 225
errs: 225
read: 706748416
skipped: 42897408 (5.7%)
Why is that ?

OpenCL compute histogram program returns 0 in every bin

I'm trying to implement a simple OpenCL program to compute a histogram.
Below is what I currently have:
#include <CL/cl.h>
#include <iostream>
#include <vector>
#define STB_IMAGE_IMPLEMENTATION
#include <stb_image.h>
#include <algorithm>
// Looks up the OpenCL platform and device with the given names, then creates a
// context and a command queue for that device.
//
// platformId, deviceId, context, commandQueue: outputs. All four are reset to
//     nullptr first, so they never hold indeterminate values on return.
// platformName / deviceName: exact names to match against CL_PLATFORM_NAME and
//     CL_DEVICE_NAME.
// deviceType: device category passed to clGetDeviceIDs.
//
// Failing OpenCL calls are reported on std::cerr; nothing is thrown. If the
// platform or device is not found the function returns early.
void setup(
    cl_platform_id &platformId, cl_device_id &deviceId, cl_context& context, cl_command_queue& commandQueue,
    std::string platformName = "NVIDIA CUDA", cl_device_type deviceType = CL_DEVICE_TYPE_GPU,
    std::string deviceName = "GeForce GTX 1070")
{
    // Reports a failing call instead of silently dropping its status.
    const auto check = [](cl_int code, const char* what) -> bool
    {
        if (code != CL_SUCCESS)
        {
            std::cerr << what << " failed with OpenCL error " << code << std::endl;
        }
        return code == CL_SUCCESS;
    };

    platformId = nullptr;
    deviceId = nullptr;
    context = nullptr;
    commandQueue = nullptr;

    //Finding platform id
    cl_uint numberOfPlatforms = 0;
    check(clGetPlatformIDs(0, nullptr, &numberOfPlatforms), "clGetPlatformIDs(count)");
    std::vector<cl_platform_id> platforms(numberOfPlatforms);
    if (!platforms.empty())
    {
        check(clGetPlatformIDs(numberOfPlatforms, platforms.data(), nullptr), "clGetPlatformIDs(list)");
    }
    for (const cl_platform_id currentPlatform : platforms)
    {
        size_t nameSize = 0;
        check(clGetPlatformInfo(currentPlatform, CL_PLATFORM_NAME, 0, nullptr, &nameSize), "clGetPlatformInfo(size)");
        // One spare byte keeps the buffer terminated even if the query fails.
        std::vector<char> nameBuffer(nameSize + 1, '\0');
        check(clGetPlatformInfo(currentPlatform, CL_PLATFORM_NAME, nameSize, nameBuffer.data(), nullptr), "clGetPlatformInfo(name)");
        if (platformName == nameBuffer.data())
        {
            std::cout << "Platform " << platformName << " found!" << std::endl;
            platformId = currentPlatform;
            break;
        }
    }
    if (platformId == nullptr)
    {
        std::cerr << "Platform " << platformName << " not found" << std::endl;
        return;
    }

    //Finding device id
    cl_uint numberOfDevices = 0;
    check(clGetDeviceIDs(platformId, deviceType, 0, nullptr, &numberOfDevices), "clGetDeviceIDs(count)");
    std::vector<cl_device_id> devices(numberOfDevices);
    if (!devices.empty())
    {
        check(clGetDeviceIDs(platformId, deviceType, numberOfDevices, devices.data(), nullptr), "clGetDeviceIDs(list)");
    }
    for (const cl_device_id currentDevice : devices)
    {
        size_t nameSize = 0;
        check(clGetDeviceInfo(currentDevice, CL_DEVICE_NAME, 0, nullptr, &nameSize), "clGetDeviceInfo(size)");
        std::vector<char> nameBuffer(nameSize + 1, '\0');
        check(clGetDeviceInfo(currentDevice, CL_DEVICE_NAME, nameSize, nameBuffer.data(), nullptr), "clGetDeviceInfo(name)");
        if (deviceName == nameBuffer.data())
        {
            std::cout << "Device " << deviceName << " found!" << std::endl;
            deviceId = currentDevice;
            break;
        }
    }
    if (deviceId == nullptr)
    {
        std::cerr << "Device " << deviceName << " not found" << std::endl;
        return;
    }

    cl_int error = CL_SUCCESS;
    context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, &error);
    check(error, "clCreateContext");
    commandQueue = clCreateCommandQueue(context, deviceId, 0, &error);
    check(error, "clCreateCommandQueue");
}
// Loads the image at imagePath as 8-bit grayscale, prints its (min, max) pixel
// values, builds programSource for deviceId, runs the "computeHistogram" kernel
// with one work item per pixel, and finally releases commandQueue and context.
//
// histogram is declared as int[256], which as a parameter is a plain int*.
// Every OpenCL status is stored in `error` but never inspected.
void run(const std::string & imagePath, const std::string& programSource, const cl_device_id deviceId,
const cl_context& context, const cl_command_queue& commandQueue, int histogram[256])
{
cl_int error;
int width, height, channels;
stbi_set_flip_vertically_on_load(true);
// NOTE(review): the returned pointer is used below without a null check and
// is never freed in this function.
unsigned char *image = stbi_load(imagePath.c_str(),
&width,
&height,
&channels,
STBI_grey);
// NOTE(review): min starts at 0, so no unsigned pixel can compare smaller; the
// value stored by `char max = 255` depends on whether plain char is signed.
char min = 0;
char max = 255;
for(int i = 0; i < width*height; ++i)
{
min = (image[i] < min) ? image[i]:min;
max = (image[i] > max) ? image[i]:max;
}
// min and max are of type char, so they are streamed as characters, not numbers.
std::cout << "(min, max) := (" << min << ", " << max << ")" << std::endl;
//create buffers
// NOTE(review): both calls pass a host pointer while the flags contain no
// *_HOST_PTR bit - confirm against the clCreateBuffer documentation whether
// that is accepted. &histogram is the address of the pointer parameter, not of
// the 256 ints it points to.
cl_mem memImage = clCreateBuffer(context,CL_MEM_READ_ONLY,width*height*sizeof(char),image,&error);
cl_mem memHistogram = clCreateBuffer(context,CL_MEM_READ_WRITE,256*sizeof(int),&histogram,&error);
//Create program, kernel and setting kernel args
size_t programSize = programSource.length();
const char * source = programSource.c_str();
cl_program program = clCreateProgramWithSource(context,1,&source,&programSize,&error);
error = clBuildProgram(program,1,&deviceId,nullptr,nullptr,nullptr);
cl_kernel kernel = clCreateKernel(program,"computeHistogram",&error);
// NOTE(review): this writes sizeof(cl_mem) bytes taken from &image (the
// address of the pointer variable), not width*height bytes of pixel data.
error = clEnqueueWriteBuffer(commandQueue,memImage,CL_TRUE,0,sizeof(cl_mem),&image,0,nullptr,nullptr);
error = clSetKernelArg(kernel,0,sizeof(cl_mem),&memImage);
error = clSetKernelArg(kernel,1,sizeof(cl_mem),&memHistogram);
clFinish(commandQueue);
// One work item per pixel.
size_t globalWorkSize = width*height;
error = clEnqueueNDRangeKernel(commandQueue,kernel,1,nullptr,&globalWorkSize,nullptr,0,nullptr,nullptr);
// NOTE(review): this is another host-to-device write; nothing in this function
// copies the device histogram back into the caller's array.
error = clEnqueueWriteBuffer(commandQueue,memHistogram,CL_TRUE,0,256*sizeof(int),&histogram,0,nullptr,nullptr);
clFinish(commandQueue);
// The queue and context received by reference are released here; program,
// kernel, memImage and memHistogram are not released.
clReleaseCommandQueue(commandQueue);
clReleaseContext(context);
}
// Sets up OpenCL, runs the histogram kernel over "gray.jpeg" and prints the
// 256 bins, one "<index> : <count>" line per bin.
int main(int argc, char** argv)
{
    // Start from null handles so a failed setup() never leaves them indeterminate.
    cl_platform_id platformId = nullptr;
    cl_device_id deviceId = nullptr;
    cl_context context = nullptr;
    cl_command_queue commandQueue = nullptr;
    setup(platformId,deviceId,context,commandQueue);
    std::string filename = "gray.jpeg";
    // NOTE(review): the kernel reads pixels as `char`; if char is signed on the
    // device, values above 127 would index histogram[] negatively - confirm.
    std::string programSource =
        "__kernel void computeHistogram(\n"
        " __global char * image, __global int * histogram)\n"
        "{\n"
        " size_t idx = get_global_id(0);\n"
        " char pixelValue = image[idx];\n"
        " atomic_inc(&histogram[pixelValue]);\n"
        "}\n";
    int histogram[256] = {0};
    run(filename,programSource, deviceId, context, commandQueue,histogram);
    for(int i = 0; i < 256; ++i)
    {
        // Print the bin index itself; previously the literal text "i : " was printed.
        std::cout << i << " : " << histogram[i] << std::endl;
    }
    return 0;
}
However, I get 0 in every bin. I think the logic I'm trying to apply is correct, but I cannot figure out what the error is.
There are several problems. To name a few:
clCreateBuffer returns error -38 (CL_INVALID_MEM_OBJECT) because host_ptr is being passed and this is not being reflected in the flags parameter. CL_MEM_USE_HOST_PTR can be used in addition to CL_MEM_READ_ONLY and CL_MEM_READ_WRITE respectively.
To clEnqueueWriteBuffer size of cl_mem object is being passed instead of the size of image buffer.
After clEnqueueNDRangeKernel again clEnqueueWriteBuffer is being used. I suspect the intention here was to read data back and for that clEnqueueReadBuffer needs to be used.
There may be more problems. These are just the major ones and it's hard to imagine that you checked cl functions return codes and all of them returned CL_SUCCESS...
The actual program that works is the following:
#include <CL/cl.h>
#include <iostream>
#include <vector>
#define STB_IMAGE_IMPLEMENTATION
#include <stb_image.h>
#include <algorithm>
// Looks up the OpenCL platform and device with the given names, then creates a
// context and a command queue for that device.
//
// platformId, deviceId, context, commandQueue: outputs. All four are reset to
//     nullptr first, so they never hold indeterminate values on return.
// platformName / deviceName: exact names to match against CL_PLATFORM_NAME and
//     CL_DEVICE_NAME.
// deviceType: device category passed to clGetDeviceIDs.
//
// Failing OpenCL calls are reported on std::cerr; nothing is thrown. If the
// platform or device is not found the function returns early.
void setup(
    cl_platform_id &platformId, cl_device_id &deviceId, cl_context& context, cl_command_queue& commandQueue,
    std::string platformName = "NVIDIA CUDA", cl_device_type deviceType = CL_DEVICE_TYPE_GPU,
    std::string deviceName = "GeForce GTX 1070")
{
    // Reports a failing call instead of silently dropping its status.
    const auto check = [](cl_int code, const char* what) -> bool
    {
        if (code != CL_SUCCESS)
        {
            std::cerr << what << " failed with OpenCL error " << code << std::endl;
        }
        return code == CL_SUCCESS;
    };

    platformId = nullptr;
    deviceId = nullptr;
    context = nullptr;
    commandQueue = nullptr;

    //Finding platform id
    cl_uint numberOfPlatforms = 0;
    check(clGetPlatformIDs(0, nullptr, &numberOfPlatforms), "clGetPlatformIDs(count)");
    std::vector<cl_platform_id> platforms(numberOfPlatforms);
    if (!platforms.empty())
    {
        check(clGetPlatformIDs(numberOfPlatforms, platforms.data(), nullptr), "clGetPlatformIDs(list)");
    }
    for (const cl_platform_id currentPlatform : platforms)
    {
        size_t nameSize = 0;
        check(clGetPlatformInfo(currentPlatform, CL_PLATFORM_NAME, 0, nullptr, &nameSize), "clGetPlatformInfo(size)");
        // One spare byte keeps the buffer terminated even if the query fails.
        std::vector<char> nameBuffer(nameSize + 1, '\0');
        check(clGetPlatformInfo(currentPlatform, CL_PLATFORM_NAME, nameSize, nameBuffer.data(), nullptr), "clGetPlatformInfo(name)");
        if (platformName == nameBuffer.data())
        {
            std::cout << "Platform " << platformName << " found!" << std::endl;
            platformId = currentPlatform;
            break;
        }
    }
    if (platformId == nullptr)
    {
        std::cerr << "Platform " << platformName << " not found" << std::endl;
        return;
    }

    //Finding device id
    cl_uint numberOfDevices = 0;
    check(clGetDeviceIDs(platformId, deviceType, 0, nullptr, &numberOfDevices), "clGetDeviceIDs(count)");
    std::vector<cl_device_id> devices(numberOfDevices);
    if (!devices.empty())
    {
        check(clGetDeviceIDs(platformId, deviceType, numberOfDevices, devices.data(), nullptr), "clGetDeviceIDs(list)");
    }
    for (const cl_device_id currentDevice : devices)
    {
        size_t nameSize = 0;
        check(clGetDeviceInfo(currentDevice, CL_DEVICE_NAME, 0, nullptr, &nameSize), "clGetDeviceInfo(size)");
        std::vector<char> nameBuffer(nameSize + 1, '\0');
        check(clGetDeviceInfo(currentDevice, CL_DEVICE_NAME, nameSize, nameBuffer.data(), nullptr), "clGetDeviceInfo(name)");
        if (deviceName == nameBuffer.data())
        {
            std::cout << "Device " << deviceName << " found!" << std::endl;
            deviceId = currentDevice;
            break;
        }
    }
    if (deviceId == nullptr)
    {
        std::cerr << "Device " << deviceName << " not found" << std::endl;
        return;
    }

    cl_int error = CL_SUCCESS;
    context = clCreateContext(nullptr, 1, &deviceId, nullptr, nullptr, &error);
    check(error, "clCreateContext");
    commandQueue = clCreateCommandQueue(context, deviceId, 0, &error);
    check(error, "clCreateCommandQueue");
}
void run(const std::string & imagePath, const std::string& programSource, const cl_device_id deviceId,
const cl_context& context, const cl_command_queue& commandQueue, int histogram[256])
{
cl_int error;
int width, height, channels;
stbi_set_flip_vertically_on_load(true);
unsigned char *image = stbi_load(imagePath.c_str(),
&width,
&height,
&channels,
STBI_grey);
unsigned char min = 255;
unsigned char max = 0;
for(int i = 0; i < width*height; ++i)
{
min = (image[i] < min) ? image[i]:min;
max = (image[i] > max) ? image[i]:max;
}
std::cout << "(min, max) := (" << static_cast<int>(min) << ", " << static_cast<int>(max) << ")" << std::endl;
//create buffers
cl_mem memImage = clCreateBuffer(context,CL_MEM_READ_ONLY,width*height*sizeof(unsigned char),image,&error);
cl_mem memHistogram = clCreateBuffer(context,CL_MEM_READ_WRITE,256*sizeof(int),&histogram,&error);
//Create program, kernel and setting kernel args
size_t programSize = programSource.length();
const char * source = programSource.c_str();
cl_program program = clCreateProgramWithSource(context,1,&source,&programSize,&error);
error = clBuildProgram(program,1,&deviceId,nullptr,nullptr,nullptr);
cl_kernel kernel = clCreateKernel(program,"computeHistogram",&error);
error = clEnqueueWriteBuffer(commandQueue,memImage,CL_TRUE,0,width*height*sizeof(unsigned char),image,0,nullptr,nullptr);
error = clSetKernelArg(kernel,0,sizeof(cl_mem),&memImage);
error = clSetKernelArg(kernel,1,sizeof(cl_mem),&memHistogram);
clFinish(commandQueue);
const size_t globalWorkSize = width*height;
error = clEnqueueNDRangeKernel(commandQueue,kernel,1,nullptr,&globalWorkSize,nullptr,0,nullptr,nullptr);
error = clEnqueueReadBuffer(commandQueue,memHistogram,CL_TRUE,0,256*sizeof(int),histogram,0,nullptr,nullptr);
clFinish(commandQueue);
clReleaseCommandQueue(commandQueue);
clReleaseContext(context);
}
// Sets up OpenCL, runs the histogram kernel over "gray.jpeg" and prints the
// 256 bins, one "<index> : <count>" line per bin.
int main(int argc, char** argv)
{
    cl_platform_id platformId;
    cl_device_id deviceId;
    cl_context context;
    cl_command_queue commandQueue;
    setup(platformId, deviceId, context, commandQueue);

    std::string filename = "gray.jpeg";
    // Kernel source; the text is kept exactly as written, including the missing
    // newline after the barrier statement.
    std::string programSource =
        "__kernel void computeHistogram(\n"
        " __global unsigned char * image, __global int * histogram)\n"
        "{\n"
        " size_t idx = get_global_id(0);\n"
        " unsigned char pixelValue = image[idx];\n"
        " atomic_inc(&histogram[pixelValue]);\n"
        " barrier(CLK_GLOBAL_MEM_FENCE);"
        "}\n";

    int histogram[256] = {0};
    run(filename, programSource, deviceId, context, commandQueue, histogram);

    int bin = 0;
    while (bin != 256)
    {
        std::cout << bin << " : " << histogram[bin] << std::endl;
        ++bin;
    }
    return 0;
}
The main issue was the line
error = clEnqueueReadBuffer(commandQueue,memHistogram,CL_TRUE,0,256*sizeof(int),histogram,0,nullptr,nullptr);
In the original post this was a clEnqueueWriteBuffer and the size was wrong. I was also using char instead of unsigned char and finally the kernel is different.

Protobuf ParseFromZeroCopyStream incurs high memory usage with repeated field

I have encountered a problem of high memory usage when using ParseFromZeroCopyStream to load a file into which a large buffer has been written. The code snippet below uses more than 60 GB of RAM and then fails, because the system freezes after reaching its RAM limit.
FYI, I am using protobuf as DLL.
scene.proto
syntax = "proto3";
package Recipe;
option cc_enable_arenas = true;
message Scene
{
repeated int32 image_data = 1 [packed=true];
}
source.cpp
#include <iostream>
#include <fstream>
#include <ostream>
#include <istream>
#include <string>
#include <cstdint>
#include "Scene.pb.h"
#include <google\protobuf\io\zero_copy_stream_impl.h>
#include <google\protobuf\io\gzip_stream.h>
#include <google\protobuf\arena.h>
// Value range constants (not referenced by the code shown here).
constexpr int _MIN = 0;
constexpr int _MAX = 255;
// Number of elements written to the repeated field.
constexpr unsigned int _SIZE = 1280000000u;
//constexpr unsigned int _SIZE = 2000u;
// Compression level handed to the gzip output stream.
constexpr unsigned int _COMPRESSION_LEVEL = 6u;
// Fills buffer[0..size) with the repeating pattern 0, 1, ..., 255, 0, 1, ...
// (each element is its own index truncated to 8 bits). Despite the name no
// random numbers are involved.
void randWithinUnsignedCharSize(uint8_t * buffer, unsigned int size)
{
    for (unsigned int index = 0; index != size; ++index)
    {
        buffer[index] = static_cast<uint8_t>(index & 0xFFu);
    }
}
using namespace google::protobuf::io;
// Round-trip test: fills a Recipe::Scene with _SIZE values, writes it
// gzip-compressed to "data.txt", then parses the file back into a second Scene
// and prints the number of elements read.
//
// Returns 0 on success and -1 if a file cannot be opened or the message cannot
// be serialized or parsed.
int main()
{
    GOOGLE_PROTOBUF_VERIFY_VERSION;
    {
        google::protobuf::Arena arena;
        Recipe::Scene * scene = google::protobuf::Arena::CreateMessage<Recipe::Scene>(&arena);
        {
            // The staging buffer is only needed while the repeated field is
            // filled. Freeing it here keeps it out of the serialization peak
            // and means the early return below can no longer leak it.
            uint8_t * imageData = new uint8_t[_SIZE];
            randWithinUnsignedCharSize(imageData, _SIZE);
            scene->mutable_image_data()->Resize(static_cast<int>(_SIZE), 0);
            for (unsigned int i = 0; i < _SIZE; ++i)
            {
                scene->set_image_data(static_cast<int>(i), imageData[i]);
            }
            delete[] imageData;
        }
        std::cout << "done saving data to repeated field.\n";
        {
            std::fstream output("data.txt", std::ios::out | std::ios::trunc | std::ios::binary);
            if (!output) {
                std::cerr << "Failed to open data.txt for writing." << std::endl;
                return -1;
            }
            OstreamOutputStream outputFileStream(&output);
            GzipOutputStream::Options options;
            options.format = GzipOutputStream::GZIP;
            options.compression_level = _COMPRESSION_LEVEL;
            GzipOutputStream gzipOutputStream(&outputFileStream, options);
            if (!scene->SerializeToZeroCopyStream(&gzipOutputStream)) {
                std::cerr << "Failed to write scene." << std::endl;
                return -1;
            }
        }
    }
    std::cout << "Finish serializing into data.txt\n";
    {
        google::protobuf::Arena arena1;
        Recipe::Scene * scene1 = google::protobuf::Arena::CreateMessage<Recipe::Scene>(&arena1);
        {
            std::fstream input("data.txt", std::ios::in | std::ios::binary);
            if (!input) {
                std::cerr << "Failed to open data.txt for reading." << std::endl;
                return -1;
            }
            IstreamInputStream inputFileStream(&input);
            GzipInputStream gzipInputStream(&inputFileStream);
            if (!scene1->ParseFromZeroCopyStream(&gzipInputStream)) {
                std::cerr << "Failed to parse scene." << std::endl;
                return -1;
            }
        }
        std::cout << "scene1->imagedata_size() " << scene1->image_data_size() << std::endl;
    }
    google::protobuf::ShutdownProtobufLibrary();
    return 0;
}

how would I go about handling these return type errors? (hashmap/table)

#ifndef HASHMAP_H
#define HASHMAP_H
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// State of one slot in hashmap's table:
//   open        - never used; terminates a probe sequence
//   active      - holds a key/value pair
//   deactivated - formerly used; skipped by lookups, reusable by inserts
enum Status{open , active, deactivated };

// Fixed-capacity string-keyed hash map using open addressing with double
// hashing. The table has table_size (103) slots and never grows.
//
// T: mapped value type; must be default-constructible and copyable.
template</*typename Key,*/ typename T>
class hashmap{
private:
    // One table slot. Key is not const so that slots can be assigned
    // normally; callers reaching a Node through an iterator must not modify
    // Key, or later lookups will miss it.
    class Node{
    public:
        std::string Key;
        T value;
        Status status;
        // Empty slot; value is value-initialized so it is never indeterminate.
        Node() : Key(), value(), status(open){}
        // Active slot for `key` with a value-initialized value.
        Node(const std::string& key) : Key(key), value(), status(active){}
        // Active slot for `key` holding `val`.
        Node(std::string key, T val) : Key(key), value(val), status(active){}
    };

    // Polynomial (base 31) string hash reduced to [0, tableSize).
    unsigned int hash(const std::string& s, int tableSize){
        unsigned int h = 0;
        for (char c : s) h = 31 * h + unsigned(c);
        return h % tableSize;
    }
    // Hash reduced to the size of this map's table.
    unsigned int hash(const std::string& s){
        return hash(s, table_size);
    }

    int table_size = 103;
    std::vector<Node> table;
    typedef typename std::vector<Node>::iterator iter;

    // Follows the probe sequence (home + i * stride) % table_size for `key`.
    // Returns the index of the active slot holding `key`, or -1 if absent.
    // If `reusable` is non-null it receives the first open or deactivated
    // slot seen on the way (-1 if there was none), where `key` may be stored.
    int locate(const std::string& key, int* reusable){
        const int slots = table_size;
        const int home = static_cast<int>(hash(key));
        const int step = stride(home);
        int firstFree = -1;
        int found = -1;
        // table_size is prime and step is in 1..7, so `slots` probes visit
        // every slot exactly once; the loop therefore always terminates.
        for (int i = 0; i < slots; ++i){
            const int z = (home + i * step) % slots;
            const Node& slot = table[z];
            if (slot.status == open){
                if (firstFree < 0) firstFree = z;
                break;                      // the key cannot lie beyond an open slot
            }
            if (slot.status == deactivated){
                if (firstFree < 0) firstFree = z;
                continue;
            }
            if (slot.Key == key){
                found = z;
                break;
            }
        }
        if (reusable) *reusable = firstFree;
        return found;
    }

public:
    //default constructor: table_size empty slots
    hashmap() : table(static_cast<typename std::vector<Node>::size_type>(table_size)){}
    //copy constructor
    hashmap(const hashmap& x) = default;
    //assignment operator (member-wise; safe for self-assignment)
    hashmap& operator=(const hashmap& x) = default;
    //destructor
    ~hashmap() = default;

    //index operator
    // Returns a reference to the value stored under x, inserting a
    // value-initialized entry first if x is not present.
    // Throws std::length_error if x is absent and every slot is occupied.
    T& operator[](const std::string x){
        int freeSlot = -1;
        const int hit = locate(x, &freeSlot);
        if (hit >= 0){
            return table[hit].value;
        }
        if (freeSlot < 0){
            throw std::length_error("hashmap: table is full");
        }
        table[freeSlot] = Node(x);
        return table[freeSlot].value;
    }

    //Node test
    // Builds a temporary node from (x, i) and prints its status.
    void okay(const std::string x,int i){
        const Node temp(x, i);
        std::cout << temp.status << std::endl;
    }

    // Secondary hash: probe step in the range 1..7 for non-negative x.
    int stride(int x){
        return (7-x%7);
    }

    //find()
    // Returns an iterator to the slot holding x, or end() if x is not present.
    iter find(const std::string& x){
        const int hit = locate(x, nullptr);
        return hit >= 0 ? table.begin() + hit : table.end();
    }

    //begin()
    // Iterator to the first slot (slots of any status are included).
    iter begin(){
        return table.begin();
    }
    //end()
    // Past-the-end iterator; also the "not found" result of find().
    iter end(){
        return table.end();
    }
};
#endif // !HASHMAP_H
Everything seems to be working fine except the find function. It's supposed to probe through the vector and return a value depending on the slot's status, but I get these errors about conflicting return types.
Error2error C2664: 'std::_Vector_iterator<std::_Vector_val<std::_Simple_types<hashmap::Node>>>::_Vector_iterator(const
std::_Vector_iterator<std::_Vector_val<std::_Simple_types<hashmap::Node>>> &)' : cannot convert argument 1 from 'hashmap<int>::Node *' to 'const
std::_Vector_iterator<std::_Vector_val<std::_Simple_types<hashmap<int>::Node>>> &'
Error1error C2664: 'std::_Vector_iterator<std::_Vector_val<std::_Simple_types<hashmap<int>::Node>>>::_Vector_iterator(const
std::_Vector_iterator<std::_Vector_val<std::_Simple_types<hashmap<int>::Node>>> &)' : cannot convert argument 1 from 'int' to 'const
std::_Vector_iterator<std::_Vector_val<std::_Simple_types<hashmap<int>::Node>>> &'
How can I edit the iterator to fix this?
thank you.

Error C3867: function call missing argument list

I'm trying to compile a code in Visual Studio, but I keep getting the following error:
Error 4 error C3867: 'MindSet::Form1::handleDataValueFunc': function call missing argument list; use '&MindSet::Form1::handleDataValueFunc' to create a pointer to member c:\documents and settings\licap\desktop\mindset\mindset\mindset\Form1.h 122 1 MindSet
This is my code
#pragma endregion
// Appends one value as a decimal line to the text file at `path`.
// Does nothing if the file cannot be opened.
void appendValueToFile(const char *path, short sample)
{
    FILE *log = fopen(path, "a");
    if (log == NULL)
    {
        return;
    }
    fprintf(log, "%d\n", sample);
    fclose(log);
}
// Data-value callback for the ThinkGear stream parser. For extended code level
// 0 it prints the decoded value and appends it to a log file:
//   RAW_WAVE_CODE         -> 16-bit value from value[0] (high) and value[1] (low), "raw.txt"
//   ATTENTION_LEVEL_CODE  -> value[0], "atencao.txt"
//   MEDITATION_LEVEL_CODE -> value[0], "meditacao.txt"
// Only the file that is actually written is opened (previously all three were
// opened on every call and used without checking that fopen succeeded).
// customData is unused.
void handleDataValueFunc(unsigned char extendedCodeLevel, unsigned char code,
    unsigned char valueLength, const unsigned char *value, void *customData)
{
    if (extendedCodeLevel != 0 || value == NULL)
    {
        return;
    }
    // The raw sample occupies two bytes; skip it when the payload is shorter.
    if (code == RAW_WAVE_CODE && valueLength >= 2)
    {
        short rawValue = ((value[0] << 8) & 0xff00) | (0x00ff & value[1]);
        printf("%d\n", rawValue);
        appendValueToFile("raw.txt", rawValue);
    }
    if (code == ATTENTION_LEVEL_CODE && valueLength >= 1)
    {
        short attentionValue = (value[0] & 0xFF);
        printf("%d\n", attentionValue);
        appendValueToFile("atencao.txt", attentionValue);
    }
    if (code == MEDITATION_LEVEL_CODE && valueLength >= 1)
    {
        short meditationValue = (value[0] & 0xFF);
        printf("%d\n", meditationValue);
        appendValueToFile("meditacao.txt", meditationValue);
    }
}
// Click handler: opens the serial port, feeds up to 100000 bytes read from it
// into a ThinkGear packet parser, then closes and deletes the port. Reading
// stops early at the first read that does not return exactly one byte.
// NOTE(review): handleDataValueFunc is passed here as a plain function name;
// the reported C3867 error points at this call.
private: System::Void IniciarCaptura_Click(System::Object^ sender, System::EventArgs^ e) {
    SerialPort* port = new SerialPortW32();
    if (!port->open())
    {
        // (disabled) print port->getErrorMessage() on cout
        delete port;
        return;
    }

    /* Initialize ThinkGear stream parser */
    ThinkGearStreamParser parser;
    THINKGEAR_initParser(&parser, PARSER_TYPE_PACKETS, handleDataValueFunc, NULL);

    unsigned char byteRead;
    int remaining = 100000;
    // (disabled) a failed read used to report a port read error on cerr
    while (remaining-- > 0 && port->read(&byteRead, 1) == 1)
    {
        THINKGEAR_parseByte(&parser, byteRead);
        fflush(stdout);
    }

    port->close();
    delete port;
}
};
}
I've already tried to add a "&" before "handleDataValueFunc", but it only returns another error message. Can anybody help?
You will have to use gcroot. See http://msdn.microsoft.com/en-us/library/481fa11f.aspx
// Native struct holding a gcroot reference to the managed Form1 so that it can
// be handed to native code through a void* user-data pointer.
struct nativeMindSetFormHandle
{
    gcroot<MindSet::Form1 ^> handle;

    nativeMindSetFormHandle(MindSet::Form1 ^ h)
        : handle(h)
    {
    }
};
static void handleDataValueFuncProxy(unsigned char extendedCodeLevel,
unsigned char code, unsigned char valueLength, const unsigned char *value,
void *customData)
{
static_cast<nativeMindSetFormHandle *>(customData)->handle->handleDataValueFunc(extendedCodeLevel, code, valueLength, value, NULL);
}
And update IniciarCaptura_Click to include:
nativeMindSetFromHandle * nativeHandle = new nativeMindSetFormHandle(this);
THINKGEAR_initParser(&parser, PARSER_TYPE_PACKETS, handleDataValueFuncProxy, nativeHandle);
And don't forget to delete nativeHandle when you are done.

Resources