ALL,
I have the following code:
In the header
// Database class whose data members live in a separately defined
// MySQLImpl structure, reached through a single pointer.
class MySQLDatabase
{
protected:
    // Forward declaration only; the definition is supplied outside this class body.
    struct MySQLImpl;

    // Pointer to the implementation data. Nothing in this class body
    // allocates, initializes or frees it.
    MySQLImpl* m_pimpl;
};
// Implementation data for MySQLDatabase: a wide-string <-> UTF-8 converter
// followed by six wide-string settings.
struct MySQLDatabase::MySQLImpl
{
    // Converter between std::wstring and UTF-8 encoded std::string.
    std::wstring_convert<std::codecvt_utf8<wchar_t>> m_myconv;

    // Declared one per line, in the original declaration order.
    std::wstring m_host;
    std::wstring m_user;
    std::wstring m_password;
    std::wstring m_dbName;
    std::wstring m_socket;
    std::wstring m_catalog;
};
In the cpp
int MySQLDatabase::Connect()
{
std::wstring tname;
// some code here
const char *temp1 = m_pimpl->m_myconv.to_bytes( tname.c_str() ).c_str();
const char *temp2 = m_pimpl->m_myconv.to_bytes( ownerName.c_str() ).c_str();
const char *temp3 = m_pimpl->m_myconv.to_bytes( fieldName.c_str() ).c_str();
unsigned long str_length1 = strlen( temp1 ), str_length2 = strlen( temp2 ), str_length3 = strlen( temp3 );
str_data1 = new char[str_length1 + 1], str_data2 = new char[str_length2 + 1], str_data3 = new char[str_length3 + 1];
memset( str_data1, '\0', str_length1 + 1 );
memset( str_data2, '\0', str_length2 + 1 );
memset( str_data3, '\0', str_length3 + 1 );
// some more code here
}
I am trying to compile and run this code on Linux with gcc 5.4 in c++11 mode.
The tname variable contains L"draft.abc\303\237".
The problem is that temp1 is "" (an empty string), and so are temp2 and temp3 (ownerName is L"" and fieldName is L"name").
Am I doing something wrong with the conversion?
Related
My first question is about PROT_WRITE and PROT_READ: I wasn't able to find them defined anywhere, which is giving me a hard time compiling. I replaced them with 0 and 1, but that doesn't seem to work.
Second, the copy fails with "rejected (unexisting region cookie)" when I run the following code:
/*
 * Two-rank KNEM example.
 *
 * Rank 0 fills a buffer with random lowercase letters, declares it as a KNEM
 * region and sends the region cookie to rank 1. Rank 1 declares its own
 * buffer as a region, receives rank 0's cookie and issues KNEM_CMD_COPY from
 * the received (source) cookie into its own (destination) cookie.
 *
 * Any failure to open the device or to complete an ioctl aborts the whole
 * MPI job, because the later steps depend on cookies that would otherwise be
 * uninitialized.
 */
int rank;
MPI_Init( &argc, &argv );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
MPI_Win win;
int knem_fd = open("/dev/knem", O_RDWR);
if( knem_fd < 0 )
    MPI_Abort( MPI_COMM_WORLD, 1 );
int err;
uint64_t size = 64;
if( rank == 0 ){
    char *inbuf = malloc(size);
    for( uint64_t i = 0; i < size; i++ )
        inbuf[i] = rand() % 26 + 97;
    print_array( inbuf, size, '0' );

    struct knem_cmd_create_region create;
    struct knem_cmd_param_iovec knem_iov[1];
    /* The region must describe the buffer itself. &inbuf would be the
     * address of the pointer variable, not of the 64 bytes it points to. */
    knem_iov[0].base = (uint64_t)(uintptr_t) inbuf;
    knem_iov[0].len = size;
    create.iovec_array = (uintptr_t) &knem_iov[0];
    create.iovec_nr = 1;
    create.flags = KNEM_FLAG_SINGLEUSE;
    /* NOTE(review): create.protection is still unset. It presumably needs
     * PROT_READ from <sys/mman.h> for a source region - confirm. */
    //create.protection = 1;
    err = ioctl( knem_fd, KNEM_CMD_CREATE_REGION, &create );
    if( err != 0 )
        MPI_Abort( MPI_COMM_WORLD, 1 ); /* create.cookie is not valid */

    MPI_Send( &(create.cookie), 1, MPI_UINT64_T, 1, 0, MPI_COMM_WORLD );
    MPI_Barrier( MPI_COMM_WORLD );
} else if( rank == 1 ){
    char *obuf = malloc(size);
    struct knem_cmd_copy copy;
    struct knem_cmd_create_region create;
    struct knem_cmd_param_iovec knem_iov[1];
    /* As on rank 0: pass the buffer address, not the address of the pointer. */
    knem_iov[0].base = (uint64_t)(uintptr_t) obuf;
    knem_iov[0].len = size;
    create.iovec_array = (uintptr_t) &knem_iov[0];
    create.iovec_nr = 1;
    /* NOTE(review): create.protection is still unset. It presumably needs
     * PROT_WRITE from <sys/mman.h> for a destination region - confirm. */
    //create.protection = 0;
    create.flags = KNEM_FLAG_SINGLEUSE;
    err = ioctl( knem_fd, KNEM_CMD_CREATE_REGION, &create );
    if( err != 0 )
        MPI_Abort( MPI_COMM_WORLD, 1 ); /* create.cookie is not valid */

    MPI_Recv( &(copy.src_cookie), 1, MPI_UINT64_T, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE );
    copy.src_offset = 0;
    copy.dst_cookie = create.cookie;
    copy.dst_offset = 0;
    copy.flags = 0;
    err = ioctl( knem_fd, KNEM_CMD_COPY, &copy );
    if( err != 0 )
        MPI_Abort( MPI_COMM_WORLD, 1 ); /* obuf was not filled */

    print_array( obuf, size, '1' );
    MPI_Barrier( MPI_COMM_WORLD );
}
Ranks 0 and 1 both create a region; rank 0 sends its cookie to rank 1, and rank 1 then tries to copy the data from rank 0's region. I checked that the received cookie is the same as the sent cookie, but the copy still fails to find the declared region.
PROT_READ and PROT_WRITE are mmap flags; you need to include sys/mman.h to get them. In the second part of the code, you need to set copy.src_cookie to create.cookie (or just use an inline copy to avoid creating that region at all, since it will be destroyed immediately because of the SINGLEUSE flag). Also, make sure you check the return values of all ioctl calls before continuing. The copy cannot work if create.cookie wasn't initialized because the create ioctl failed.
Hi everyone,
I'm a beginner with OpenCL and I wrote some simple C code that sums two arrays. Here is part of the code:
// Create Kernel.
cl_kernel kernelSum = clCreateKernel( myProgram, "sum", &error );
// Set Input Array.
size_t arraySize = 1000;
char* a = ( char* ) malloc( sizeof( char ) * arraySize );
char* b = ( char* ) malloc( sizeof( char ) * arraySize );
char* c = ( char* ) malloc( sizeof( char ) * arraySize );
for (int i = 0; i < arraySize; i += 1)
{
a[ i ] = 1;
b[ i ] = 2;
c[ i ] = -1;
}
// Set Buffers.
cl_mem a_buffer = clCreateBuffer(
myContext,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
arraySize * sizeof( char ), a,
&error );
cl_mem b_buffer = clCreateBuffer(
myContext,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
arraySize * sizeof( char ), b,
&error );
cl_mem c_buffer = clCreateBuffer(
myContext,
CL_MEM_WRITE_ONLY,
arraySize * sizeof( char ), NULL,
&error );
printf( "Buffers created.\n" );
// Setting Kernel Arguments.
error = clSetKernelArg( kernelSum, 0, sizeof( cl_mem ), &a_buffer );
error |= clSetKernelArg( kernelSum, 1, sizeof( cl_mem ), &b_buffer );
error |= clSetKernelArg( kernelSum, 2, sizeof( cl_mem ), &c_buffer );
printf( "Arguments Set.\n" );
// Enqueue kernels to execute.
cl_event event;
size_t globalWorkOffset = 0;
size_t globalWorkSize[ 1 ] = { arraySize };
size_t localWorkSize[ 1 ] = { 1 };
clEnqueueNDRangeKernel(
myCommandQueue,
kernelSum,
1, // work_dim
0, // global work offset
globalWorkSize,
localWorkSize, // local work offset
0, NULL,
&event
);
printf( "Kernel Enqueued.\n" );
error = clEnqueueReadBuffer(
myCommandQueue,
c_buffer,
CL_TRUE, // blocking option
( size_t ) 0, arraySize * sizeof( char ), // offset, data_size
c, // host_ptr
0, NULL,
&event );
if ( error != CL_SUCCESS )
{
printf( "Buffer Reading Back Failed.\n" );
exit( 1 );
}
However, I got an incorrect result: all the numbers in the "c" array are zeros. I thought it had something to do with clEnqueueReadBuffer, but perhaps not. Any ideas about this issue? I look forward to your suggestions! :-)
Your call to clEnqueueReadBuffer will not wait for the kernel to finish. It will most likely execute simultaneously with the kernel. Change the call to:
// Blocking read of c_buffer into the host array "c" that waits on the
// kernel's event before it starts.
error = clEnqueueReadBuffer(
myCommandQueue,
c_buffer,
CL_TRUE, // blocking option
( size_t ) 0, arraySize * sizeof( char ), // offset, data_size
c, // host_ptr
// wait list: one event (the kernel's) that must complete before the read
1, &event,
// no event object is requested for the read itself
NULL );
This will cause clEnqueueReadBuffer to wait for the kernel event to finish before starting to read the buffer.
Thanks, everyone. All your suggestions were really helpful. With your help I finally found what I had done wrong:
clBuildProgram( myProgram, 1, &device, NULL, NULL, &error );
This is my original code.
The official definition of this function is :
// Prototype of clBuildProgram as quoted from the OpenCL documentation.
// The status is the cl_int return value; there is no error out-parameter.
cl_int clBuildProgram(cl_program program,
cl_uint num_devices,
const cl_device_id *device_list,
const char *options,
// Optional notification callback; may be NULL.
void (CL_CALLBACK *pfn_notify)
(cl_program program,
void *user_data),
// Last argument: opaque pointer handed to pfn_notify, not an error code.
void *user_data)
I found that I had passed "&error" as the last argument; I had confused this function with others whose argument list usually ends with an error-code pointer. Here, I should have called it like this:
clBuildProgram( myProgram, 1, &device, NULL, NULL, NULL );
The last argument is set to NULL. With that change the program ran correctly.
Thank you all very much! :-)
The content of the '.text' section is accessed using code like this:
1) For the application which is loaded into memory (i.e. executing):
// Accessing code in memory: walks the PE headers of the loaded module
// (hModule) to find the section named ".text", then computes the start and
// end addresses of that section inside the loaded image.
// pVirtualAddress, dwCodeSize, pCodeStart and pCodeEnd are declared outside
// the visible fragment.
PIMAGE_DOS_HEADER pDOSHeader = NULL;
// The module handle is treated as the address of the DOS header.
pDOSHeader = static_cast<PIMAGE_DOS_HEADER>( (void*)hModule);
...
// e_lfanew is the byte offset from the DOS header to the NT headers.
PIMAGE_NT_HEADERS pNTHeader = reinterpret_cast<PIMAGE_NT_HEADERS>((byte*)hModule + pDOSHeader->e_lfanew );
...
PIMAGE_FILE_HEADER pFileHeader = reinterpret_cast<PIMAGE_FILE_HEADER>((byte*)&pNTHeader->FileHeader );
...
PIMAGE_OPTIONAL_HEADER pOptionalHeader =
reinterpret_cast<PIMAGE_OPTIONAL_HEADER>((byte*)&pNTHeader->OptionalHeader );
...
// The section table starts right after the optional header, whose size is
// taken from the file header.
PIMAGE_SECTION_HEADER pSectionHeader = reinterpret_cast<PIMAGE_SECTION_HEADER>(
(byte*)&pNTHeader->OptionalHeader +
pNTHeader->FileHeader.SizeOfOptionalHeader );
// Iterate over the section headers and select the one with the right name.
const char TEXT[] = ".text";
// BSSTEXT is not used in the visible code.
const char BSSTEXT[] = ".textbss";
unsigned int nSectionCount = pNTHeader->FileHeader.NumberOfSections;
// One extra byte so the copied name is always NUL-terminated.
char szSectionName[ IMAGE_SIZEOF_SHORT_NAME + 1 ];
szSectionName[ IMAGE_SIZEOF_SHORT_NAME ] = '\0';
for( unsigned int i = 0; i < nSectionCount; i++ )
{
memcpy( szSectionName, pSectionHeader->Name,
IMAGE_SIZEOF_SHORT_NAME );
if( 0 == strncmp( TEXT, szSectionName,
IMAGE_SIZEOF_SHORT_NAME ) )
{
break;
}
pSectionHeader++;
}
// NOTE(review): if no section matched, pSectionHeader now points one entry
// past the last header and the reads below use that entry - confirm a
// ".text" section is always present.
pVirtualAddress = (void*)(pSectionHeader->VirtualAddress);
dwCodeSize = pSectionHeader->Misc.VirtualSize;
// Seems reasonable: to calculate the real starting address of a given section in memory,
// add the base address of the image to the section's VirtualAddress stored in this field.
pCodeStart = (void*)(((byte*)hModule) +(size_t)((byte*)pVirtualAddress) );
pCodeEnd = (void*)((byte*)pCodeStart + dwCodeSize);
2) For the application file read from hdd and mapped into memory:
// Loading code from file and mapping: maps the executable file read-only,
// walks its PE headers to find the section named ".text", and computes where
// that section's bytes start and end inside the file mapping.
// hFile, hFileMapping, pBaseAddress, dwRawData, pCodeStart, pEntryPoint,
// dwCodeSize and pCodeEnd are declared outside the visible fragment.
hFile = CreateFile( filename, FILE_READ_DATA, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
...
// A stray ')' after PAGE_READONLY previously closed the call early.
hFileMapping = CreateFileMapping( hFile, NULL, PAGE_READONLY, 0, 0, NULL );
...
pBaseAddress = MapViewOfFile( hFileMapping, FILE_MAP_READ, 0, 0, 0 );
...
PIMAGE_DOS_HEADER pDOSHeader = static_cast<PIMAGE_DOS_HEADER>( pBaseAddress);
...
// e_lfanew is the byte offset from the DOS header to the NT headers. The
// mapping base is used directly here, as on every other line of this fragment.
PIMAGE_NT_HEADERS pNTHeader = reinterpret_cast<PIMAGE_NT_HEADERS>(
    (PBYTE)pBaseAddress + pDOSHeader->e_lfanew );
...
PIMAGE_FILE_HEADER pFileHeader = reinterpret_cast<PIMAGE_FILE_HEADER>(
    (PBYTE)&pNTHeader->FileHeader );
...
PIMAGE_OPTIONAL_HEADER pOptionalHeader =
    reinterpret_cast<PIMAGE_OPTIONAL_HEADER>(
        (PBYTE)&pNTHeader->OptionalHeader );
// The section table starts right after the optional header.
PIMAGE_SECTION_HEADER pSectionHeader =
    reinterpret_cast<PIMAGE_SECTION_HEADER>(
        (PBYTE)&pNTHeader->OptionalHeader +
        pNTHeader->FileHeader.SizeOfOptionalHeader );
DWORD dwEntryPoint = pNTHeader->OptionalHeader.AddressOfEntryPoint;
UINT nSectionCount = pNTHeader->FileHeader.NumberOfSections;
const char TEXT[] = ".text";
// BSSTEXT is not used in the visible code.
const char BSSTEXT[] = ".textbss";
// One extra byte so the copied name is always NUL-terminated.
char szSectionName[ IMAGE_SIZEOF_SHORT_NAME + 1 ];
szSectionName[ IMAGE_SIZEOF_SHORT_NAME ] = '\0';
for( unsigned int i = 0; i < nSectionCount; i++ )
{
    memcpy( szSectionName, pSectionHeader->Name,
            IMAGE_SIZEOF_SHORT_NAME );
    if( 0 == strncmp( TEXT, szSectionName,
                      IMAGE_SIZEOF_SHORT_NAME ) )
    {
        break;
    }
    pSectionHeader++;
}
// NOTE(review): if no section matched, pSectionHeader now points one entry
// past the last header - confirm a ".text" section is always present.

// Use this when probing on disk. It is where things
// are on disk - not where they will be in memory.
dwRawData = pSectionHeader->PointerToRawData;
pCodeStart = (void*)((byte*)pBaseAddress +
                     pSectionHeader->PointerToRawData );
// NOTE(review): AddressOfEntryPoint is added to the file-mapping base here
// as if it were a file offset. If it is a relative virtual address, it only
// matches the file layout when the section's PointerToRawData equals its
// VirtualAddress - confirm.
pEntryPoint = (void*)(((byte*)pBaseAddress) + dwEntryPoint);
// NOTE(review): the size comes from Misc.VirtualSize (the in-memory size),
// which may differ from the number of bytes stored in the file - confirm.
dwCodeSize = pSectionHeader->Misc.VirtualSize;
pCodeEnd = (void*)((byte*)pCodeStart + pSectionHeader->Misc.VirtualSize );
If the application is built with Visual Studio, all the bytes between pCodeStart and pCodeEnd are matching in both cases.
But if the application is built with GCC (MinGW), the bytes just after pCodeStart and just before pCodeEnd are the same, while somewhere in the middle some bytes differ.
Why does it happen?
I am getting an error in this piece of code:
#include <stdio.h>
#include <cv.h>
#include <highgui.h>
using namespace std;
int main( int argc, char** argv )
{
//load color img specified by first argument
//IplImage *img = cvLoadImage( argv[1]);
IplImage *img = cvLoadImage(argv[1], CV_LOAD_IMAGE_COLOR );
IplImage *red = cvCreateImage(cvSize(img->width, img->height ),img->depth,img->nChannels);
IplImage *green = cvCreateImage(cvSize(img->width, img->height ),img- >depth,img>nChannels);
IplImage *blue = cvCreateImage(cvSize(img->width, img->height ),img->depth,img->nChannels);
// setup the pointer to access img data
uchar *pImg = ( uchar* )img->imageData;
// setup pointer to write data
uchar *pRed = ( uchar* )red->imageData;
uchar *pGreen = ( uchar* )green->imageData;
uchar *pBlue = ( uchar* )blue->imageData;
int i, j, rED, gREEN, bLUE, byte;
for( i = 0 ; i < img->height ; i++ )
{
for( j = 0 ; j < img->width ; j++ )
{
rED = pImg[i*img->widthStep + j*img->nChannels + 2];
gREEN = pImg[i*img->widthStep + j*img->nChannels + 1];
bLUE = pImg[i*img->widthStep + j*img->nChannels + 0];
// RED
pRed[i*img->widthStep + j*img->nChannels + 2] = rED;
// GREEN
pGreen[i*img->widthStep + j*img->nChannels + 1] = gREEN;
// BLUE
pBlue[i*img->widthStep + j*img->nChannels + 0] = bLUE;
}
}
// save images
cvSaveImage( argv[2], red );
cvSaveImage( argv[3], green );
cvSaveImage( argv[4], blue );
return 0;
}
The error is "Debug Assertion Failed".
Expression:
invalid null pointer
This is the piece of code where the debugger breaks:
#ifdef _DEBUG
// Reports a failed debug assertion through _CrtDbgReportW and breaks into
// the debugger when that call returns 1.
_CRTIMP2_PURE void __CLRCALL_PURE_OR_CDECL _Debug_message(const wchar_t *message,
    const wchar_t *file, unsigned int line)
{
    // report error and die
    const int reportResult = ::_CrtDbgReportW(_CRT_ASSERT, file, line, NULL, message);
    if (reportResult != 1)
    {
        return;
    }
    ::_CrtDbgBreak();
}
the yellow arrow is pointing to ::_CrtDbgBreak()
Perhaps not the only problem in that snippet, but there's a typo on this line:
IplImage *green = cvCreateImage(cvSize(img->width, img->height ),img->depth,img>nChannels);
You're passing img>nChannels, not img->nChannels
I wrote an OpenCL kernel for a 3x3 Sobel filter, and currently it runs in about 17 ms on a 2k x 2k image. This isn't as fast as I had hoped; does anyone have any suggestions for how to improve the speed? I've followed most of the suggestions on the checklist for optimizing kernels. My processor is an Intel i5-3450. The workgroup size is 8x8, and the number of work-items is height x width / 16, which is 2048 x 128 on the images I'm running on.
/*
 * 3x3 Sobel filter over an image stored as char16 vectors.
 *
 * Each work-item handles one char16 of the source. The work-group first
 * copies its block, plus a one-element border on every side, into __local
 * memory, then computes the gradients Gx and Gy and writes an approximate
 * magnitude (ALPHA * max + BETA * min of the absolute gradients) to mag.
 * angle is currently written as zero (the atan2 call is disabled).
 *
 * src    source pixels, 16 per element
 * angle  output gradient angle (currently all zeros)
 * mag    output gradient magnitude
 * width, height  not read directly in this body; presumably used by indexOf,
 *                which is defined outside this block - confirm.
 *
 * NOTE(review): the border loads pass row - 1, row + 1, col - 1 and col + 1
 * to indexOf with no range check in this body. Whether that stays in bounds
 * at the image edges depends on indexOf - confirm.
 */
__kernel void localCacheSobelFilter(
    const __global char16* src,
    __write_only __global float16* angle,
    __write_only __global float16* mag,
    const int width,
    const int height)
{
    // Cache the data we're looking at in __local space
    const int row = get_global_id(0);
    const int col = get_global_id(1);
    const int cacheRow = get_local_id(0) + 1;
    const int cacheCol = get_local_id(1) + 1;

    __local char16 cache[BLOCK_SIZE + 2][BLOCK_SIZE + 2];
    cache[cacheRow][cacheCol] = src[ indexOf(row, col) ];

    // --- Deal with the boundary conditions
    // This adds in the rows above and below the local block,
    // ignoring the corners.
    const bool atTopRow = (cacheRow == 1);
    const bool atBottomRow = (cacheRow == BLOCK_SIZE);
    if(atTopRow) {
        cache[0][cacheCol] = src[ indexOf(row - 1, col) ];
    } else if (atBottomRow) {
        cache[BLOCK_SIZE + 1][cacheCol] = src[ indexOf(row + 1, col) ];
    }

    // This adds in the columns to the left and right of the local block,
    // ignoring the corners. Only the one neighbouring component is needed:
    // the last (.sf) of the left vector, the first (.s0) of the right one.
    const bool atLeftCol = (cacheCol == 1);
    const bool atRightCol = (cacheCol == BLOCK_SIZE);
    if(atLeftCol) {
        cache[cacheRow][0].sf = src[ indexOf(row, col - 1) ].sf;
    } else if (atRightCol) {
        cache[cacheRow][BLOCK_SIZE + 1].s0 = src[ indexOf(row, col + 1) ].s0;
    }

    // Now finally check the corners
    const bool atTLCorner = atTopRow && atLeftCol;
    const bool atTRCorner = atTopRow && atRightCol;
    const bool atBLCorner = atBottomRow && atLeftCol;
    const bool atBRCorner = atBottomRow && atRightCol;
    if(atTLCorner) {
        cache[0][0].sf = src[ indexOf(row - 1, col - 1) ].sf;
    } else if (atTRCorner) {
        cache[0][BLOCK_SIZE + 1].s0 = src[ indexOf(row - 1, col + 1) ].s0;
    } else if (atBLCorner) {
        cache[BLOCK_SIZE + 1][0].sf = src[ indexOf(row + 1, col - 1) ].sf;
    } else if (atBRCorner) {
        cache[BLOCK_SIZE + 1][BLOCK_SIZE + 1].s0 = src[ indexOf(row + 1, col + 1) ].s0;
    }

    // Every work-item reads cache entries written by its neighbours.
    barrier(CLK_LOCAL_MEM_FENCE);

    //===========================================================================
    // Do the calculation
    // [..., pix00] upperRow  [pix02, ...]
    // [..., pix10] centerRow [pix12, ...]
    // [..., pix20] lowerRow  [pix22, ...]
    const char pix00 = cache[cacheRow - 1][cacheCol - 1].sf;
    const char pix10 = cache[cacheRow    ][cacheCol - 1].sf;
    const char pix20 = cache[cacheRow + 1][cacheCol - 1].sf;

    const char16 upperRow  = cache[cacheRow - 1][cacheCol];
    const char16 centerRow = cache[cacheRow    ][cacheCol];
    const char16 lowerRow  = cache[cacheRow + 1][cacheCol];

    const char pix02 = cache[cacheRow - 1][cacheCol + 1].s0;
    const char pix12 = cache[cacheRow    ][cacheCol + 1].s0;
    const char pix22 = cache[cacheRow + 1][cacheCol + 1].s0;

    // Widen to 16-bit before any arithmetic. char16 arithmetic is done in
    // 8 bits, and a weighted sum of three pixels (weights 1, 2, 1) can reach
    // four times the pixel range, which wraps around in a char.
    const short16 two = (short16)(2);
    const short16 upperRowWide = convert_short16(upperRow);
    const short16 lowerRowWide = convert_short16(lowerRow);

    // Do the calculations for Gy
    const short16 upperRowShiftLeft  = convert_short16((char16)(upperRow.s123456789abcdef, pix02));
    const short16 upperRowShiftRight = convert_short16((char16)(pix00, upperRow.s0123456789abcde));
    const short16 lowerRowShiftLeft  = convert_short16((char16)(lowerRow.s123456789abcdef, pix22));
    const short16 lowerRowShiftRight = convert_short16((char16)(pix20, lowerRow.s0123456789abcde));
    const float16 Gy = convert_float16(
        (upperRowShiftLeft + two * upperRowWide + upperRowShiftRight)
        - (lowerRowShiftLeft + two * lowerRowWide + lowerRowShiftRight));

    // Do the calculations for Gx
    const short16 centerRowShiftLeft  = convert_short16((char16)(centerRow.s123456789abcdef, pix12));
    const short16 centerRowShiftRight = convert_short16((char16)(pix10, centerRow.s0123456789abcde));
    const float16 Gx = convert_float16(
        (upperRowShiftRight + two * centerRowShiftRight + lowerRowShiftRight)
        - (upperRowShiftLeft + two * centerRowShiftLeft + lowerRowShiftLeft));

    // Find the angle and magnitude.
    // The max/min magnitude approximation is defined on absolute values;
    // with signed gradients a large negative component would be picked as
    // the "min" and give a wrong or negative magnitude.
    const float16 absGx = fabs(Gx);
    const float16 absGy = fabs(Gy);
    angle[ indexOf(row, col) ] = (float16)(0.0f); //atan2(Gy, Gx);
    mag[ indexOf(row, col) ] = ALPHA * max(absGx, absGy) + BETA * min(absGx, absGy);
}
Any help would be greatly appreciated. Thanks!
In your kernel you have a lot of "if" statements to handle the edge effect. They are time-consuming and unnecessary 99% of the time.
I think you can change the global work size passed to clEnqueueNDRangeKernel and use a global work offset to avoid the edge effect.
Ex:
Offset = {1,1,0}
GlobalWorkSize = { width-2, height-2,0}