SDL2 problem with displaying a texture in c - sdl-2

os : ArchLinux up to date.
Hello,
with this code :
#include <SDL2/SDL.h>
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
/* SDL */
SDL_Window *fenetre = NULL;
SDL_Renderer *rendu = NULL;
SDL_Texture *texture = NULL;
/* Init SDL */
if(0 != SDL_Init(SDL_INIT_VIDEO))
{
fprintf(stderr, "Erreur SDL_Init : %s", SDL_GetError());
goto Quit;
}
fenetre = SDL_CreateWindow(" Essai texture", SDL_WINDOWPOS_UNDEFINED,
SDL_WINDOWPOS_UNDEFINED, 500, 500, SDL_WINDOW_SHOWN);
if(NULL == fenetre)
{
fprintf(stderr, "Erreur SDL_CreateWindow : %s", SDL_GetError());
goto Quit;
}
rendu = SDL_CreateRenderer(fenetre, -1, SDL_RENDERER_ACCELERATED);
if(NULL == rendu)
{
fprintf(stderr, "Erreur SDL_CreateRenderer : %s", SDL_GetError());
goto Quit;
}
/* Color */
if(0 != SDL_SetRenderDrawColor(rendu, 242, 243, 244, 255))
{
fprintf(stderr, "Erreur SDL_SetRenderDrawColor : %s", SDL_GetError());
goto Quit;
}
/* renderClear */
if(0 != SDL_RenderClear(rendu))
{
fprintf(stderr, "Erreur SDL_RenderClear : %s", SDL_GetError());
goto Quit;
}
/* Create la texture */
texture = SDL_CreateTexture(rendu, SDL_PIXELFORMAT_RGBA8888,
SDL_TEXTUREACCESS_TARGET, 400, 400);
if(NULL == texture)
{
fprintf(stderr, "Erreur SDL_CreateTexture : %s", SDL_GetError());
goto Quit;
}
/* Colore la texture */
SDL_SetRenderTarget(rendu, texture);
SDL_SetRenderDrawColor(rendu, 242, 110, 160, 255);
SDL_RenderFillRect(rendu, NULL);
SDL_SetRenderTarget(rendu, NULL);
/* Show texture in bottom */
SDL_Rect dst = {50, 250, 400, 200};
SDL_RenderCopy(rendu, texture, NULL, &dst);
SDL_RenderPresent(rendu);
SDL_Delay(3000); //
/* And show texture in top */
dst.y = 50;
SDL_RenderCopy(rendu, texture, NULL, &dst);
SDL_RenderPresent(rendu);
SDL_Delay(3000);
Quit:
if(NULL != texture)
SDL_DestroyTexture(texture);
if(NULL != rendu)
SDL_DestroyRenderer(rendu);
if(NULL != fenetre)
SDL_DestroyWindow(fenetre);
SDL_Quit();
return 0;
}
at the second SDL_RenderPresent, the texture display works well but the window in the background is black.
With flag SDL_RENDERER_SOFTWARE in CreateRenderer, works well.
I try on debian, work well even with the flag SDL_RENDERER_ACCELERATED.
Someone had this little inconvenience with Archlinux and possibly a solution.
thank you in advance.
First RenderPresent
Second RenderPresent

Related

direct x 11 invalid argument when creating vertex shader

Here is the Shader Code File Name:Shader.shader
struct VOut
{
float4 position : SV_POSITION;
float4 color : COLOR;
};
VOut VMain(float4 position : POSITION, float4 color : COLOR)
{
VOut output;
output.position = position;
output.color = color;
return output;
}
float4 PMain(float4 position : SV_POSITION, float4 color : COLOR) : SV_TARGET
{
return color;
}
Here is how I compile my shaders
RasterShader::RasterShader(ID3D11Device* device,LPCWSTR vFile,LPCSTR vEntry,LPCSTR vVersion,LPCWSTR pFile,LPCSTR pEntry,LPCSTR pVersion)
{
ID3DBlob* compiledCode=nullptr;
vShader = nullptr;
pShader = nullptr;
errors = 0;
if (FAILED(Compile(vFile,vEntry,vVersion,&compiledCode)))
{
errors = 1;
return;
}
if (FAILED(device->CreateVertexShader(compiledCode->GetBufferPointer(), compiledCode->GetBufferSize(),nullptr, &vShader)))
{
compiledCode->Release();
MessageBox(NULL, L"Failed To Create Vertex Shader", L"Failed Vertex Shader", MB_OK);
errors = 1;
return;
}
D3D11_INPUT_ELEMENT_DESC desc[] =
{
{"POSITION",0,DXGI_FORMAT_R32G32B32_FLOAT,0,0,D3D11_INPUT_PER_VERTEX_DATA,0}
,{"COLOR",0,DXGI_FORMAT_R32G32B32_FLOAT,0,12,D3D11_INPUT_PER_VERTEX_DATA,0}
};
if (FAILED(device->CreateInputLayout(desc, 2,compiledCode->GetBufferPointer(),compiledCode->GetBufferSize(),&inputLayout)))
{
compiledCode->Release();
MessageBox(NULL, L"Failed To Create Input Layout", L"Failed Input Layout", MB_OK);
errors = 1;
return;
}
compiledCode->Release();
if (FAILED(Compile(pFile,pEntry,pVersion,&compiledCode)))
{
errors = 1;
return;
}
if (FAILED(device->CreatePixelShader(compiledCode->GetBufferPointer(), compiledCode->GetBufferSize(), nullptr, &pShader)))
{
compiledCode->Release();
MessageBox(NULL, L"Failed To Create Pixel Shader", L"Failed Pixel Shader", MB_OK);
errors = 1;
return;
}
compiledCode->Release();
}
HRESULT RasterShader::Compile(LPCWSTR fileName,LPCSTR entry,LPCSTR version,ID3DBlob** code)
{
ID3DBlob* errors=nullptr;
HRESULT hr = D3DCompileFromFile(fileName,nullptr,nullptr
,entry,version
,0,0,code,&errors);
if (FAILED(hr))
{
if (errors!=nullptr)
{
CString data((char*)errors->GetBufferPointer());
MessageBox(NULL, data.GetBuffer(), L"Shader Compile Errors", MB_OK);
data.ReleaseBuffer();
errors->Release();
}
if (code) { (*code)->Release(); }
}
return hr;
}
RasterShader * RasterShader::Create(ID3D11Device* device,LPCWSTR vFile,LPCSTR vMain,LPCSTR vVersion,LPCWSTR pFile,LPCSTR pMain,LPCSTR pVersion)
{
RasterShader* shader = new RasterShader(device,vFile,vMain,vVersion,pFile,pMain,pVersion);
if (shader->errors == 1)
{
delete shader;
shader = nullptr;
}
return shader;
}
Here is how I create my shader
shader = RasterShader::Create(directx->getDevice(), L"Shader.shader","VMain","vs_4_0",L"Shader.shader","PMain","ps_4_0");
if (shader == nullptr)
{
errors = 1;
return;
}
Here is how I create my device
D3D_FEATURE_LEVEL levels[] = {
D3D_FEATURE_LEVEL_9_1,
D3D_FEATURE_LEVEL_9_2,
D3D_FEATURE_LEVEL_9_3,
D3D_FEATURE_LEVEL_10_0,
D3D_FEATURE_LEVEL_10_1,
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_11_1
};
//CREATE DEVICE AND CONTEXT
HRESULT hr = D3D11CreateDevice(nullptr,D3D_DRIVER_TYPE_HARDWARE,0,D3D11_CREATE_DEVICE_BGRA_SUPPORT | D3D10_CREATE_DEVICE_DEBUG
,levels,ARRAYSIZE(levels),D3D11_SDK_VERSION
,&device,&level,&context);
The Returned/Supported feature level is 9_1
The program breaks at the point I create my VertexShader with the Message "FAILED TO CREATE VERTEX SHADER"
When I analyze the HRESULT returned by device->CreateVertexShader() I get the Error Code
E_INVALIDARG
When debug layer was enabled I got this error
CreateVertexShader: Encoded Vertex Shader size doesn't match specified size. [ STATE_CREATION ERROR #166: CREATEVERTEXSHADER_INVALIDSHADERBYTECODE]
I tired enquiring the size of my buffer. Not sure if helpful or if I did it right.
wchar_t buffer[256];
wsprintf(buffer, L"%d",sizeof((*code)->GetBufferPointer()));
MessageBox(NULL, buffer, L"A", MB_OK);
It prints 4
wchar_t buffer[256];
wsprintf(buffer, L"%d",sizeof((*code)->GetBufferSize()));
MessageBox(NULL, buffer, L"A", MB_OK);
It prints 164
Any help would be greatly appreciated. Thank U
Eurika I got it !!
Turns out my profile should be combined with my feature level as follows
Vertex Shader : vs_4_0_level_9_1
Pixel Shader : ps_4_0_level_9_1
Thank's Everyone :)

How to determine font used in dialog window

How to determine font used for some control in some dialog window in runnning process on Windows? Something like Microsoft Spy++ does.
I did not find this functionality in Spy++, but here's a small program that I just wrote for this task:
#include <windows.h>
#include <stdio.h>
int main(int argc, char **argv)
{
if (argc != 2) {
fprintf(stderr, "Usage: findfont WINDOWTITLE\n");
return 1;
}
LPCSTR title = argv[1];
HWND hWnd = FindWindow(NULL, title);
if (hWnd == NULL) {
fprintf(stderr, "Window titled \"%s\" not found\n", title);
return 1;
}
HFONT hFont = (HFONT) SendMessage(hWnd, WM_GETFONT, 0, 0);
if (hFont == NULL) {
fprintf(stderr, "WM_GETFONT failed\n");
return 1;
}
LOGFONT lf = { 0 };
if (!GetObject(hFont, sizeof(LOGFONT), &lf)) {
fprintf(stderr, "GetObject failed\n");
return 1;
}
printf("Face name: %s Height: %ld\n", lf.lfFaceName, lf.lfHeight);
return 0;
}

How to show the GStreamer video in Qt frameless window ?

I am trying to show the gstreamer video in qt frameless window .
My gstreamer pipeline for which i am creating code is : gst-launch-1.0 -v rtspsrc location=rtsp://192.168.1.15:8554/test ! rtpjitterbuffer ! rtph264depay ! avdec_h264 ! d3dvideosink sync=false
This is my first question please answer me. My code is as below
My code is working for a qt window. It is showing the video which it is receiving
from rtsp link but i have two major issues in it:
1. When i minimize this window, It loses it's output video and starts to show a blank screen. 2. I want to open this video in a frameless window but if i do so then it displays nothing.
I am using Qt-4.8.12 and gstreamer version 1.4.5 for windows 7 64 bit. Any help regarding these two issues is highly appreciated. Thanks in Advance.
#include <glib.h>
#include <gst/gst.h>
#include <gst/video/videooverlay.h>
#include <QApplication>
#include <QTimer>
#include <QWidget>
#include <stdio.h>
#include "qmainwindow.h"
static void on_pad_added (GstElement *element, GstPad *pad, gpointer data);
static gboolean bus_call (GstBus *bus, GstMessage *msg, gpointer data);
int main(int argc, char *argv[])
{
if (!g_thread_supported ())
g_thread_init (NULL);
/* Initialize GStreamer */
gst_init (&argc, &argv);
QApplication app(argc, argv);
app.connect(&app, SIGNAL(lastWindowClosed()), &app, SLOT(quit ()));
/* Creating Elements */
//GstElement *pipeLine = gst_pipeline_new ("xvoverlay");
QWidget window;
// QMainWindow window;
window.resize(1024,768);
WId xwinid=window.winId();
GMainLoop *loop;
GstElement *pipeLine, *rtspSrc, *rtpJitterBuffer, *rtpH264Depay, *avDecH264, *videoSink;
rtspSrc = gst_element_factory_make("rtspsrc", NULL);
rtpJitterBuffer = gst_element_factory_make("rtpjitterbuffer", NULL);
rtpH264Depay = gst_element_factory_make("rtph264depay", NULL);
avDecH264 = gst_element_factory_make("avdec_h264", NULL);
videoSink = gst_element_factory_make("d3dvideosink", NULL);
loop = g_main_loop_new (NULL, FALSE);
if (!rtspSrc || !rtpJitterBuffer || !rtpH264Depay || !avDecH264 || !videoSink)
{
g_printerr ("Not all elements could be created.\n");
return -1;
}
/* Set element properties */
g_object_set( rtspSrc, "location", "rtsp://192.168.1.16:8554/test" , NULL);
g_object_set( videoSink, "sync", false, NULL);
/*Initializing Pipeline*/
pipeLine = gst_pipeline_new ("TestPipeLine");
if (!pipeLine)
{
g_printerr ("Pipeline could not be created.");
}
/* we add a message handler */
GstBus *bus = gst_pipeline_get_bus (GST_PIPELINE (pipeLine));
gst_bus_add_watch (bus, bus_call, loop);
gst_object_unref (bus);
/*Adding Components to the pipeline */
gst_bin_add_many (GST_BIN(pipeLine),
rtspSrc,
rtpJitterBuffer,
rtpH264Depay,
avDecH264,
videoSink,
NULL);
/* if (gst_element_link (rtspSrc, rtpJitterBuffer) != TRUE)
{
g_printerr ("rtspSrc & rtpJitterBuffer could not be linked.\n");
gst_object_unref (pipeLine);
return -1;
}
*/
if (gst_element_link (rtpJitterBuffer, rtpH264Depay) != TRUE)
{
g_printerr ("rtpJitterBuffer and rtpH264Depay could not be linked.\n");
gst_object_unref (pipeLine);
return -1;
}
if (gst_element_link (rtpH264Depay, avDecH264) != TRUE)
{
g_printerr ("rtpH264Depay and avDecH264 could not be linked.\n");
gst_object_unref (pipeLine);
return -1;
}
if (gst_element_link (avDecH264, videoSink) != TRUE)
{
g_printerr ("avDecH264 and videoSink could not be linked.\n");
gst_object_unref (pipeLine);
return -1;
}
g_signal_connect (rtspSrc, "pad-added", G_CALLBACK (on_pad_added), rtpJitterBuffer);
window.setWindowFlags(Qt::FramelessWindowHint);
// else
// g_printerr("Pipeline created");zz
gst_video_overlay_set_window_handle (GST_VIDEO_OVERLAY(videoSink), guintptr(xwinid));
window.show();
/* Set the pipeline to "playing" state*/
g_print ("Now playing: %s\n", argv[1]);
gst_element_set_state (pipeLine, GST_STATE_PLAYING);
app.exec();
/* Iterate */
g_print ("Running...\n");
g_main_loop_run (loop);
/* Out of the main loop, clean up nicely */
g_print ("Returned, stopping playback\n");
gst_element_set_state (pipeLine, GST_STATE_NULL);
g_print ("Deleting pipeline\n");
gst_object_unref (GST_OBJECT (pipeLine));
return 0;
}
static void on_pad_added (GstElement *element, GstPad *pad, gpointer data)
{
GstPad *sinkpad;
GstElement *decoder = (GstElement *) data;
/* We can now link this pad with the vorbis-decoder sink pad */
g_print ("Dynamic pad created, linking demuxer/decoder\n");
sinkpad = gst_element_get_static_pad (decoder, "sink");
gst_pad_link (pad, sinkpad);
gst_object_unref (sinkpad);
}
static gboolean bus_call (GstBus *bus, GstMessage *msg, gpointer data)
{
GMainLoop *loop = (GMainLoop *) data;
switch (GST_MESSAGE_TYPE (msg)) {
case GST_MESSAGE_EOS:
g_print ("End of stream\n");
g_main_loop_quit (loop);
break;
case GST_MESSAGE_ERROR: {
gchar *debug;
GError *error;
gst_message_parse_error (msg, &error, &debug);
g_free (debug);
g_printerr ("Error: %s\n", error->message);
g_error_free (error);
g_main_loop_quit (loop);
break;
}
default:
break;
}
return TRUE;
}
If you switch back to using QWindow, this will work:
window.setWindowFlags (Qt::FramelessWindowHint);
I'm not sure how, but I think something similar is available for QWidget.

Trying to pair Bluetooth with Windows 7 API return Timeout (Code 258)

I am trying to replace the Windows Bluetooth pairing. I found some code that was suppose to do this, and I mostly mimiced that code. Although, when I run the code which is below I always get a 258 error code.
Below is the code that does the actual pairing.
#include <stdio.h>
#include <tchar.h>
#include <stdlib.h>
#include <initguid.h>
#include <winsock2.h>
#include <ws2bth.h>
#include <BluetoothAPIs.h>
bool BluetoothAuthCallback(LPVOID pvParam, PBLUETOOTH_AUTHENTICATION_CALLBACK_PARAMS pAuthCallbackParams)
{
DWORD dwRet;
fprintf(stderr, "BluetoothAuthCallback 0x%x\n", pAuthCallbackParams->deviceInfo.Address.ullLong);
dwRet = BluetoothSendAuthenticationResponse(NULL, &(pAuthCallbackParams->deviceInfo), L"1234");
if(dwRet != ERROR_SUCCESS)
{
fprintf(stderr, "BluetoothSendAuthenticationResponse ret %d\n", dwRet);
ExitProcess(2);
return 1;
}
fprintf(stderr, "BluetoothAuthCallback finish\n");
ExitProcess(0);
return 1;
}
int _tmain(int argc, _TCHAR* argv[])
{
SOCKADDR_BTH sa = { 0 };
int sa_len = sizeof(sa);
DWORD dwRet;
BLUETOOTH_DEVICE_INFO btdi = {0};
HBLUETOOTH_AUTHENTICATION_REGISTRATION hRegHandle = 0;
// initialize windows sockets
WORD wVersionRequested;
WSADATA wsaData;
wVersionRequested = MAKEWORD( 2, 0 );
if( WSAStartup( wVersionRequested, &wsaData ) != 0 ) {
ExitProcess(2);
}
// parse the specified Bluetooth address
if( argc < 2 ) {
fprintf(stderr, "usage: rfcomm-client <addr>\n"
"\n addr must be in the form (XX:XX:XX:XX:XX:XX)");
ExitProcess(2);
}
if( SOCKET_ERROR == WSAStringToAddress( argv[1], AF_BTH,
NULL, (LPSOCKADDR) &sa, &sa_len ) ) {
ExitProcess(2);
}
btdi.dwSize = sizeof(BLUETOOTH_DEVICE_INFO);
btdi.Address.ullLong = sa.btAddr;
btdi.ulClassofDevice = 0;
btdi.fConnected = false;
btdi.fRemembered = false;
btdi.fAuthenticated = false;
dwRet = BluetoothRegisterForAuthenticationEx(&btdi, &hRegHandle, (PFN_AUTHENTICATION_CALLBACK_EX)&BluetoothAuthCallback, NULL);
if(dwRet != ERROR_SUCCESS)
{
fprintf(stderr, "BluetoothRegisterForAuthenticationEx ret %d\n", dwRet);
ExitProcess(2);
}
dwRet = BluetoothAuthenticateDeviceEx(NULL, NULL, &btdi, NULL,MITMProtectionNotRequired);
if(dwRet != ERROR_SUCCESS)
{
fprintf(stderr, "BluetoothAuthenticateDevice ret %d\n", dwRet);
ExitProcess(2);
}
Sleep(1000);
fprintf(stderr, "pairing finish\n");
ExitProcess(0);
return 0;
}
Below this is how I find the address to pass into the pairing application.
// BluetoothAddressFinder.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <initguid.h>
#include <winsock2.h>
#include <ws2bth.h>
#include <BluetoothAPIs.h>
int _tmain(int argc, _TCHAR* argv[])
{
BLUETOOTH_RADIO_INFO m_bt_info = {sizeof(BLUETOOTH_RADIO_INFO),0,};
BLUETOOTH_FIND_RADIO_PARAMS m_bt_find_radio = {sizeof(BLUETOOTH_FIND_RADIO_PARAMS)};
BLUETOOTH_DEVICE_INFO m_device_info = {sizeof(BLUETOOTH_DEVICE_INFO),0,};
BLUETOOTH_DEVICE_SEARCH_PARAMS m_search_params = {
sizeof(BLUETOOTH_DEVICE_SEARCH_PARAMS),
1, 0,
1,1,1,15,NULL
};
HANDLE m_radio = NULL;
HBLUETOOTH_RADIO_FIND m_bt = NULL;
HBLUETOOTH_DEVICE_FIND m_bt_dev = NULL;
int m_radio_id;
DWORD mbtinfo_ret;
int tempNumberOfDevices = 0;
m_bt = BluetoothFindFirstRadio(&m_bt_find_radio, &m_radio);
m_radio_id = 0;
do {
// Then get the radio device info....
mbtinfo_ret = BluetoothGetRadioInfo(m_radio, &m_bt_info);
// If there was an issue with the current radio check the next radio
if(mbtinfo_ret != ERROR_SUCCESS)
continue;
m_search_params.hRadio = m_radio;
ZeroMemory(&m_device_info, sizeof(BLUETOOTH_DEVICE_INFO));
m_device_info.dwSize = sizeof(BLUETOOTH_DEVICE_INFO);
// Next for every radio, get the device
m_bt_dev = BluetoothFindFirstDevice(&m_search_params, &m_device_info);
// Get the device info
do {
tempNumberOfDevices+=1;
fprintf(stdout, "Device name: %S Device Address: %d:%d:%d:%d:%d:%d \n" ,m_device_info.szName,m_device_info.Address.rgBytes[5],m_device_info.Address.rgBytes[4], m_device_info.Address.rgBytes[3], m_device_info.Address.rgBytes[2], m_device_info.Address.rgBytes[1], m_device_info.Address.rgBytes[0] );
} while(BluetoothFindNextDevice(m_bt_dev, &m_device_info));
} while(BluetoothFindNextRadio(&m_bt_find_radio, &m_radio));
}

OpenCL stackoverflow. How to solve it?

I'm having a problem when I try to run the reduction program from the OpenCL in Action's sources.
Im using Visual Studio 2008. This is the error:
Unhandled exception in 0x013526a7 in Reduction.exe: 0xC00000FD: Stack
overflow.
And in the asm file the cursor is to
test dword ptr [eax],eax ; probe page.
I tried to debug it, but when I put a breakpoint in the main function, the debugging starts, but the program does not keep running.
I don't know what is the really problem.
These are the source files:
reduction.cpp
#define _CRT_SECURE_NO_WARNINGS
#define PROGRAM_FILE "reduction_complete.cl"
#define ARRAY_SIZE 1048576
#define KERNEL_1 "reduction_vector"
#define KERNEL_2 "reduction_complete"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
/* Find a GPU or CPU associated with the first available platform */
cl_device_id create_device() {
cl_platform_id platform;
cl_device_id dev;
int err;
/* Identify a platform */
err = clGetPlatformIDs(1, &platform, NULL);
if(err < 0) {
perror("Couldn't identify a platform");
exit(1);
}
/* Access a device */
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);
if(err == CL_DEVICE_NOT_FOUND) {
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL);
}
if(err < 0) {
perror("Couldn't access any devices");
exit(1);
}
return dev;
}
/* Create program from a file and compile it */
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {
cl_program program;
FILE *program_handle;
char *program_buffer, *program_log;
size_t program_size, log_size;
int err;
/* Read program file and place content into buffer */
program_handle = fopen(filename, "r");
if(program_handle == NULL) {
perror("Couldn't find the program file");
exit(1);
}
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
rewind(program_handle);
program_buffer = (char*)malloc(program_size + 1);
program_buffer[program_size] = '\0';
fread(program_buffer, sizeof(char), program_size, program_handle);
fclose(program_handle);
/* Create program from file */
program = clCreateProgramWithSource(ctx, 1,
(const char**)&program_buffer, &program_size, &err);
if(err < 0) {
perror("Couldn't create the program");
exit(1);
}
free(program_buffer);
/* Build program */
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if(err < 0) {
/* Find size of log and print to std output */
clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
0, NULL, &log_size);
program_log = (char*) malloc(log_size + 1);
program_log[log_size] = '\0';
clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
log_size + 1, program_log, NULL);
printf("%s\n", program_log);
free(program_log);
exit(1);
}
return program;
}
int main() {
/* OpenCL structures */
cl_device_id device;
cl_context context;
cl_program program;
cl_kernel vector_kernel, complete_kernel;
cl_command_queue queue;
cl_event start_event, end_event;
cl_int i, err;
size_t local_size, global_size;
/* Data and buffers */
float data[ARRAY_SIZE];
float sum, actual_sum;
cl_mem data_buffer, sum_buffer;
cl_ulong time_start, time_end, total_time;
/* Initialize data */
for(i=0; i<ARRAY_SIZE; i++) {
data[i] = 1.0f*i;
}
/* Create device and determine local size */
device = create_device();
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(local_size), &local_size, NULL);
if(err < 0) {
perror("Couldn't obtain device information");
exit(1);
}
/* Create a context */
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if(err < 0) {
perror("Couldn't create a context");
exit(1);
}
/* Build program */
program = build_program(context, device, PROGRAM_FILE);
/* Create data buffer */
data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_USE_HOST_PTR, ARRAY_SIZE * sizeof(float), data, &err);
sum_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(float), NULL, &err);
if(err < 0) {
perror("Couldn't create a buffer");
exit(1);
};
/* Create a command queue */
queue = clCreateCommandQueue(context, device,
CL_QUEUE_PROFILING_ENABLE, &err);
if(err < 0) {
perror("Couldn't create a command queue");
exit(1);
};
/* Create kernels */
vector_kernel = clCreateKernel(program, KERNEL_1, &err);
complete_kernel = clCreateKernel(program, KERNEL_2, &err);
if(err < 0) {
perror("Couldn't create a kernel");
exit(1);
};
/* Set arguments for vector kernel */
err = clSetKernelArg(vector_kernel, 0, sizeof(cl_mem), &data_buffer);
err |= clSetKernelArg(vector_kernel, 1, local_size * 4 * sizeof(float), NULL);
/* Set arguments for complete kernel */
err = clSetKernelArg(complete_kernel, 0, sizeof(cl_mem), &data_buffer);
err |= clSetKernelArg(complete_kernel, 1, local_size * 4 * sizeof(float), NULL);
err |= clSetKernelArg(complete_kernel, 2, sizeof(cl_mem), &sum_buffer);
if(err < 0) {
perror("Couldn't create a kernel argument");
exit(1);
}
/* Enqueue kernels */
global_size = ARRAY_SIZE/4;
err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size,
&local_size, 0, NULL, &start_event);
if(err < 0) {
perror("Couldn't enqueue the kernel");
exit(1);
}
printf("Global size = %zu\n", global_size);
/* Perform successive stages of the reduction */
while(global_size/local_size > local_size) {
global_size = global_size/local_size;
err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size,
&local_size, 0, NULL, NULL);
printf("Global size = %zu\n", global_size);
if(err < 0) {
perror("Couldn't enqueue the kernel");
exit(1);
}
}
global_size = global_size/local_size;
err = clEnqueueNDRangeKernel(queue, complete_kernel, 1, NULL, &global_size,
NULL, 0, NULL, &end_event);
printf("Global size = %zu\n", global_size);
/* Finish processing the queue and get profiling information */
clFinish(queue);
clGetEventProfilingInfo(start_event, CL_PROFILING_COMMAND_START,
sizeof(time_start), &time_start, NULL);
clGetEventProfilingInfo(end_event, CL_PROFILING_COMMAND_END,
sizeof(time_end), &time_end, NULL);
total_time = time_end - time_start;
/* Read the result */
err = clEnqueueReadBuffer(queue, sum_buffer, CL_TRUE, 0,
sizeof(float), &sum, 0, NULL, NULL);
if(err < 0) {
perror("Couldn't read the buffer");
exit(1);
}
/* Check result */
actual_sum = 1.0f * (ARRAY_SIZE/2)*(ARRAY_SIZE-1);
if(fabs(sum - actual_sum) > 0.01*fabs(sum))
printf("Check failed.\n");
else
printf("Check passed.\n");
printf("Total time = %lu\n", total_time);
/* Deallocate resources */
clReleaseEvent(start_event);
clReleaseEvent(end_event);
clReleaseMemObject(sum_buffer);
clReleaseMemObject(data_buffer);
clReleaseKernel(vector_kernel);
clReleaseKernel(complete_kernel);
clReleaseCommandQueue(queue);
clReleaseProgram(program);
clReleaseContext(context);
return 0;
}
reduction_complete.cl
__kernel void reduction_vector(__global float4* data,
__local float4* partial_sums) {
int lid = get_local_id(0);
int group_size = get_local_size(0);
partial_sums[lid] = data[get_global_id(0)];
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = group_size/2; i>0; i >>= 1) {
if(lid < i) {
partial_sums[lid] += partial_sums[lid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if(lid == 0) {
data[get_group_id(0)] = partial_sums[0];
}
}
__kernel void reduction_complete(__global float4* data,
__local float4* partial_sums, __global float* sum) {
int lid = get_local_id(0);
int group_size = get_local_size(0);
partial_sums[lid] = data[get_local_id(0)];
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = group_size/2; i>0; i >>= 1) {
if(lid < i) {
partial_sums[lid] += partial_sums[lid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if(lid == 0) {
*sum = partial_sums[0].s0 + partial_sums[0].s1 +
partial_sums[0].s2 + partial_sums[0].s3;
}
}
I dont know what causes the stackoverflow...
I don't see any recursion so my guess is the float data[ARRAY_SIZE]; where #define ARRAY_SIZE 1048576 is putting 4MB on the stack which is pretty large. Try changing that to a dynamic allocation.

Resources