Windows WriteFile and ReadFile interferes with each other

Windows WriteFile and ReadFile interferes with each other - windows

I have two threads, one calls WriteFile, the other calls ReadFile, both uses the same serial port at 115200. What I have found repeatedly is that when I add more sequential WriteFile calls in the writing thread, is that I have an increased number of check-sum failures i.e. corrupt data. If I add delays between the WriteFile calls in the writing thread (using CBaseDLIMessage::CrossSleep(20) ), the check-sum errors reduce or even completely go away (depending on the sleep time), however the problem comes back when adding additional WriteFile methods.
Is it possible that the WriteFile method can somehow write data into a shared buffer, which is also used for reading (at driver level), which subsequently corrupts my read data?
My serial setup code looks like this:
CommHandle OpenSerialPort(int nId, long baud)
{
#if defined(USE_WINDOWS_SERIAL_PORT_LIB)
char szCOM[16];
/* COM waiting */
COMMTIMEOUTS g_cto =
{
MAXDWORD, /* ReadIntervalTimeOut */
0, /* ReadTotalTimeOutMultiplier */
0, /* ReadTotalTimeOutConstant */
0, /* WriteTotalTimeOutMultiplier */
0 /* WriteTotalTimeOutConstant */
};
/* COM configuration */
DCB g_dcb =
{
sizeof(DCB), /* DCBlength */
baud, /* BaudRate */
TRUE, /* fBinary */
FALSE, /* fParity */
FALSE, /* fOutxCtsFlow */
FALSE, /* fOutxDsrFlow */
DTR_CONTROL_DISABLE, /* fDtrControl */
FALSE, /* fDsrSensitivity */
FALSE, /* fTXContinueOnXoff */
FALSE, /* fOutX */
FALSE, /* fInX */
FALSE, /* fErrorChar */
FALSE, /* fNull */
RTS_CONTROL_DISABLE, /* fRtsControl */
FALSE, /* fAbortOnError */
0, /* fDummy2 */
0, /* wReserved */
0x100, /* XonLim */
0x100, /* XoffLim */
8, /* ByteSize */
NOPARITY, /* Parity */
ONESTOPBIT, /* StopBits */
0x11, /* XonChar */
0x13, /* XoffChar */
'?', /* ErrorChar */
0x1A, /* EofChar */
0x10 /* EvtChar */
};
//cout << "COM:::" << nId;
sprintf(szCOM, "\\\\.\\COM%d", nId);
HANDLE hCOM = CreateFile(szCOM, /* lpFileName */
GENERIC_READ|GENERIC_WRITE, /* dwDesiredAccess */
0, /* dwShareMode */
NULL, /* lpSecurityAttributes */
OPEN_EXISTING, /* dwCreationDisposition */
FILE_ATTRIBUTE_SYSTEM, /* dwFlagsAndAttributes */
NULL); /* hTemplateFile */
//std::cout << "COM: " << (int)nId << "\n";
if(hCOM == INVALID_HANDLE_VALUE)
{
//std::cout << "INVALID_HANDLE_VALUE \n" ;
DWORD err = GetLastError();
if (err == ERROR_FILE_NOT_FOUND)
{
std::cout << "Error: Failed to open COM[" << nId << "]\n";
}
else if(err == ERROR_INVALID_NAME)
{
std::cout << "\nError: \n%s 'filename, directory name, or volume label syntax is incorrect'\n"; //error code 0x7B
}
else
{
//cout << "\nHandle creation error code: %x\n" << err;
}
return 0;
}
else
{
std::cout << "COM[" << nId << "] OPEN. Baud: " << baud <<"\n";
}
// COM buffer size
SetupComm(hCOM, RX_SIZE, TX_SIZE);
// COM config
if(!SetCommTimeouts(hCOM, &g_cto) || !SetCommState(hCOM, &g_dcb))
{
CloseHandle(hCOM);
return 0;
}
// COM buffer purge
PurgeComm(hCOM, PURGE_TXCLEAR|PURGE_RXCLEAR|PURGE_TXABORT|PURGE_RXABORT);
EscapeCommFunction(hCOM, SETDTR);
return hCOM;
#else
wxSerialPort *pwxSerialPort = new wxSerialPort();
char szPort[32];
sprintf(szPort,"com%d", nId);
if(pwxSerialPort->Open(szPort) < 0) {
return 0;
}
// set the baudrate
pwxSerialPort->SetBaudRate((wxBaud)baud);
return pwxSerialPort;
#endif
}
int CloseSerialPort(CommHandle hSerialPort)
{
#if defined(USE_WINDOWS_SERIAL_PORT_LIB)
CloseHandle(hSerialPort);
return 1;
#else
hSerialPort->Close();
delete hSerialPort;
return 1;
#endif
}
int WriteDataToSerialPort(CommHandle hSerialPort, void *pMsg, unsigned short messageLength, unsigned int *pBytesWritten)
{
*pBytesWritten = 0;
if(hSerialPort == 0)
{
//cout << "hSerialPort ====== 0";
return -1;
}
#if defined(USE_WINDOWS_SERIAL_PORT_LIB)
return WriteFile(hSerialPort, (char*)pMsg, messageLength, (DWORD*)pBytesWritten, NULL);
#else
*pBytesWritten = hSerialPort->Write((char*)pMsg, messageLength);
return 1;
#endif
}
int ReadDataFromSerialPort(CommHandle hSerialPort, void *pMsg, unsigned short messageLength, unsigned int *pBytesRead)
{
unsigned short i;
*pBytesRead = 0;
i = 0;
if(hSerialPort == 0)
{
return -1;
}
#if defined(USE_WINDOWS_SERIAL_PORT_LIB)
return ReadFile(hSerialPort, (char*)pMsg, messageLength, (DWORD*)pBytesRead, NULL);
#else
*pBytesRead = hSerialPort->Read((char*)pMsg, messageLength);
return 1;
#endif
}

Communication through serial port is slow.
You should use WriteFileEx instead of WriteFile.
You can specify a routine which may execute when the writting(transmitting) is completed or failed.
If you write next data after that routine has been executed, overflow of data may not be occured.

Related

STM32L486 enable LPUART1 with DMA

I'm trying to enable LPUART1 to write/read traces and debug my code.
Im setting PC0/PC1 pin:
/*Configure GPIO pins : PC0 PC1 */
GPIO_InitStruct.Pin = GPIO_PIN_0|GPIO_PIN_1;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
GPIO_InitStruct.Alternate = GPIO_AF8_LPUART1;
HAL_GPIO_Init(GPIOC, &GPIO_InitStruct);
Created the handlers:
UART_HandleTypeDef hlpuart1;
DMA_HandleTypeDef hdma_lpuart_tx;
DMA_HandleTypeDef hdma_lpuart_rx;
My init function. I call it from main.c:
/* LPUART1 init function */
void MX_LPUART1_UART_Init(void)
{
hlpuart1.Instance = LPUART1;
hlpuart1.Init.BaudRate = 115200;
hlpuart1.Init.WordLength = UART_WORDLENGTH_8B;
hlpuart1.Init.StopBits = UART_STOPBITS_1;
hlpuart1.Init.Parity = UART_PARITY_NONE;
hlpuart1.Init.Mode = UART_MODE_TX_RX;
hlpuart1.Init.HwFlowCtl = UART_HWCONTROL_NONE;
hlpuart1.Init.OneBitSampling = UART_ONE_BIT_SAMPLE_DISABLE;
hlpuart1.AdvancedInit.AdvFeatureInit = UART_ADVFEATURE_NO_INIT;
if (HAL_UART_Init(&hlpuart1) != HAL_OK)
{
Error_Handler();
}
}
The function HAL_UART_Init:
/* USER CODE BEGIN LPUART1_MspInit 0 */
/* USER CODE END LPUART1_MspInit 0 */
/* LPUART1 clock enable */
__HAL_RCC_LPUART1_CLK_ENABLE();
__HAL_RCC_GPIOC_CLK_ENABLE();
/**LPUART1 GPIO Configuration
PC0 ------> LPUART1_RX
PC1 ------> LPUART1_TX
*/
GPIO_InitStruct.Pin = GPIO_PIN_0|GPIO_PIN_1;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
GPIO_InitStruct.Alternate = GPIO_AF8_LPUART1;
HAL_GPIO_Init(GPIOC, &GPIO_InitStruct);
/* LPUART1 DMA Init */
/* LPUART_RX Init */
hdma_lpuart_rx.Instance = DMA2_Channel7;
hdma_lpuart_rx.Init.Request = DMA_REQUEST_4;
hdma_lpuart_rx.Init.Direction = DMA_PERIPH_TO_MEMORY;
hdma_lpuart_rx.Init.PeriphInc = DMA_PINC_DISABLE;
hdma_lpuart_rx.Init.MemInc = DMA_MINC_ENABLE;
hdma_lpuart_rx.Init.PeriphDataAlignment = DMA_PDATAALIGN_BYTE;
hdma_lpuart_rx.Init.MemDataAlignment = DMA_MDATAALIGN_BYTE;
hdma_lpuart_rx.Init.Mode = DMA_CIRCULAR;
hdma_lpuart_rx.Init.Priority = DMA_PRIORITY_LOW;
if (HAL_DMA_Init(&hdma_lpuart_rx) != HAL_OK)
{
Error_Handler();
}
__HAL_LINKDMA(uartHandle,hdmarx,hdma_usart1_rx);
/* LPUART_TX Init */
hdma_lpuart_tx.Instance = DMA2_Channel6;
hdma_lpuart_tx.Init.Request = DMA_REQUEST_4;
hdma_lpuart_tx.Init.Direction = DMA_MEMORY_TO_PERIPH;
hdma_lpuart_tx.Init.PeriphInc = DMA_PINC_DISABLE;
hdma_lpuart_tx.Init.MemInc = DMA_MINC_ENABLE;
hdma_lpuart_tx.Init.PeriphDataAlignment = DMA_PDATAALIGN_BYTE;
hdma_lpuart_tx.Init.MemDataAlignment = DMA_MDATAALIGN_BYTE;
hdma_lpuart_tx.Init.Mode = DMA_NORMAL;
hdma_lpuart_tx.Init.Priority = DMA_PRIORITY_LOW;
if (HAL_DMA_Init(&hdma_lpuart_tx) != HAL_OK)
{
Error_Handler();
}
__HAL_LINKDMA(uartHandle,hdmatx,hdma_lpuart_tx);
/* LPUART1 interrupt Init */
HAL_NVIC_SetPriority(LPUART1_IRQn, 0, 0);
HAL_NVIC_EnableIRQ(LPUART1_IRQn);
I have set the NVIC LPUART1 at the end of the last code. I have enable too the DMA2 channel 6 and 7 interrupts:
/**
* Enable DMA controller clock
*/
void MX_DMA_Init(void)
{
/* DMA controller clock enable */
__HAL_RCC_DMA1_CLK_ENABLE();
__HAL_RCC_DMA2_CLK_ENABLE();
/* DMA interrupt init */
/* DMA1_Channel1_IRQn interrupt configuration */
HAL_NVIC_SetPriority(DMA1_Channel1_IRQn, 5, 0);
HAL_NVIC_EnableIRQ(DMA1_Channel1_IRQn);
/* DMA1_Channel2_IRQn interrupt configuration */
HAL_NVIC_SetPriority(DMA1_Channel2_IRQn, 5, 0);
HAL_NVIC_EnableIRQ(DMA1_Channel2_IRQn);
/* DMA1_Channel4_IRQn interrupt configuration */
HAL_NVIC_SetPriority(DMA1_Channel4_IRQn, 5, 0);
HAL_NVIC_EnableIRQ(DMA1_Channel4_IRQn);
/* DMA1_Channel5_IRQn interrupt configuration */
HAL_NVIC_SetPriority(DMA1_Channel5_IRQn, 5, 0);
HAL_NVIC_EnableIRQ(DMA1_Channel5_IRQn);
/* DMA2_Channel1_IRQn interrupt configuration */
HAL_NVIC_SetPriority(DMA2_Channel1_IRQn, 5, 0);
HAL_NVIC_EnableIRQ(DMA2_Channel1_IRQn);
/* DMA2_Channel2_IRQn interrupt configuration */
HAL_NVIC_SetPriority(DMA2_Channel2_IRQn, 5, 0);
HAL_NVIC_EnableIRQ(DMA2_Channel2_IRQn);
/* DMA2_Channel6_IRQn interrupt configuration */
HAL_NVIC_SetPriority(DMA2_Channel6_IRQn, 5, 0);
HAL_NVIC_EnableIRQ(DMA2_Channel6_IRQn);
/* DMA2_Channel7_IRQn interrupt configuration */
HAL_NVIC_SetPriority(DMA2_Channel7_IRQn, 5, 0);
HAL_NVIC_EnableIRQ(DMA2_Channel7_IRQn);
}
In my main.c im trying to print something and it always return "HAL_OK":
HAL_UART_Transmit_DMA(&hlpuart1, (uint8_t *)'ACK\n', 5);
Inside the last function, we have set this callback:
/* Set the UART DMA transfer complete callback */
huart->hdmatx->XferCpltCallback = UART_DMATransmitCplt;
And inside this one, we have this callback:
HAL_UART_TxCpltCallback(huart);
void HAL_UART_TxCpltCallback(UART_HandleTypeDef *huart) {
/* Set transmission flag: transfer complete */
BaseType_t xHigherPriorityTaskWoken = pdFALSE;
if(huart->Instance == USART1)
{
if(UART_TX_Acess != NULL)
{
xSemaphoreGiveFromISR(UART_TX_Acess, &xHigherPriorityTaskWoken); //Usart transmision complete
}
}
else if(huart->Instance == LPUART1)
{
modbus_tx_finish = 0;
}
portYIELD_FROM_ISR( xHigherPriorityTaskWoken );
}
Everything seems to be fine coded but DMA2_Channel6_IRQHandler is jumping and and inside DMA IRQhandler an error appears:
/* Transfer Error Interrupt management **************************************/
But I cant understand why...

Ok I noticed the error. I was writting wrong the function to write. THis was the original:
HAL_UART_Transmit_DMA(&hlpuart1, (uint8_t *)'ACK\n', 5);
And this is the change:
HAL_UART_Transmit_DMA(&hlpuart1, (uint8_t *)"ACK\n", 5);

i.MX8 mini GPT implementation issue

I'm trying to use GPT to measuring frequency on a gpio. After some research on the google and Linux's documentation, unfortunately I couldn't see any compatible GPT driver in kernel. I found a patch on google and applied it. But I have a getting clock error now. There is a function which It gets the clock but it's never executing.
P.S : I prepared a platform driver to use exported functions (e.g. mxc_request_input_capture) in timer-imx-gpt.c.
My dts nodes
/*These nodes are child node if aips1*/
gpt1: gpt#302d0000 {
compatible = "fsl,imx8mm-gpt";
reg = <0x302d0000 0x10000>;
interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MM_CLK_GPT1>,
<&clk IMX8MM_CLK_GPT1_ROOT>,
<&clk IMX8MM_CLK_GPT_3M>;
clock-names = "ipg", "per", "osc_per";
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_gpt_input_capture0>;
status = "okay";
};
iomuxc: pinctrl#30330000 {
compatible = "fsl,imx8mm-iomuxc";
reg = <0x30330000 0x10000>;
pinctrl_gpt_input_capture0: gptinputcapture0grp {
fsl,pins = <
MX8MM_IOMUXC_SAI3_RXFS_GPT1_CAPTURE1 0xd6 /*0x00000000*/
>;
};
};
request gpt input capture func (in timer-imx-gpt.c)
int mxc_request_input_capture(unsigned int chan, mxc_icap_handler_t handler,
unsigned long capflags, void *dev_id)
{
struct imx_timer *imxtm;
struct icap_channel *ic;
unsigned long flags;
u64 start_cycles;
int ret = 0;
u32 mode;
/* we only care about rising and falling flags */
capflags &= (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING);
if (chan > 1 || !handler || !capflags)
return -EINVAL;
printk("mxc_request_input_capture 1\n");
ic = &icap_channel_arr[chan];
imxtm = ic->imxtm;
printk("mxc_request_input_capture 2\n");
if (!imxtm->gpt->gpt_ic_enable)
return -ENODEV; //THIS ERROR CODE RETURN
printk("mxc_request_input_capture 3\n");
spin_lock_irqsave(&icap_lock, flags);
if (ic->handler) {
ret = -EBUSY;
goto out;
}
printk("mxc_request_input_capture 4\n");
ic->handler = handler;
ic->dev_id = dev_id;
switch (capflags) {
case IRQF_TRIGGER_RISING:
mode = V2_IM_RISING;
break;
case IRQF_TRIGGER_FALLING:
mode = V2_IM_FALLING;
break;
default:
mode = V2_IM_BOTH;
break;
}
printk("mxc_request_input_capture 4\n");
/* ack any pending input capture interrupt before enabling */
imxtm->gpt->gpt_ic_irq_acknowledge(ic);
/*
* initialize the cyclecounter. The input capture is capturing
* from the mxc clocksource, so it has the same mask/shift/mult.
*/
memset(&ic->cc, 0, sizeof(ic->cc));
ic->cc.read = gpt_ic_read;
ic->cc.mask = clocksource_mxc.mask;
ic->cc.shift = clocksource_mxc.shift;
ic->cc.mult = clocksource_mxc.mult;
printk("mxc_request_input_capture 5\n");
/* initialize a timecounter for the input capture */
start_cycles = mxc_read_sched_clock();
timecounter_init(&ic->tc, &ic->cc, ktime_get_ns());
printk("mxc_request_input_capture 6\n");
/*
* timecounter_init() read the last captured timer count, but
* that's not the start cycle counter, so update it with the
* real start cycles.
*/
ic->tc.cycle_last = start_cycles;
imxtm->gpt->gpt_ic_enable(ic, mode);
imxtm->gpt->gpt_ic_irq_enable(ic);
printk("mxc_request_input_capture 7\n");
out:
spin_unlock_irqrestore(&icap_lock, flags);
printk("mxc_request_input_capture 8\n");
return ret;
}
EXPORT_SYMBOL_GPL(mxc_request_input_capture);
I realized that imxtm->gpt->gpt_ic_enable variable seems NULL. imxtm structure gets filled in the end of static int __init _mxc_timer_init function. But the function returns error before filling the struct. The error is happening checking the clock. The function which related to getting clock is mxc_timer_init_dt but it never executes.
log:
root:~# dmesg | grep gpt
[ 0.000174] gpt imx6dl_timer_init_dt HEAD
[ 0.000240] gpt _mxc_timer_init
[ 0.000244] gpt imxtm->type 3
[ 0.000249] gpt imxtm->type GPT_TYPE_IMX6DL
[ 0.000252] imx-gpt is NOT null
[ 0.000260] gpt GPT_TYPE_IMX6DL 3
[ 0.000263] gpt mxc_timer_init_dt error -517
[ 16.837810] mxc-timer 302d0000.gpt: GPT Timer Probe Function head
[ 16.843920] mxc-timer 302d0000.gpt: GPT Timer Probe Function end
It start with executing imx6dl_timer_init_dt. mxc_timer_init_dt function must be execute after imx6dl_timer_init_dt to get the clock but it is not executing. Due to failing on getting clock, I am not able to use the gpt module.
some parts of timer-imx-gpt.c
static int __init _mxc_timer_init(struct imx_timer *imxtm)
{
struct icap_channel *ic;
int i, ret;
printk(KERN_INFO "gpt _mxc_timer_init\n");
printk(KERN_INFO "gpt imxtm->type %d \n", imxtm->type);
switch (imxtm->type) {
case GPT_TYPE_IMX1:
imxtm->gpt = &imx1_gpt_data;
printk(KERN_INFO "gpt imxtm->type GPT_TYPE_IMX1 \n");
break;
case GPT_TYPE_IMX21:
imxtm->gpt = &imx21_gpt_data;
printk(KERN_INFO "gpt imxtm->type GPT_TYPE_IMX21 \n");
break;
case GPT_TYPE_IMX31:
imxtm->gpt = &imx31_gpt_data;
printk(KERN_INFO "gpt imxtm->type GPT_TYPE_IMX31 \n");
break;
case GPT_TYPE_IMX6DL:
imxtm->gpt = &imx6dl_gpt_data;
printk(KERN_INFO "gpt imxtm->type GPT_TYPE_IMX6DL \n");
break;
default:
printk(KERN_INFO "gpt imxtm->type DEFAULT \n");
return -EINVAL;
}
imxtm->gpt = &imx6dl_gpt_data;
if(!imxtm->gpt)
printk(KERN_INFO "imx-gpt is null");
printk(KERN_INFO "imx-gpt is NOT null");
if (IS_ERR(imxtm->clk_per)) {
pr_err("i.MX timer: unable to get clk\n"); //I SAW THIS LOG IN LINUX LOGS
return PTR_ERR(imxtm->clk_per); //Function return error at this line
}
printk(KERN_INFO "gpt clk_per success\n");
if (!IS_ERR(imxtm->clk_ipg))
clk_prepare_enable(imxtm->clk_ipg);
printk(KERN_INFO "gpt clk_ipg success\n");
clk_prepare_enable(imxtm->clk_per);
printk(KERN_INFO "gpt clk_prepare_enable\n");
/*
* Initialise to a known state (all timers off, and timing reset)
*/
writel_relaxed(0, imxtm->base + MXC_TCTL);
writel_relaxed(0, imxtm->base + MXC_TPRER); /* see datasheet note */
imxtm->gpt->gpt_oc_setup_tctl(imxtm);
/* init and register the timer to the framework */
ret = mxc_clocksource_init(imxtm);
if (ret)
return ret;
ret = mxc_clockevent_init(imxtm);
if (ret)
return ret;
/*Filling the imx structure. But never reach here*/
for (i = 0; i < 2; i++) {
ic = &icap_channel_arr[i];
ic->imxtm = imxtm;
}
printk(KERN_INFO, "_mxc_timer_init RETURN SUCCESS\n");
return 0;
}
void __init mxc_timer_init(unsigned long pbase, int irq, enum imx_gpt_type type)
{
struct imx_timer *imxtm;
printk(KERN_INFO, "gpt mxc_timer_init HEAD\n");
imxtm = kzalloc(sizeof(*imxtm), GFP_KERNEL);
BUG_ON(!imxtm);
imxtm->clk_per = clk_get_sys("imx-gpt.0", "per");
imxtm->clk_ipg = clk_get_sys("imx-gpt.0", "ipg");
imxtm->base = ioremap(pbase, SZ_4K);
BUG_ON(!imxtm->base);
imxtm->type = type;
imxtm->irq = irq;
_mxc_timer_init(imxtm);
}
/*
* a platform driver is needed in order to acquire pinmux
* for input capture pins. The probe call is also useful
* for setting up the input capture channel structures.
*/
static int mxc_timer_probe(struct platform_device *pdev)
{
struct icap_channel *ic;
int i;
dev_info(&pdev->dev, "GPT Timer Probe Function head\n");
/* setup the input capture channels */
for (i = 0; i < 2; i++) {
ic = &icap_channel_arr[i];
ic->chan = i;
if (i == 0) {
ic->cnt_reg = V2_TCAP1;
ic->irqen_bit = V2_IR_IF1;
ic->status_bit = V2_TSTAT_IF1;
ic->mode_bit = V2_TCTL_IM1_BIT;
} else {
ic->cnt_reg = V2_TCAP2;
ic->irqen_bit = V2_IR_IF2;
ic->status_bit = V2_TSTAT_IF2;
ic->mode_bit = V2_TCTL_IM2_BIT;
}
}
dev_info(&pdev->dev, "GPT Timer Probe Function end\n");
return 0;
}
static int mxc_timer_remove(struct platform_device *pdev)
{
return 0;
}
static const struct of_device_id timer_of_match[] = {
{ .compatible = "fsl,imx1-gpt" },
{ .compatible = "fsl,imx21-gpt" },
{ .compatible = "fsl,imx27-gpt" },
{ .compatible = "fsl,imx31-gpt" },
{ .compatible = "fsl,imx25-gpt" },
{ .compatible = "fsl,imx50-gpt" },
{ .compatible = "fsl,imx51-gpt" },
{ .compatible = "fsl,imx53-gpt" },
{ .compatible = "fsl,imx6q-gpt" },
{ .compatible = "fsl,imx6dl-gpt" },
{ .compatible = "fsl,imx6sl-gpt" },
{ .compatible = "fsl,imx6sx-gpt" },
{ .compatible = "fsl,imx8mm-gpt" },
{ },
};
MODULE_DEVICE_TABLE(of, timer_of_match);
static struct platform_driver mxc_timer_pdrv = {
.probe = mxc_timer_probe,
.remove = mxc_timer_remove,
.driver = {
.name = "mxc-timer",
.owner = THIS_MODULE,
.of_match_table = timer_of_match,
},
};
module_platform_driver(mxc_timer_pdrv);
static int __init mxc_timer_init_dt(struct device_node *np, enum imx_gpt_type type)
{
struct imx_timer *imxtm;
static int initialized;
int ret;
printk(KERN_INFO, "gpt mxc_timer_init_dt head\n");
/* Support one instance only */
if (initialized)
return 0;
imxtm = kzalloc(sizeof(*imxtm), GFP_KERNEL);
if (!imxtm)
return -ENOMEM;
imxtm->base = of_iomap(np, 0);
if (!imxtm->base)
return -ENXIO;
imxtm->irq = irq_of_parse_and_map(np, 0);
if (imxtm->irq <= 0)
return -EINVAL;
imxtm->clk_ipg = of_clk_get_by_name(np, "ipg");
/* Try osc_per first, and fall back to per otherwise */
imxtm->clk_per = of_clk_get_by_name(np, "osc_per");
if (IS_ERR(imxtm->clk_per))
imxtm->clk_per = of_clk_get_by_name(np, "per");
imxtm->type = type;
printk(KERN_INFO, "gpt mxc_timer_init_dt line 883\n");
ret = _mxc_timer_init(imxtm);
if (ret)
return ret;
initialized = 1;
printk(KERN_INFO, "gpt mxc_timer_init_dt end\n");
return 0;
}
static int __init imx1_timer_init_dt(struct device_node *np)
{
return mxc_timer_init_dt(np, GPT_TYPE_IMX1);
}
static int __init imx21_timer_init_dt(struct device_node *np)
{
return mxc_timer_init_dt(np, GPT_TYPE_IMX21);
}
static int __init imx31_timer_init_dt(struct device_node *np)
{
enum imx_gpt_type type = GPT_TYPE_IMX31;
printk(KERN_INFO "gpt imx31_timer_init_dt HEAD");
/*
* We were using the same compatible string for i.MX6Q/D and i.MX6DL/S
* GPT device, while they actually have different programming model.
* This is a workaround to keep the existing i.MX6DL/S DTBs continue
* working with the new kernel.
*/
if (of_machine_is_compatible("fsl,imx6dl"))
type = GPT_TYPE_IMX6DL;
return mxc_timer_init_dt(np, type);
}
static int __init imx6dl_timer_init_dt(struct device_node *np)
{
printk(KERN_INFO "gpt imx6dl_timer_init_dt HEAD");
return mxc_timer_init_dt(np, GPT_TYPE_IMX6DL); //This function must execute to get clock
}
TIMER_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt);
TIMER_OF_DECLARE(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt);
TIMER_OF_DECLARE(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt);
TIMER_OF_DECLARE(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt);
TIMER_OF_DECLARE(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt);
TIMER_OF_DECLARE(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt);
TIMER_OF_DECLARE(imx51_timer, "fsl,imx51-gpt", imx31_timer_init_dt);
TIMER_OF_DECLARE(imx53_timer, "fsl,imx53-gpt", imx31_timer_init_dt);
TIMER_OF_DECLARE(imx6q_timer, "fsl,imx6q-gpt", imx31_timer_init_dt);
TIMER_OF_DECLARE(imx6dl_timer, "fsl,imx6dl-gpt", imx6dl_timer_init_dt);
TIMER_OF_DECLARE(imx6sl_timer, "fsl,imx6sl-gpt", imx6dl_timer_init_dt);
TIMER_OF_DECLARE(imx6sx_timer, "fsl,imx6sx-gpt", imx6dl_timer_init_dt);
TIMER_OF_DECLARE(imx8mm_timer, "fsl,imx8mm-gpt", imx6dl_timer_init_dt); //This line added after applying patch
I will be very grateful for your help

Deepstream back to back detectors with DashCamNetand VehicleMakeNet not classifying correctly

Question also posted on their forum here: https://forums.developer.nvidia.com/t/deepstream-back-to-back-detectors-with-dashcamnetand-vehiclemakenet-not-classifying-correctly/220606
Hello,
Hardware Platform (Jetson / GPU) dGPU RTX 3080 on Ubuntu 20.04.1
DeepStream Version 6.1
TensorRT Version 8.4.1
NVIDIA GPU Driver Version (valid for GPU only) 515.48.07
I'm trying to use the back-to-back-detectors reference C application with the DashCamNet and VehicleMakeNet models. The DashCamNet detector works, but the VehicleMakeNet classifier is only outputing whatever the first entry in the labels file is (Acura in this case).
The changes I've made is from lines 89 to 103, I've replaced that all with an if statement that will print out any classifier metadata if it exists along with instantiating variables, changing the config names and changing the sink to "fake-renderer".
if (obj_meta->classifier_meta_list) {
class_meta = (NvDsClassifierMeta * )(obj_meta->classifier_meta_list->data);
if (class_meta->label_info_list) {
label_info = (NvDsLabelInfo * )(class_meta->label_info_list->data);
g_print("Result: %s\n", label_info->result_label);
}
}
DashCamNet Configuration:
[property]
gpu-id=0
net-scale-factor=0.00392156862745098
offsets=0.0;0.0;0.0
tlt-model-key=tlt_encode
tlt-encoded-model=models/tao_pretrained_models/dashcamnet/resnet18_dashcamnet_pruned.etlt
labelfile-path=models/tao_pretrained_models/dashcamnet/labels.txt
int8-calib-file=models/tao_pretrained_models/dashcamnet/dashcamnet_int8.txt
model-engine-file=models/tao_pretrained_models/dashcamnet/resnet18_dashcamnet_pruned.etlt_b1_gpu0_int8.engine
infer-dims=3;544;960
uff-input-blob-name=input_1
batch-size=1
process-mode=1
model-color-format=0
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=1
num-detected-classes=4
interval=0
gie-unique-id=1
output-blob-names=output_cov/Sigmoid;output_bbox/BiasAdd
model-color-format=0
maintain-aspect-ratio=0
output-tensor-meta=0
[class-attrs-all]
pre-cluster-threshold=0.2
group-threshold=1
## Set eps=0.7 and minBoxes for cluster-mode=1(DBSCAN)
eps=0.2
#minBoxes=3
VehicleMakeNet
[property]
batch-size=4
classifier-threshold=0.95
gie-unique-id=4
gpu-id=0
input-dims=3;224;224;0
int8-calib-file=models/VehicleMake/vehiclemakenet_int8.txt
labelfile-path=models/VehicleMake/labels_vehiclemakenet.txt
model-color-format=0
model-engine-file=models/VehicleMake/resnet18_vehiclemakenet_pruned.etlt_b4_gpu0_int8.engine
net-scale-factor=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=1
network-type=1
num-detected-classes=4
offsets=124;117;104
operate-on-gie-id=1
output-blob-names=predictions/Softmax
process-mode=2
tlt-encoded-model=models/VehicleMake/resnet18_vehiclemakenet_pruned.etlt
tlt-model-key=tlt_encode
uff-input-blob-name=input_1
Full code
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gst/gst.h>
#include <glib.h>
#include <stdio.h>
#include "gstnvdsmeta.h"
#include <cuda_runtime_api.h>
#define MAX_DISPLAY_LEN 64
#define PGIE_CLASS_ID_VEHICLE 0
#define PGIE_CLASS_ID_PERSON 2
#define SGIE_CLASS_ID_LP 1
#define SGIE_CLASS_ID_FACE 0
/* Change this to 0 to make the 2nd detector act as a primary(full-frame) detector.
* When set to 1, it will act as secondary(operates on primary detected objects). */
#define SECOND_DETECTOR_IS_SECONDARY 1
/* The muxer output resolution must be set if the input streams will be of
* different resolution. The muxer will scale all the input frames to this
* resolution. */
#define MUXER_OUTPUT_WIDTH 1280
#define MUXER_OUTPUT_HEIGHT 720
/* Muxer batch formation timeout, for e.g. 40 millisec. Should ideally be set
* based on the fastest source's framerate. */
#define MUXER_BATCH_TIMEOUT_USEC 40000
gint frame_number = 0;
gchar pgie_classes_str[4][32] = { "Vehicle", "TwoWheeler", "Person",
"Roadsign"
};
#define PRIMARY_DETECTOR_UID 1
#define SECONDARY_DETECTOR_UID 2
/* nvvidconv_sink_pad_buffer_probe will extract metadata received on nvvideoconvert sink pad
* and update params for drawing rectangle, object information etc. */
static GstPadProbeReturn
nvvidconv_sink_pad_buffer_probe (GstPad * pad, GstPadProbeInfo * info,
gpointer u_data)
{
GstBuffer *buf = (GstBuffer *) info->data;
NvDsObjectMeta *obj_meta = NULL;
guint vehicle_count = 0;
guint person_count = 0;
guint face_count = 0;
guint lp_count = 0;
NvDsMetaList * l_frame = NULL;
NvDsMetaList * l_obj = NULL;
NvDsDisplayMeta *display_meta = NULL;
NvDsClassifierMeta *class_meta = NULL;
NvDsLabelInfo *label_info = NULL;
NvDsBatchMeta *batch_meta = gst_buffer_get_nvds_batch_meta (buf);
for (l_frame = batch_meta->frame_meta_list; l_frame != NULL;
l_frame = l_frame->next) {
NvDsFrameMeta *frame_meta = (NvDsFrameMeta *) (l_frame->data);
int offset = 0;
for (l_obj = frame_meta->obj_meta_list; l_obj != NULL;
l_obj = l_obj->next) {
obj_meta = (NvDsObjectMeta *) (l_obj->data);
/* Check that the object has been detected by the primary detector
* and that the class id is that of vehicles/persons. */
if (obj_meta->unique_component_id == PRIMARY_DETECTOR_UID) {
if (obj_meta->class_id == PGIE_CLASS_ID_VEHICLE)
vehicle_count++;
if (obj_meta->class_id == PGIE_CLASS_ID_PERSON)
person_count++;
}
if (obj_meta->classifier_meta_list) {
class_meta = (NvDsClassifierMeta * )(obj_meta->classifier_meta_list->data);
if (class_meta->label_info_list) {
label_info = (NvDsLabelInfo * )(class_meta->label_info_list->data);
g_print("Result: %s\n", label_info->result_label);
}
}
}
display_meta = nvds_acquire_display_meta_from_pool(batch_meta);
NvOSD_TextParams *txt_params = &display_meta->text_params[0];
display_meta->num_labels = 1;
txt_params->display_text = g_malloc0 (MAX_DISPLAY_LEN);
offset = snprintf(txt_params->display_text, MAX_DISPLAY_LEN, "Person = %d ", person_count);
offset += snprintf(txt_params->display_text + offset , MAX_DISPLAY_LEN, "Vehicle = %d ", vehicle_count);
offset += snprintf(txt_params->display_text + offset , MAX_DISPLAY_LEN, "Face = %d ", face_count);
offset += snprintf(txt_params->display_text + offset , MAX_DISPLAY_LEN, "License Plate = %d ", lp_count);
/* Now set the offsets where the string should appear */
txt_params->x_offset = 10;
txt_params->y_offset = 12;
/* Font , font-color and font-size */
txt_params->font_params.font_name = "Serif";
txt_params->font_params.font_size = 10;
txt_params->font_params.font_color.red = 1.0;
txt_params->font_params.font_color.green = 1.0;
txt_params->font_params.font_color.blue = 1.0;
txt_params->font_params.font_color.alpha = 1.0;
/* Text background color */
txt_params->set_bg_clr = 1;
txt_params->text_bg_clr.red = 0.0;
txt_params->text_bg_clr.green = 0.0;
txt_params->text_bg_clr.blue = 0.0;
txt_params->text_bg_clr.alpha = 1.0;
nvds_add_display_meta_to_frame(frame_meta, display_meta);
}
g_print ("Frame Number = %d Vehicle Count = %d Person Count = %d"
" Face Count = %d License Plate Count = %d\n",
frame_number, vehicle_count, person_count,
face_count, lp_count);
frame_number++;
return GST_PAD_PROBE_OK;
}
static gboolean
bus_call (GstBus * bus, GstMessage * msg, gpointer data)
{
GMainLoop *loop = (GMainLoop *) data;
switch (GST_MESSAGE_TYPE (msg)) {
case GST_MESSAGE_EOS:
g_print ("End of stream\n");
g_main_loop_quit (loop);
break;
case GST_MESSAGE_ERROR:{
gchar *debug;
GError *error;
gst_message_parse_error (msg, &error, &debug);
g_printerr ("ERROR from element %s: %s\n",
GST_OBJECT_NAME (msg->src), error->message);
if (debug)
g_printerr ("Error details: %s\n", debug);
g_free (debug);
g_error_free (error);
g_main_loop_quit (loop);
break;
}
default:
break;
}
return TRUE;
}
int
main (int argc, char *argv[])
{
GMainLoop *loop = NULL;
GstElement *pipeline = NULL, *source = NULL, *h264parser = NULL,
*decoder = NULL, *streammux = NULL, *sink = NULL, *primary_detector = NULL,
*secondary_detector = NULL, *nvvidconv = NULL, *nvosd = NULL;
GstElement *transform = NULL;
GstBus *bus = NULL;
guint bus_watch_id;
GstPad *nvvidconv_sink_pad = NULL;
int current_device = -1;
cudaGetDevice(&current_device);
struct cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, current_device);
/* Check input arguments */
if (argc != 2) {
g_printerr ("Usage: %s <H264 filename>\n", argv[0]);
return -1;
}
/* Standard GStreamer initialization */
gst_init (&argc, &argv);
loop = g_main_loop_new (NULL, FALSE);
/* Create gstreamer elements */
/* Create Pipeline element that will form a connection of other elements */
pipeline = gst_pipeline_new ("pipeline");
/* Source element for reading from the file */
source = gst_element_factory_make ("filesrc", "file-source");
/* Since the data format in the input file is elementary h264 stream,
* we need a h264parser */
h264parser = gst_element_factory_make ("h264parse", "h264-parser");
/* Use nvdec_h264 for hardware accelerated decode on GPU */
decoder = gst_element_factory_make ("nvv4l2decoder", "nvv4l2-decoder");
/* Create nvstreammux instance to form batches from one or more sources. */
streammux = gst_element_factory_make ("nvstreammux", "stream-muxer");
if (!pipeline || !streammux) {
g_printerr ("One element could not be created. Exiting.\n");
return -1;
}
/* Create two nvinfer instances for the two back-to-back detectors */
primary_detector = gst_element_factory_make ("nvinfer", "primary-nvinference-engine1");
secondary_detector = gst_element_factory_make ("nvinfer", "primary-nvinference-engine2");
/* Use convertor to convert from NV12 to RGBA as required by nvosd */
nvvidconv = gst_element_factory_make ("nvvideoconvert", "nvvideo-converter");
/* Create OSD to draw on the converted RGBA buffer */
nvosd = gst_element_factory_make ("nvdsosd", "nv-onscreendisplay");
/* Finally render the osd output */
if(prop.integrated) {
transform = gst_element_factory_make ("nvegltransform", "nvegl-transform");
}
sink = gst_element_factory_make("fakesink", "fake-renderer");
if (!source || !h264parser || !decoder || !primary_detector || !secondary_detector
|| !nvvidconv || !nvosd || !sink) {
g_printerr ("One element could not be created. Exiting.\n");
return -1;
}
if(prop.integrated) {
if(!transform) {
g_printerr ("One tegra element could not be created. Exiting.\n");
return -1;
}
}
/* we set the input filename to the source element */
g_object_set (G_OBJECT (source), "location", argv[1], NULL);
g_object_set (G_OBJECT (streammux), "width", MUXER_OUTPUT_WIDTH, "height",
MUXER_OUTPUT_HEIGHT, "batch-size", 1,
"batched-push-timeout", MUXER_BATCH_TIMEOUT_USEC, NULL);
/* Set the config files for the two detectors. We demonstrate this by using
* the same detector model twice but making them act as vehicle-only and
* person-only detectors by adjusting the bbox confidence thresholds in the
* two seperate config files. */
g_object_set (G_OBJECT (primary_detector), "config-file-path", "dashcamnet_config.txt",
"unique-id", PRIMARY_DETECTOR_UID, NULL);
g_object_set (G_OBJECT (secondary_detector), "config-file-path", "vehicletypenet_sgie_config.txt",
"unique-id", SECONDARY_DETECTOR_UID, "process-mode", 2, NULL);
/* we add a message handler */
bus = gst_pipeline_get_bus (GST_PIPELINE (pipeline));
bus_watch_id = gst_bus_add_watch (bus, bus_call, loop);
gst_object_unref (bus);
/* Set up the pipeline */
/* we add all elements into the pipeline */
if(prop.integrated) {
gst_bin_add_many (GST_BIN (pipeline),
source, h264parser, decoder, streammux, primary_detector, secondary_detector,
nvvidconv, nvosd, transform, sink, NULL);
} else {
gst_bin_add_many (GST_BIN (pipeline),
source, h264parser, decoder, streammux, primary_detector, secondary_detector,
nvvidconv, nvosd, sink, NULL);
}
GstPad *sinkpad, *srcpad;
gchar pad_name_sink[16] = "sink_0";
gchar pad_name_src[16] = "src";
sinkpad = gst_element_get_request_pad (streammux, pad_name_sink);
if (!sinkpad) {
g_printerr ("Streammux request sink pad failed. Exiting.\n");
return -1;
}
srcpad = gst_element_get_static_pad (decoder, pad_name_src);
if (!srcpad) {
g_printerr ("Decoder request src pad failed. Exiting.\n");
return -1;
}
if (gst_pad_link (srcpad, sinkpad) != GST_PAD_LINK_OK) {
g_printerr ("Failed to link decoder to stream muxer. Exiting.\n");
return -1;
}
gst_object_unref (sinkpad);
gst_object_unref (srcpad);
/* we link the elements together */
/* file-source -> h264-parser -> nvh264-decoder ->
* nvinfer -> nvvidconv -> nvosd -> video-renderer */
if (!gst_element_link_many (source, h264parser, decoder, NULL)) {
g_printerr ("Elements could not be linked: 1. Exiting.\n");
return -1;
}
if(prop.integrated) {
if (!gst_element_link_many (streammux, primary_detector, secondary_detector,
nvvidconv, nvosd, transform, sink, NULL)) {
g_printerr ("Elements could not be linked: 2. Exiting.\n");
return -1;
}
} else {
if (!gst_element_link_many (streammux, primary_detector, secondary_detector,
nvvidconv, nvosd, sink, NULL)) {
g_printerr ("Elements could not be linked: 2. Exiting.\n");
return -1;
}
}
/* Lets add probe to get informed of the meta data generated, we add probe to
* the sink pad of the nvvideoconvert element, since by that time, the buffer would have
* had got all the metadata. */
nvvidconv_sink_pad = gst_element_get_static_pad (nvvidconv, "sink");
if (!nvvidconv_sink_pad)
g_print ("Unable to get sink pad\n");
else
gst_pad_add_probe (nvvidconv_sink_pad, GST_PAD_PROBE_TYPE_BUFFER,
nvvidconv_sink_pad_buffer_probe, NULL, NULL);
/* Set the pipeline to "playing" state */
g_print ("Now playing: %s\n", argv[1]);
gst_element_set_state (pipeline, GST_STATE_PLAYING);
/* Wait till pipeline encounters an error or EOS */
g_print ("Running...\n");
g_main_loop_run (loop);
/* Out of the main loop, clean up nicely */
g_print ("Returned, stopping playback\n");
gst_element_set_state (pipeline, GST_STATE_NULL);
g_print ("Deleting pipeline\n");
gst_object_unref (GST_OBJECT (pipeline));
g_source_remove (bus_watch_id);
g_main_loop_unref (loop);
return 0;
}

I think the issue was that the labels.txt file that was provided for the model needed to be comma seperated and doesn’t work if it’s seperated by new lines.

'scull' device from ldd3 is not shown under /dev/

Following the book ldd3 (- Linux Device Drivers 3 ed.) and using, also, source code files available here (as suggested by another stackoverflow's user here), I am able to compile the scull device module and load it on my Linux-based OS. For more details:
$ insmod scull.ko
$ lsmod
Module Size Used by
scull 20480 0
$ dmesg | tail -...
[...] scullsingle registered at f300008
[...] sculluid registered at f300009
[...] scullwuid registered at f30000a
[...] sullpriv registered at f30000b
However, in the /dev/ directory, there is NOT the scull0 device entry as I was expecting. So I cannot use it.
In addition, when executing:
$ rmmod scull
The system displays this error:
rmmod: ERROR: ../libkmod/libkmod.c:514 lookup_builtin_file() could not open builtin file '/lib/modules/4.9.0/modules.builtin.bin'
The funny phenomena is that now, executing lsmod, the device seems disappeared and before, as you can see from the snipped, it was present.
MORE INFO (may be they can be useful in some way):
using a 4.9.0 kernel version
currently, I am cross-compiling the module
gcc used: aarch64-Linux-gnu-gcc
ARCH=arm64
snipped code of scull:
CODE:
/*
* main.c -- the bare scull char module
*
* Copyright (C) 2001 Alessandro Rubini and Jonathan Corbet
* Copyright (C) 2001 O'Reilly & Associates
*
* The source code in this file can be freely used, adapted,
* and redistributed in source or binary form, so long as an
* acknowledgment appears in derived source files. The citation
* should list that the code comes from the book "Linux Device
* Drivers" by Alessandro Rubini and Jonathan Corbet, published
* by O'Reilly & Associates. No warranty is attached;
* we cannot take responsibility for errors or fitness for use.
*
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/kernel.h> /* printk() */
#include <linux/slab.h> /* kmalloc() */
#include <linux/fs.h> /* everything... */
#include <linux/errno.h> /* error codes */
#include <linux/types.h> /* size_t */
#include <linux/proc_fs.h>
#include <linux/fcntl.h> /* O_ACCMODE */
#include <linux/seq_file.h>
#include <linux/cdev.h>
#include <asm/uaccess.h> /* copy_*_user */
#include "scull.h" /* local definitions */
/*
* Our parameters which can be set at load time.
*/
int scull_major = SCULL_MAJOR;
int scull_minor = 0;
int scull_nr_devs = SCULL_NR_DEVS; /* number of bare scull devices */
int scull_quantum = SCULL_QUANTUM;
int scull_qset = SCULL_QSET;
module_param(scull_major, int, S_IRUGO);
module_param(scull_minor, int, S_IRUGO);
module_param(scull_nr_devs, int, S_IRUGO);
module_param(scull_quantum, int, S_IRUGO);
module_param(scull_qset, int, S_IRUGO);
MODULE_AUTHOR("Alessandro Rubini, Jonathan Corbet");
MODULE_LICENSE("Dual BSD/GPL");
struct scull_dev *scull_devices; /* allocated in scull_init_module */
/*
* Empty out the scull device; must be called with the device
* semaphore held.
*/
int scull_trim(struct scull_dev *dev)
{
struct scull_qset *next, *dptr;
int qset = dev->qset; /* "dev" is not-null */
int i;
for (dptr = dev->data; dptr; dptr = next) { /* all the list items */
if (dptr->data) {
for (i = 0; i < qset; i++)
kfree(dptr->data[i]);
kfree(dptr->data);
dptr->data = NULL;
}
next = dptr->next;
kfree(dptr);
}
dev->size = 0;
dev->quantum = scull_quantum;
dev->qset = scull_qset;
dev->data = NULL;
return 0;
}
#ifdef SCULL_DEBUG /* use proc only if debugging */
/*
* The proc filesystem: function to read and entry
*/
int scull_read_procmem(struct seq_file *s, void *v)
{
int i, j;
int limit = s->size - 80; /* Don't print more than this */
for (i = 0; i < scull_nr_devs && s->count <= limit; i++) {
struct scull_dev *d = &scull_devices[i];
struct scull_qset *qs = d->data;
if (down_interruptible(&d->sem))
return -ERESTARTSYS;
seq_printf(s,"\nDevice %i: qset %i, q %i, sz %li\n",
i, d->qset, d->quantum, d->size);
for (; qs && s->count <= limit; qs = qs->next) { /* scan the list */
seq_printf(s, " item at %p, qset at %p\n",
qs, qs->data);
if (qs->data && !qs->next) /* dump only the last item */
for (j = 0; j < d->qset; j++) {
if (qs->data[j])
seq_printf(s, " % 4i: %8p\n",
j, qs->data[j]);
}
}
up(&scull_devices[i].sem);
}
return 0;
}
/*
* Here are our sequence iteration methods. Our "position" is
* simply the device number.
*/
static void *scull_seq_start(struct seq_file *s, loff_t *pos)
{
if (*pos >= scull_nr_devs)
return NULL; /* No more to read */
return scull_devices + *pos;
}
static void *scull_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
(*pos)++;
if (*pos >= scull_nr_devs)
return NULL;
return scull_devices + *pos;
}
static void scull_seq_stop(struct seq_file *s, void *v)
{
/* Actually, there's nothing to do here */
}
static int scull_seq_show(struct seq_file *s, void *v)
{
struct scull_dev *dev = (struct scull_dev *) v;
struct scull_qset *d;
int i;
if (down_interruptible(&dev->sem))
return -ERESTARTSYS;
seq_printf(s, "\nDevice %i: qset %i, q %i, sz %li\n",
(int) (dev - scull_devices), dev->qset,
dev->quantum, dev->size);
for (d = dev->data; d; d = d->next) { /* scan the list */
seq_printf(s, " item at %p, qset at %p\n", d, d->data);
if (d->data && !d->next) /* dump only the last item */
for (i = 0; i < dev->qset; i++) {
if (d->data[i])
seq_printf(s, " % 4i: %8p\n",
i, d->data[i]);
}
}
up(&dev->sem);
return 0;
}
/*
* Tie the sequence operators up.
*/
static struct seq_operations scull_seq_ops = {
.start = scull_seq_start,
.next = scull_seq_next,
.stop = scull_seq_stop,
.show = scull_seq_show
};
/*
* Now to implement the /proc files we need only make an open
* method which sets up the sequence operators.
*/
static int scullmem_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, scull_read_procmem, NULL);
}
static int scullseq_proc_open(struct inode *inode, struct file *file)
{
return seq_open(file, &scull_seq_ops);
}
/*
* Create a set of file operations for our proc files.
*/
static struct file_operations scullmem_proc_ops = {
.owner = THIS_MODULE,
.open = scullmem_proc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release
};
static struct file_operations scullseq_proc_ops = {
.owner = THIS_MODULE,
.open = scullseq_proc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
/*
* Actually create (and remove) the /proc file(s).
*/
static void scull_create_proc(void)
{
proc_create_data("scullmem", 0 /* default mode */,
NULL /* parent dir */, &scullmem_proc_ops,
NULL /* client data */);
proc_create("scullseq", 0, NULL, &scullseq_proc_ops);
}
static void scull_remove_proc(void)
{
/* no problem if it was not registered */
remove_proc_entry("scullmem", NULL /* parent dir */);
remove_proc_entry("scullseq", NULL);
}
#endif /* SCULL_DEBUG */
/*
* Open and close
*/
int scull_open(struct inode *inode, struct file *filp)
{
struct scull_dev *dev; /* device information */
dev = container_of(inode->i_cdev, struct scull_dev, cdev);
filp->private_data = dev; /* for other methods */
/* now trim to 0 the length of the device if open was write-only */
if ( (filp->f_flags & O_ACCMODE) == O_WRONLY) {
if (down_interruptible(&dev->sem))
return -ERESTARTSYS;
scull_trim(dev); /* ignore errors */
up(&dev->sem);
}
return 0; /* success */
}
int scull_release(struct inode *inode, struct file *filp)
{
return 0;
}
/*
* Follow the list
*/
struct scull_qset *scull_follow(struct scull_dev *dev, int n)
{
struct scull_qset *qs = dev->data;
/* Allocate first qset explicitly if need be */
if (! qs) {
qs = dev->data = kmalloc(sizeof(struct scull_qset), GFP_KERNEL);
if (qs == NULL)
return NULL; /* Never mind */
memset(qs, 0, sizeof(struct scull_qset));
}
/* Then follow the list */
while (n--) {
if (!qs->next) {
qs->next = kmalloc(sizeof(struct scull_qset), GFP_KERNEL);
if (qs->next == NULL)
return NULL; /* Never mind */
memset(qs->next, 0, sizeof(struct scull_qset));
}
qs = qs->next;
continue;
}
return qs;
}
/*
* Data management: read and write
*/
ssize_t scull_read(struct file *filp, char __user *buf, size_t count,
loff_t *f_pos)
{
struct scull_dev *dev = filp->private_data;
struct scull_qset *dptr; /* the first listitem */
int quantum = dev->quantum, qset = dev->qset;
int itemsize = quantum * qset; /* how many bytes in the listitem */
int item, s_pos, q_pos, rest;
ssize_t retval = 0;
if (down_interruptible(&dev->sem))
return -ERESTARTSYS;
if (*f_pos >= dev->size)
goto out;
if (*f_pos + count > dev->size)
count = dev->size - *f_pos;
/* find listitem, qset index, and offset in the quantum */
item = (long)*f_pos / itemsize;
rest = (long)*f_pos % itemsize;
s_pos = rest / quantum; q_pos = rest % quantum;
/* follow the list up to the right position (defined elsewhere) */
dptr = scull_follow(dev, item);
if (dptr == NULL || !dptr->data || ! dptr->data[s_pos])
goto out; /* don't fill holes */
/* read only up to the end of this quantum */
if (count > quantum - q_pos)
count = quantum - q_pos;
if (copy_to_user(buf, dptr->data[s_pos] + q_pos, count)) {
retval = -EFAULT;
goto out;
}
*f_pos += count;
retval = count;
out:
up(&dev->sem);
return retval;
}
ssize_t scull_write(struct file *filp, const char __user *buf, size_t count,
loff_t *f_pos)
{
struct scull_dev *dev = filp->private_data;
struct scull_qset *dptr;
int quantum = dev->quantum, qset = dev->qset;
int itemsize = quantum * qset;
int item, s_pos, q_pos, rest;
ssize_t retval = -ENOMEM; /* value used in "goto out" statements */
if (down_interruptible(&dev->sem))
return -ERESTARTSYS;
/* find listitem, qset index and offset in the quantum */
item = (long)*f_pos / itemsize;
rest = (long)*f_pos % itemsize;
s_pos = rest / quantum; q_pos = rest % quantum;
/* follow the list up to the right position */
dptr = scull_follow(dev, item);
if (dptr == NULL)
goto out;
if (!dptr->data) {
dptr->data = kmalloc(qset * sizeof(char *), GFP_KERNEL);
if (!dptr->data)
goto out;
memset(dptr->data, 0, qset * sizeof(char *));
}
if (!dptr->data[s_pos]) {
dptr->data[s_pos] = kmalloc(quantum, GFP_KERNEL);
if (!dptr->data[s_pos])
goto out;
}
/* write only up to the end of this quantum */
if (count > quantum - q_pos)
count = quantum - q_pos;
if (copy_from_user(dptr->data[s_pos]+q_pos, buf, count)) {
retval = -EFAULT;
goto out;
}
*f_pos += count;
retval = count;
/* update the size */
if (dev->size < *f_pos)
dev->size = *f_pos;
out:
up(&dev->sem);
return retval;
}
/*
* The ioctl() implementation
*/
long scull_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int err = 0, tmp;
int retval = 0;
/*
* extract the type and number bitfields, and don't decode
* wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
*/
if (_IOC_TYPE(cmd) != SCULL_IOC_MAGIC) return -ENOTTY;
if (_IOC_NR(cmd) > SCULL_IOC_MAXNR) return -ENOTTY;
/*
* the direction is a bitmask, and VERIFY_WRITE catches R/W
* transfers. `Type' is user-oriented, while
* access_ok is kernel-oriented, so the concept of "read" and
* "write" is reversed
*/
if (_IOC_DIR(cmd) & _IOC_READ)
err = !access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd));
else if (_IOC_DIR(cmd) & _IOC_WRITE)
err = !access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd));
if (err) return -EFAULT;
switch(cmd) {
case SCULL_IOCRESET:
scull_quantum = SCULL_QUANTUM;
scull_qset = SCULL_QSET;
break;
case SCULL_IOCSQUANTUM: /* Set: arg points to the value */
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
retval = __get_user(scull_quantum, (int __user *)arg);
break;
case SCULL_IOCTQUANTUM: /* Tell: arg is the value */
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
scull_quantum = arg;
break;
case SCULL_IOCGQUANTUM: /* Get: arg is pointer to result */
retval = __put_user(scull_quantum, (int __user *)arg);
break;
case SCULL_IOCQQUANTUM: /* Query: return it (it's positive) */
return scull_quantum;
case SCULL_IOCXQUANTUM: /* eXchange: use arg as pointer */
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
tmp = scull_quantum;
retval = __get_user(scull_quantum, (int __user *)arg);
if (retval == 0)
retval = __put_user(tmp, (int __user *)arg);
break;
case SCULL_IOCHQUANTUM: /* sHift: like Tell + Query */
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
tmp = scull_quantum;
scull_quantum = arg;
return tmp;
case SCULL_IOCSQSET:
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
retval = __get_user(scull_qset, (int __user *)arg);
break;
case SCULL_IOCTQSET:
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
scull_qset = arg;
break;
case SCULL_IOCGQSET:
retval = __put_user(scull_qset, (int __user *)arg);
break;
case SCULL_IOCQQSET:
return scull_qset;
case SCULL_IOCXQSET:
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
tmp = scull_qset;
retval = __get_user(scull_qset, (int __user *)arg);
if (retval == 0)
retval = put_user(tmp, (int __user *)arg);
break;
case SCULL_IOCHQSET:
if (! capable (CAP_SYS_ADMIN))
return -EPERM;
tmp = scull_qset;
scull_qset = arg;
return tmp;
/*
* The following two change the buffer size for scullpipe.
* The scullpipe device uses this same ioctl method, just to
* write less code. Actually, it's the same driver, isn't it?
*/
case SCULL_P_IOCTSIZE:
scull_p_buffer = arg;
break;
case SCULL_P_IOCQSIZE:
return scull_p_buffer;
default: /* redundant, as cmd was checked against MAXNR */
return -ENOTTY;
}
return retval;
}
/*
* The "extended" operations -- only seek
*/
loff_t scull_llseek(struct file *filp, loff_t off, int whence)
{
struct scull_dev *dev = filp->private_data;
loff_t newpos;
switch(whence) {
case 0: /* SEEK_SET */
newpos = off;
break;
case 1: /* SEEK_CUR */
newpos = filp->f_pos + off;
break;
case 2: /* SEEK_END */
newpos = dev->size + off;
break;
default: /* can't happen */
return -EINVAL;
}
if (newpos < 0) return -EINVAL;
filp->f_pos = newpos;
return newpos;
}
struct file_operations scull_fops = {
.owner = THIS_MODULE,
.llseek = scull_llseek,
.read = scull_read,
.write = scull_write,
.unlocked_ioctl = scull_ioctl,
.open = scull_open,
.release = scull_release,
};
/*
* Finally, the module stuff
*/
/*
* The cleanup function is used to handle initialization failures as well.
* Thefore, it must be careful to work correctly even if some of the items
* have not been initialized
*/
void scull_cleanup_module(void)
{
int i;
dev_t devno = MKDEV(scull_major, scull_minor);
/* Get rid of our char dev entries */
if (scull_devices) {
for (i = 0; i < scull_nr_devs; i++) {
scull_trim(scull_devices + i);
cdev_del(&scull_devices[i].cdev);
}
kfree(scull_devices);
}
#ifdef SCULL_DEBUG /* use proc only if debugging */
scull_remove_proc();
#endif
/* cleanup_module is never called if registering failed */
unregister_chrdev_region(devno, scull_nr_devs);
/* and call the cleanup functions for friend devices */
scull_p_cleanup();
scull_access_cleanup();
}
/*
* Set up the char_dev structure for this device.
*/
static void scull_setup_cdev(struct scull_dev *dev, int index)
{
int err, devno = MKDEV(scull_major, scull_minor + index);
cdev_init(&dev->cdev, &scull_fops);
dev->cdev.owner = THIS_MODULE;
dev->cdev.ops = &scull_fops;
err = cdev_add (&dev->cdev, devno, 1);
/* Fail gracefully if need be */
if (err)
printk(KERN_NOTICE "Error %d adding scull%d", err, index);
}
int scull_init_module(void)
{
int result, i;
dev_t dev = 0;
/*
* Get a range of minor numbers to work with, asking for a dynamic
* major unless directed otherwise at load time.
*/
if (scull_major) {
dev = MKDEV(scull_major, scull_minor);
result = register_chrdev_region(dev, scull_nr_devs, "scull");
} else {
result = alloc_chrdev_region(&dev, scull_minor, scull_nr_devs,
"scull");
scull_major = MAJOR(dev);
}
if (result < 0) {
printk(KERN_WARNING "scull: can't get major %d\n", scull_major);
return result;
}
/*
* allocate the devices -- we can't have them static, as the number
* can be specified at load time
*/
scull_devices = kmalloc(scull_nr_devs * sizeof(struct scull_dev), GFP_KERNEL);
if (!scull_devices) {
result = -ENOMEM;
goto fail; /* Make this more graceful */
}
memset(scull_devices, 0, scull_nr_devs * sizeof(struct scull_dev));
/* Initialize each device. */
for (i = 0; i < scull_nr_devs; i++) {
scull_devices[i].quantum = scull_quantum;
scull_devices[i].qset = scull_qset;
sema_init(&scull_devices[i].sem, 1);
scull_setup_cdev(&scull_devices[i], i);
}
/* At this point call the init function for any friend device */
dev = MKDEV(scull_major, scull_minor + scull_nr_devs);
dev += scull_p_init(dev);
dev += scull_access_init(dev);
#ifdef SCULL_DEBUG /* only when debugging */
scull_create_proc();
#endif
return 0; /* succeed */
fail:
scull_cleanup_module();
return result;
}
module_init(scull_init_module);
module_exit(scull_cleanup_module);

SOLUTION:
Of course, I was doing the things in the wrong way: in order to load CORRECTLY the scull device module, in the book's source code and, also, in the other link of the question, there is load_scull script that does everything for you.
# source scull_load
# lsmod
Module Size Used by
scull 20480 0
# ls -l /dev/ | grep scull
lrwxrwxrwx 1 root root 6 Dec 19 15:00 scull -> scull0
crw-rw-r-- 1 root staff 243, 0 Dec 19 15:00 scull0
crw-rw-r-- 1 root staff 243, 1 Dec 19 15:00 scull1
crw-rw-r-- 1 root staff 243, 2 Dec 19 15:00 scull2
crw-rw-r-- 1 root staff 243, 3 Dec 19 15:00 scull3
lrwxrwxrwx 1 root root 10 Dec 19 15:00 scullpipe -> scullpipe0
crw-rw-r-- 1 root staff 243, 4 Dec 19 15:00 scullpipe0
crw-rw-r-- 1 root staff 243, 5 Dec 19 15:00 scullpipe1
crw-rw-r-- 1 root staff 243, 6 Dec 19 15:00 scullpipe2
crw-rw-r-- 1 root staff 243, 7 Dec 19 15:00 scullpipe3
crw-rw-r-- 1 root staff 243, 11 Dec 19 15:00 scullpriv
crw-rw-r-- 1 root staff 243, 8 Dec 19 15:00 scullsingle
crw-rw-r-- 1 root staff 243, 9 Dec 19 15:00 sculluid
crw-rw-r-- 1 root staff 243, 10 Dec 19 15:00 scullwuid

OpenCL stackoverflow. How to solve it?

I'm having a problem when I try to run the reduction program from the OpenCL in Action's sources.
Im using Visual Studio 2008. This is the error:
Unhandled exception in 0x013526a7 in Reduction.exe: 0xC00000FD: Stack
overflow.
And in the asm file the cursor is to
test dword ptr [eax],eax ; probe page.
I tried to debug it, but when I put a breakpoint in the main function, the debugging starts, but the program does not keep running.
I don't know what is the really problem.
These are the source files:
reduction.cpp
#define _CRT_SECURE_NO_WARNINGS
#define PROGRAM_FILE "reduction_complete.cl"
#define ARRAY_SIZE 1048576
#define KERNEL_1 "reduction_vector"
#define KERNEL_2 "reduction_complete"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
/* Find a GPU or CPU associated with the first available platform */
cl_device_id create_device() {
cl_platform_id platform;
cl_device_id dev;
int err;
/* Identify a platform */
err = clGetPlatformIDs(1, &platform, NULL);
if(err < 0) {
perror("Couldn't identify a platform");
exit(1);
}
/* Access a device */
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);
if(err == CL_DEVICE_NOT_FOUND) {
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL);
}
if(err < 0) {
perror("Couldn't access any devices");
exit(1);
}
return dev;
}
/* Create program from a file and compile it */
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {
cl_program program;
FILE *program_handle;
char *program_buffer, *program_log;
size_t program_size, log_size;
int err;
/* Read program file and place content into buffer */
program_handle = fopen(filename, "r");
if(program_handle == NULL) {
perror("Couldn't find the program file");
exit(1);
}
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
rewind(program_handle);
program_buffer = (char*)malloc(program_size + 1);
program_buffer[program_size] = '\0';
fread(program_buffer, sizeof(char), program_size, program_handle);
fclose(program_handle);
/* Create program from file */
program = clCreateProgramWithSource(ctx, 1,
(const char**)&program_buffer, &program_size, &err);
if(err < 0) {
perror("Couldn't create the program");
exit(1);
}
free(program_buffer);
/* Build program */
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if(err < 0) {
/* Find size of log and print to std output */
clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
0, NULL, &log_size);
program_log = (char*) malloc(log_size + 1);
program_log[log_size] = '\0';
clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
log_size + 1, program_log, NULL);
printf("%s\n", program_log);
free(program_log);
exit(1);
}
return program;
}
int main() {
/* OpenCL structures */
cl_device_id device;
cl_context context;
cl_program program;
cl_kernel vector_kernel, complete_kernel;
cl_command_queue queue;
cl_event start_event, end_event;
cl_int i, err;
size_t local_size, global_size;
/* Data and buffers */
float data[ARRAY_SIZE];
float sum, actual_sum;
cl_mem data_buffer, sum_buffer;
cl_ulong time_start, time_end, total_time;
/* Initialize data */
for(i=0; i<ARRAY_SIZE; i++) {
data[i] = 1.0f*i;
}
/* Create device and determine local size */
device = create_device();
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(local_size), &local_size, NULL);
if(err < 0) {
perror("Couldn't obtain device information");
exit(1);
}
/* Create a context */
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if(err < 0) {
perror("Couldn't create a context");
exit(1);
}
/* Build program */
program = build_program(context, device, PROGRAM_FILE);
/* Create data buffer */
data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_USE_HOST_PTR, ARRAY_SIZE * sizeof(float), data, &err);
sum_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(float), NULL, &err);
if(err < 0) {
perror("Couldn't create a buffer");
exit(1);
};
/* Create a command queue */
queue = clCreateCommandQueue(context, device,
CL_QUEUE_PROFILING_ENABLE, &err);
if(err < 0) {
perror("Couldn't create a command queue");
exit(1);
};
/* Create kernels */
vector_kernel = clCreateKernel(program, KERNEL_1, &err);
complete_kernel = clCreateKernel(program, KERNEL_2, &err);
if(err < 0) {
perror("Couldn't create a kernel");
exit(1);
};
/* Set arguments for vector kernel */
err = clSetKernelArg(vector_kernel, 0, sizeof(cl_mem), &data_buffer);
err |= clSetKernelArg(vector_kernel, 1, local_size * 4 * sizeof(float), NULL);
/* Set arguments for complete kernel */
err = clSetKernelArg(complete_kernel, 0, sizeof(cl_mem), &data_buffer);
err |= clSetKernelArg(complete_kernel, 1, local_size * 4 * sizeof(float), NULL);
err |= clSetKernelArg(complete_kernel, 2, sizeof(cl_mem), &sum_buffer);
if(err < 0) {
perror("Couldn't create a kernel argument");
exit(1);
}
/* Enqueue kernels */
global_size = ARRAY_SIZE/4;
err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size,
&local_size, 0, NULL, &start_event);
if(err < 0) {
perror("Couldn't enqueue the kernel");
exit(1);
}
printf("Global size = %zu\n", global_size);
/* Perform successive stages of the reduction */
while(global_size/local_size > local_size) {
global_size = global_size/local_size;
err = clEnqueueNDRangeKernel(queue, vector_kernel, 1, NULL, &global_size,
&local_size, 0, NULL, NULL);
printf("Global size = %zu\n", global_size);
if(err < 0) {
perror("Couldn't enqueue the kernel");
exit(1);
}
}
global_size = global_size/local_size;
err = clEnqueueNDRangeKernel(queue, complete_kernel, 1, NULL, &global_size,
NULL, 0, NULL, &end_event);
printf("Global size = %zu\n", global_size);
/* Finish processing the queue and get profiling information */
clFinish(queue);
clGetEventProfilingInfo(start_event, CL_PROFILING_COMMAND_START,
sizeof(time_start), &time_start, NULL);
clGetEventProfilingInfo(end_event, CL_PROFILING_COMMAND_END,
sizeof(time_end), &time_end, NULL);
total_time = time_end - time_start;
/* Read the result */
err = clEnqueueReadBuffer(queue, sum_buffer, CL_TRUE, 0,
sizeof(float), &sum, 0, NULL, NULL);
if(err < 0) {
perror("Couldn't read the buffer");
exit(1);
}
/* Check result */
actual_sum = 1.0f * (ARRAY_SIZE/2)*(ARRAY_SIZE-1);
if(fabs(sum - actual_sum) > 0.01*fabs(sum))
printf("Check failed.\n");
else
printf("Check passed.\n");
printf("Total time = %lu\n", total_time);
/* Deallocate resources */
clReleaseEvent(start_event);
clReleaseEvent(end_event);
clReleaseMemObject(sum_buffer);
clReleaseMemObject(data_buffer);
clReleaseKernel(vector_kernel);
clReleaseKernel(complete_kernel);
clReleaseCommandQueue(queue);
clReleaseProgram(program);
clReleaseContext(context);
return 0;
}
reduction_complete.cl
__kernel void reduction_vector(__global float4* data,
__local float4* partial_sums) {
int lid = get_local_id(0);
int group_size = get_local_size(0);
partial_sums[lid] = data[get_global_id(0)];
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = group_size/2; i>0; i >>= 1) {
if(lid < i) {
partial_sums[lid] += partial_sums[lid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if(lid == 0) {
data[get_group_id(0)] = partial_sums[0];
}
}
__kernel void reduction_complete(__global float4* data,
__local float4* partial_sums, __global float* sum) {
int lid = get_local_id(0);
int group_size = get_local_size(0);
partial_sums[lid] = data[get_local_id(0)];
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = group_size/2; i>0; i >>= 1) {
if(lid < i) {
partial_sums[lid] += partial_sums[lid + i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if(lid == 0) {
*sum = partial_sums[0].s0 + partial_sums[0].s1 +
partial_sums[0].s2 + partial_sums[0].s3;
}
}
I dont know what causes the stackoverflow...

I don't see any recursion so my guess is the float data[ARRAY_SIZE]; where #define ARRAY_SIZE 1048576 is putting 4MB on the stack which is pretty large. Try changing that to a dynamic allocation.

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Windows WriteFile and ReadFile interferes with each other - windows

Communication through serial port is slow. You should use WriteFileEx instead of WriteFile. You can specify a routine which may execute when the writting(transmitting) is completed or failed. If you write next data after that routine has been executed, overflow of data may not be occured.

Related

STM32L486 enable LPUART1 with DMA

i.MX8 mini GPT implementation issue

Deepstream back to back detectors with DashCamNetand VehicleMakeNet not classifying correctly

'scull' device from ldd3 is not shown under /dev/

OpenCL stackoverflow. How to solve it?

Categories

Resources