Issues with a cuda program - visual-studio-2010

I am writing a simple cuda program where I am creating a 2D array in the device and then I am doing very basic operation in the kernel function and after the operation I am copying it back to a 2D array of the host. I wrote this code after following several threads of stackoverlow and also cuda forum. I followed what was suggested but the output of the code I am getting is 0 whereas I am expecting an output of 10 for all the members of the array. I am posting my code below :
__global__ void test_kernel(int *dev_ptr[])
{
int tidx = threadIdx.x;
int tidy = threadIdx.y;
dev_ptr[tidx][tidy] = dev_ptr[tidx][tidy] +10;
}
int main(int argc,char *argv[])
{
int env_end =50;
int **h_ptr ;
int **d_ptr ;
int **env_t;
int i,k,j;
/************************************************************************/
/* cpu
/************************************************************************/
env_t =(int **) malloc(env_end * sizeof *env_t);
for(k=0;k<env_end;k++)
{env_t[k] = (int *)malloc(env_end* env_end* sizeof *env_t[0]);
}
for (k = 1; k < env_end; ++k)
env_t[k] = env_t[k - 1] + env_end;
memset(*env_t, 0, env_end * env_end* sizeof **env_t);
for (i=0;i<env_end;i++)
{ for(j=0;j<env_end;j++)
{printf("%d\t",env_t[i][j]); }
if (j==env_end-1)
{printf("\n"); }
}
/************************************************************************/
/* gpu
/************************************************************************/
h_ptr = (int **)malloc(env_end*sizeof(int *));
for (i=0;i<env_end;i++)
{ cudaMalloc((void **)&h_ptr[i],env_end*sizeof(int));
cudaMemcpy(h_ptr[i],&env_t[i][0],env_end*sizeof(int),cudaMemcpyHostToDevice);
}
cudaMalloc((void ***)d_ptr,env_end*sizeof(int));
cudaMemcpy(d_ptr,h_ptr,env_end*sizeof(int),cudaMemcpyHostToDevice);
/************************************************************************/
/* kernel function and declaration
/************************************************************************/
dim3 blockDim(env_end,env_end,1);
test_kernel<<<1,blockDim>>>(d_ptr);
/************************************************************************/
/* Copying data back to host
************************************************************************/
for (i=0;i<env_end;i++)
{cudaMemcpy(env_t[i],h_ptr[i],env_end*sizeof(int),cudaMemcpyDeviceToHost);
}
for (i=0;i<env_end;i++)
{ for(j=0;j<env_end;j++)
{printf("%d\t",env_t[i][j]); }
if (j==env_end-1)
{printf("\n"); }
}
/************************************************************************/
/* Freeing the memory locations
/************************************************************************/
for (i=0;i<env_end;i++)
{cudaFree(h_ptr[i]);
}
cudaFree(d_ptr);
free(h_ptr);
for (i=0;i<env_end;i++)
{ free(env_t[i]);
}
free(env_t);
}
One more thing is that I am writing the code in MS visual studio 2010 and I am getting a debug assertion failed notification. I am not sure what I have done wrong and why this notification is coming. Thanks for all your help.

There are a few issues with this code. including:
Size mismatch between h_ptr, env_t, and d_ptr.
Use & instead of (***void) for cudaMalloc.
Do not allocate the host memory by cudaMalloc.
Optimization: 2D memory is allocated non-consequently in global memory.
Allocate 1D memory and refer to it as 2D.
Here is the full code:
#include <stdio.h>
#define SIZE 10
#define INDEX(i,j,k) i*k+j
__global__ void test_kernel(int *dev_ptr, int row_size)
{
int tidx = threadIdx.x;
int tidy = threadIdx.y;
dev_ptr[INDEX(tidx,tidy,row_size)] = dev_ptr[INDEX(tidx,tidy,row_size)] +10;
}
int main(int argc,char *argv[])
{
int env_end =SIZE;
int *d_ptr=NULL;
int *env_t;
int i,j;
/************************************************************************/
// cpu
/************************************************************************/
env_t =(int *) malloc(env_end * env_end * sizeof(int));
memset(env_t, 0, env_end * env_end* sizeof(int));
printf("Input Array:\n");
for (i=0;i<env_end;i++)
{ for(j=0;j<env_end;j++)
{printf("%d\t",env_t[INDEX(i,j,env_end)]); }
printf("\n");
}
printf("\n");
/************************************************************************/
// gpu
/************************************************************************/
cudaMalloc(&d_ptr,env_end*env_end*sizeof(int));
cudaMemcpy(d_ptr,env_t,env_end*env_end*sizeof(int),cudaMemcpyHostToDevice);
/************************************************************************/
// kernel function and declaration
/************************************************************************/
dim3 blckDim(env_end,env_end,1);
test_kernel<<<1,blckDim>>>(d_ptr, env_end);
/************************************************************************/
// Copying data back to host
/************************************************************************/
cudaMemcpy(env_t,d_ptr,env_end*env_end*sizeof(int),cudaMemcpyDeviceToHost);
printf("Output Array:\n");
for (i=0;i<env_end;i++)
{ for(j=0;j<env_end;j++)
{printf("%d\t",env_t[INDEX(i,j,env_end)]); }
printf("\n");
}
printf("\n");
/************************************************************************/
// Freeing the memory locations
/************************************************************************/
cudaFree(d_ptr);
free(env_t);
}

Related

Create an IOMMU entry in Linux

I've been browsing through the Linux IOMMU code for quite a while now and couldn't find an easy approach to directly create an IOMMU entry.
I want to specify the physical address (maybe also the virtual but it is not necessary) and the device. The range should be inserted into the IOMMU and the virt address printed through printk.
I am searching for a function that lets me easily do it.
Thanks
I ended up with a pretty hacky solution, not the optimal one, but it worked for my usecase. Adjusted the function iommu_dma_map_page in dma-iommu.c to look like the following and export it.
(vanilla 5.18 except for this modification)
dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
bool coherent = dev_is_dma_coherent(dev);
int prot = dma_info_to_prot(dir, coherent, attrs);
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
dma_addr_t iova, dma_mask = dma_get_mask(dev);
phys_addr_t phys;
if (page->flags == 0xF0F0F0F0F0F0F) {
phys = page->dma_addr;
} else {
phys = page_to_phys(page) + offset;
}
/*
* If both the physical buffer start address and size are
* page aligned, we don't need to use a bounce page.
*/
if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
void *padding_start;
size_t padding_size, aligned_size;
aligned_size = iova_align(iovad, size);
phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
iova_mask(iovad), dir, attrs);
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
/* Cleanup the padding area. */
padding_start = phys_to_virt(phys);
padding_size = aligned_size;
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
padding_start += size;
padding_size -= size;
}
memset(padding_start, 0, padding_size);
}
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
arch_sync_dma_for_device(phys, size, dir);
iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
return iova;
}
EXPORT_SYMBOL(iommu_dma_map_page);
Then use the following kernel module to program the entry. This could be also extended and programmed in a more usable manner, but for prototyping, it should be enough.
#include <linux/init.h>
#include <asm/io.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
extern dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs);
int magic_value = 0xF0F0F0F0F0F0F;
struct page page_ = {
.flags = 0xF0F0F0F0F0F0F,
.dma_addr = 0x0000002f000f0000,
};
static int my_init(void)
{
dma_addr_t dma_addr;
struct pci_dev *dummy = pci_get_device(0x10EE, 0x0666, NULL);
if (dummy != NULL)
{
printk(KERN_INFO "module loaded.\n");
dma_addr = iommu_dma_map_page(&(dummy->dev), &page_, 0, 4096, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
printk(KERN_INFO "DMA_addr: %llx", dma_addr);
}
else
{
printk("Error getting device");
}
return 0;
}
static void my_exit(void)
{
printk(KERN_INFO "iommu_alloc unloaded.\n");
return;
}
module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("benedict.schlueter#inf.ethz.ch");
MODULE_DESCRIPTION("Alloc IOMMU entry");

std::atomic on struct bit-fields

I'm modifying some existing open source library and there is a struct (say named as Node) containing bit-fields, e.g.
struct Node {
std::atomic<uint32_t> size:30;
std::atomic<uint32_t> isnull:1;
};
To fit my needs, these fields need to be atomic so I was expecting to use std::atomic for this and faced compile time error:
bit-field 'size' has non-integral type 'std::atomic<uint32_t>'
According to documentation, there is a restricted set of types which can be used for std::atomic
Can anyone advise/have idea on how to get functionality of atomic fields with the minimum impact to the existing source code?
Thanks in advance!
I used an unsigned short as an example below.
This is less ideal, but you could sacrifice 8 bits and insert a std::atomic_flag in the bit field with a union. Unfortunately, std::atomic_flag type is a std::atomic_bool type.
This structure can be spin locked manually every time you access it. However, the code should have minimal performance degradation (unlike creating, locking, unlocking, destroying with a std::mutex and std::unique_lock).
This code may waste about 10-30 clock cycles to enable low cost multi-threading.
PS. Make sure the reserved 8 bits below are not messed up by the endian structure of the processor. You may have to define at the end for big-endian processors. I only tested this code on an Intel CPU (always little-endian).
#include <iostream>
#include <atomic>
#include <thread>
union Data
{
std::atomic_flag access = ATOMIC_FLAG_INIT; // one byte
struct
{
typedef unsigned short ushort;
ushort reserved : 8;
ushort count : 4;
ushort ready : 1;
ushort unused : 3;
} bits;
};
class SpinLock
{
public:
inline SpinLock(std::atomic_flag &access, bool locked=true)
: mAccess(access)
{
if(locked) lock();
}
inline ~SpinLock()
{
unlock();
}
inline void lock()
{
while (mAccess.test_and_set(std::memory_order_acquire))
{
}
}
// each attempt will take about 10-30 clock cycles
inline bool try_lock(unsigned int attempts=0)
{
while(mAccess.test_and_set(std::memory_order_acquire))
{
if (! attempts) return false;
-- attempts;
}
return true;
}
inline void unlock()
{
mAccess.clear(std::memory_order_release);
}
private:
std::atomic_flag &mAccess;
};
void aFn(int &i, Data &d)
{
SpinLock lock(d.access, false);
// manually locking/unlocking can be tighter
lock.lock();
if (d.bits.ready)
{
++d.bits.count;
}
d.bits.ready ^= true; // alternate each time
lock.unlock();
}
int main(void)
{
Data f;
f.bits.count = 0;
f.bits.ready = true;
std::thread *p[8];
for (int i = 0; i < 8; ++ i)
{
p[i] = new std::thread([&f] (int i) { aFn(i, f); }, i);
}
for (int i = 0; i < 8; ++i)
{
p[i]->join();
delete p[i];
}
std::cout << "size: " << sizeof(f) << std::endl;
std::cout << "count: " << f.bits.count << std::endl;
}
The result is as expected...
size: 2
count: 4

LINUX KERNEL driver hangs/freeze after handling mapped register

I'm completely new developing in LINUX kernel, and I'm having some problems in a new LINUX driver I'm developing.
After I map NXP PWM registers using ioremap()/ioremap_nocache() and then I try to write to the register mappend my system hags/freeze.
Could you please help me understanding what it is happening?
My Driver is this:
#include <linux/device.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kthread.h> // for threads
#include <linux/fs.h>
#include <linux/sched.h> // for task_struct
#include <linux/delay.h> // for ndelay
#include <linux/uaccess.h> // Required for the copy to user function
#include <asm/io.h> // for ioremap()
#include <linux/interrupt.h>
#include <linux/gpio.h>
#define MX3_PWMCR_PRESCALER(x) ((((x) - 1) & 0xFFF) << 4)
#define DEVICE_NAME "pwm_cus_drv"
#define CLASS_NAME "pwm_custom_driver"
static volatile void __iomem *mmio_pwm1_base = NULL;
static volatile void __iomem *mmio_pwm2_base = NULL;
static int majorNumber;
static struct class* vfd_char_dev_class = NULL;
static struct device* vfd_char_dev = NULL;
static struct device_driver vfd_driver;
static int dev_open(struct inode *inodep, struct file *file_ptr);
static int dev_release(struct inode *inodep, struct file *file_ptr);
static ssize_t dev_read(struct file *file_ptr, char *buffer, size_t len, loff_t *offset);
static ssize_t dev_write(struct file *file_ptr, char *buffer, size_t len, loff_t *offset);
static long dev_ioctl(struct file *file_ptr, unsigned int cmd, unsigned long arg);
static irqreturn_t pwm_imx_futaba_isr(int irq, void *dev_id);
static bool Initialize_PWM_Signals(void);
static void pwm_init(void);
/**
* ISR used to attend PWM rising edge interrupt activation.
*/
irqreturn_t pwm_imx_futaba_isr(int irq, void *dev_id)
{
if(NULL != mmio_pwm1_base)
{
writel(0x00000078, mmio_pwm1_base + 0x04);
gpio_set_value(47, 1);
gpio_set_value(47, 0);
}
return IRQ_HANDLED;
}
/**
*
*/
bool Initialize_PWM_Signals(void)
{
u32 cs_pin_dir_value = 0;
u32 cs_pin_out_value = 0;
u32 duty_cycles = 0;
u32 period_cycles = 0;
u32 cr_1 = 0;
u32 cr_2 = 0;
pwm_init();
period_cycles = ((24000000)/(4000)) - 2; /* 4 KHz */
duty_cycles = period_cycles / 2; /* duty = 50% */
printk(KERN_NOTICE "PWM data. PERIOD[%d] DUTY[%d]\n", period_cycles, duty_cycles);
cr_1 = MX3_PWMCR_PRESCALER(1) | (1 << 24) | (1 << 23) | (2 << 16);
cr_2 = MX3_PWMCR_PRESCALER(1) | (1 << 24) | (1 << 23) | (2 << 16);
printk(KERN_NOTICE "Disabling IMX6UL PWMs \n");
/*******************************/
/* AFTER THIS, THE KERNEL HANGS*/
/*******************************/
writel(cr_1, mmio_pwm1_base + 0x00);
writel(cr_2, mmio_pwm2_base + 0x00);
printk(KERN_NOTICE "PWMs disabled\n");
if (1)
{
/* Configure IMX6UL PWM1 */
printk(KERN_NOTICE " Configuring PWM1 \n");
writel(duty_cycles, mmio_pwm1_base + 0x0C);
writel(period_cycles, mmio_pwm1_base + 0x10);
/* Configure IMX6UL PWM2 */
printk(KERN_NOTICE " Configuring PWM2 \n");
writel(duty_cycles, mmio_pwm2_base + 0x0C);
writel(period_cycles, mmio_pwm2_base + 0x10);
cr_1 |= (1 << 0);
cr_2 |= (1 << 0);
printk(KERN_NOTICE "Enabling IRQs !!\n");
writel(0x00000002, mmio_pwm1_base + 0x08);
/* Enabling IMX6UL PWMs */
printk(KERN_NOTICE " Enabling PWMs \n");
writel(cr_1, mmio_pwm1_base + 0x00);
writel(cr_2, mmio_pwm2_base + 0x00);
}
return 0;
}
/**
*
*/
int dev_open(struct inode *inodep, struct file *file_ptr)
{
printk(KERN_NOTICE "\n[%s]\n", __func__);
Initialize_PWM_Signals();
printk(KERN_NOTICE "[%s] Driver initialized \n", __func__);
}
/**
*
*/
int dev_release(struct inode *inodep, struct file *file_ptr)
{
printk(KERN_NOTICE "\n[%s]\n", __func__);
}
/**
*
*/
ssize_t dev_read(struct file *file_ptr, char *buffer, size_t len, loff_t *offset)
{
printk(KERN_NOTICE "\n[%s]\n", __func__);
}
/**
*
*/
ssize_t dev_write(struct file *file_ptr, char *buffer, size_t len, loff_t *offset)
{
printk(KERN_NOTICE "\n[%s]\n", __func__);
}
/**
*
*/
long dev_ioctl(struct file *file_ptr, unsigned int cmd, unsigned long arg)
{
printk(KERN_NOTICE "\n[%s]\n", __func__);
}
/**
*
*/
void pwm_init(void)
{
printk(KERN_ALERT "[%s]\n", __func__);
if(NULL != request_mem_region(0x2080000, 0x4000, DEVICE_NAME))
{
mmio_pwm1_base = ioremap_nocache(0x2080000, 0x4000);
if(IS_ERR(mmio_pwm1_base))
{
printk(KERN_NOTICE "Failed to map memory 1\n");
}
}
else
{
printk(KERN_NOTICE "Failed to map memory 2\n");
}
if(NULL != request_mem_region(0x2084000, 0x4000, DEVICE_NAME))
{
mmio_pwm2_base = ioremap_nocache(0x2084000, 0x4000);
if(IS_ERR(mmio_pwm2_base))
{
printk(KERN_NOTICE "Failed to map memory 3\n");
}
}
else
{
printk(KERN_NOTICE "Failed to map memory 4\n");
}
printk(KERN_NOTICE "PWMs memory mapped \n");
}
static const struct file_operations fops =
{
.owner = THIS_MODULE,
.open = dev_open,
.read = dev_read,
.write = dev_write,
.release = dev_release,
.unlocked_ioctl = dev_ioctl,
.compat_ioctl = dev_ioctl,
};
struct bus_type futaba_bus_type =
{
.name = DEVICE_NAME,
};
static int pwm_driver_init(void)
{
unsigned irqflags = 0;
unsigned ret = 0;
const char *dev_name = "pwm1_irq";
u32 pwm_irq = 25;
majorNumber = register_chrdev(0, DEVICE_NAME, &fops);
if (majorNumber < 0)
{
printk(KERN_NOTICE "EBBChar failed to register a major number\n");
return majorNumber;
}
vfd_char_dev_class = class_create(THIS_MODULE, CLASS_NAME);
if (IS_ERR(vfd_char_dev_class))
{
unregister_chrdev(majorNumber, DEVICE_NAME);
printk(KERN_NOTICE "Failed to register device class\n");
return PTR_ERR(vfd_char_dev_class);
}
vfd_char_dev = device_create(vfd_char_dev_class, NULL, MKDEV(majorNumber, 0), NULL, DEVICE_NAME);
if (IS_ERR(vfd_char_dev))
{
class_destroy(vfd_char_dev_class);
unregister_chrdev(majorNumber, DEVICE_NAME);
printk(KERN_ALERT "Failed to create the device\n");
return PTR_ERR(vfd_char_dev);
}
ret = request_irq(pwm_irq, pwm_imx_futaba_isr, irqflags, dev_name, DEVICE_NAME);
if (0 != ret)
{
printk(KERN_NOTICE "can't get irq: %d\n", ret);
}
return 0;
}
static void pwm_driver_exit(void)
{
device_destroy(vfd_char_dev_class, MKDEV(majorNumber, 0));
class_unregister(vfd_char_dev_class);
class_destroy(vfd_char_dev_class);
unregister_chrdev(majorNumber, DEVICE_NAME);
iounmap(mmio_pwm1_base);
iounmap(mmio_pwm2_base);
}
module_init(pwm_driver_init);
module_exit(pwm_driver_exit);
MODULE_AUTHOR("New Drivers developer");
MODULE_DESCRIPTION(" PWM Handler ");
MODULE_LICENSE("GPL");
Thanks for all your answers. I solved this issue after using some NXP recommendations. At the end the problem was caused due to the IPG/PER clocks were not enabled. According NXP, clocks shall be enabled first before starting to modify registers associated with the module(in my case for PWM).
Typical reason of hang at register access is that hardware module owning the register is either powered down, or not clocked.
Per imx6ul device tree from mainline kernel (arch/arm/boot/dts/imx6ul.dtsi), there are clocks to enable:
pwm1: pwm#2080000 {
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
reg = <0x02080000 0x4000>;
interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX6UL_CLK_PWM1>,
<&clks IMX6UL_CLK_PWM1>;
clock-names = "ipg", "per";
#pwm-cells = <2>;
status = "disabled";
};
By the way, driver for this module is available, drivers/pwm/pwm-imx.c

There is no entry for device under /dev even after class_create and device_create

I am making one simple char driver and I learnt that there are 2 ways I can get Major number for my driver to pair with - alloc_chrdev_region(and register_chrdev_region) and register_chrdev. I initially started with register_chrdev and it gave me my major number and also created entry in /dev (class and device create used).
But when I change for register_chrdev to alloc_chrdev_region to acquire major number (using chrdev_init and chrdev_add), leaving rest of the entry function same, I don't see an entry in /dev, though when I make it manually with mknode, and run the test application to use the driver, it works fine.
Below is the code of entry point that does not produce the /dev entry
#include<linux/module.h>
#include<linux/init.h>
#include<linux/fs.h>
#include<linux/device.h>
#include<linux/kernel.h>
#include<linux/slab.h>
#include<linux/uaccess.h>
#include<linux/stat.h>
#include<linux/cdev.h>
#include <linux/version.h>
#include <linux/types.h>
#include <linux/kdev_t.h>
#define DEVICE_NAME "myCharDevice"
#define MODULE_NAME "myCharDriver"
#define CLASS_NAME "myCharClass"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("YASH BHATT");
MODULE_VERSION(".01");
static char *bufferMemory;
static int bufferPointer;
static int bufferSize = 15;
static dev_t myChrDevid;
static struct cdev *myChrDevCdev;
static struct class *pmyCharClass;
static struct device *pmyCharDevice;
int majorNumber = 0;
static int charDriverOpen(struct inode *inodep, struct file *filep);
static int charDriverClose(struct inode *inodep, struct file *filep);
static ssize_t charDriverWrite(struct file *filep, const char *buffer, size_t len, loff_t *offset);
static ssize_t charDriverRead(struct file *filep, char *buffer, size_t len, loff_t *offset);
static int charDriverEntry(void);
static void charDriverExit(void);
static ssize_t attrShowData(struct device*, struct device_attribute*, char*);
static ssize_t attrStoreData(struct device*, struct device_attribute*, const char*, size_t);
static ssize_t attrShowBuffer(struct device*, struct device_attribute*, char*);
static ssize_t attrStoreBuffer(struct device*, struct device_attribute*, const char*, size_t);
/* The following function is called when the file placed on the sysfs is accessed for read*/
static ssize_t attrShowData(struct device* pDev, struct device_attribute* attr, char* buffer)
{
printk(KERN_INFO "MESG: The data has been accessed through the entry in sysfs\n");
if (bufferPointer == 0)
{
printk(KERN_WARNING "Thre is no data to read from buffer!\n");
return -1;
}
strncpy(buffer, bufferMemory, bufferPointer);
/* Note : Here we can directly use strncpy because we are already in kernel space and do not need to translate address*/
return bufferPointer;
}
static ssize_t attrStoreData(struct device* pDev, struct device_attribute* attr, const char* buffer, size_t length)
{
printk(KERN_INFO "Writing to attribute\n");
bufferPointer = length;
strncpy(bufferMemory, buffer, length);
return length;
}
static ssize_t attrShowBuffer(struct device* pDev, struct device_attribute* attr, char* buffer)
{
int counter;
int temp = bufferSize;
char bufferSizeArray[4] = {0};
counter = 3;
//printk(KERN_INFO "Buffer = %d\n",bufferSize % 10);
do
{
bufferSizeArray[counter] = '0' + (bufferSize % 10);
//printk(KERN_INFO "Character at %d is : %c\n",counter,bufferSizeArray[counter]);
bufferSize /= 10;
counter--;
}
while(counter != -1);
strncpy(buffer, bufferSizeArray, 4);
bufferSize = temp;
/* Note : Here we can directly use strncpy because we are already in kernel space and do not need to translate address*/
return 4;
}
static ssize_t attrStoreBuffer(struct device* pDev, struct device_attribute* attr, const char* buffer, size_t length)
{
int counter;
bufferPointer = length;
//printk(KERN_INFO "Length : %d With first char %c\n",length,buffer[0]);
bufferSize = 0;
for (counter = 0; counter < length-1 ; counter++)
{
bufferSize = (bufferSize * 10) + (buffer[counter] - '0') ;
}
//printk(KERN_INFO "Buffer size new : %d\n",bufferSize);
return length;
}
/* These macros converts the function in to instances dev_attr_<_name>*/
/* Defination of the macro is as follows : DEVICE_ATTR(_name, _mode, _show, _store) */
/* Note the actual implementation of the macro makes an entry in the struct device_attribute. This macro does that for us */
static DEVICE_ATTR(ShowData, S_IRWXU, attrShowData, attrStoreData); // S_IRUSR gives read access to the user
static DEVICE_ATTR(Buffer, S_IRWXU, attrShowBuffer, attrStoreBuffer); // S_IRUSR gives read access to the user
static struct file_operations fops =
{
.open = charDriverOpen,
.release = charDriverClose,
.read = charDriverRead,
.write = charDriverWrite,
};
static int __init charDriverEntry()
{
int returnValue;
//majorNumber = register_chrdev(0, DEVICE_NAME, &fops);
returnValue = alloc_chrdev_region(&myChrDevid, 0, 1, DEVICE_NAME);
/* This function takes 4 arguments - dev_t address, start of minor number, range/count of minor number, Name; Note - unlike register_chrdev fops have not
yet been tied to the major number */
if (returnValue < 0)
{
printk(KERN_ALERT "ERROR : can not aquire major number! error %d",returnValue);
return -1;
}
printk(KERN_INFO "Aquired Major Number! : %d\n", MAJOR(myChrDevid));
//cdev_init(&myChrDevCdev,&fops);
myChrDevCdev = cdev_alloc();
if (IS_ERR(myChrDevCdev))
{
printk(KERN_ALERT "Failed to allocate space for CharDev struct\n");
unregister_chrdev_region(myChrDevid, 1);
return -1;
}
cdev_init(myChrDevCdev,&fops);
myChrDevCdev->owner = THIS_MODULE;
//myChrDevCdev->ops = &fops;/* this function inits the c_dev structure with memset 0 and then does basic konject setup and then adds fops to cdev struct*/
/* this function adds the cdev to the kernel structure so that it becomes available for the users to use it */
// Now we will create class for this device
pmyCharClass = class_create(THIS_MODULE,CLASS_NAME);
if (IS_ERR(pmyCharClass))
{
printk(KERN_ALERT "Failed to Register Class\n");
cdev_del(myChrDevCdev);
kfree(myChrDevCdev);
unregister_chrdev_region(myChrDevid, 1);
return -1;
}
printk(KERN_INFO "Class created!\n");
pmyCharDevice = device_create(pmyCharClass, NULL, MKDEV(majorNumber,0),NULL,DEVICE_NAME);
if (IS_ERR(pmyCharDevice))
{
printk(KERN_ALERT "Failed to Register Class\n");
class_unregister(pmyCharClass);
class_destroy(pmyCharClass);
cdev_del(myChrDevCdev);
kfree(myChrDevCdev);
unregister_chrdev_region(myChrDevid, 1);
return -1;
}
printk(KERN_INFO "Device created!\n");
returnValue = cdev_add(myChrDevCdev, myChrDevid, 1);
if (returnValue < 0)
{
printk(KERN_ALERT "Failed to add chdev \n");
return -1;
}
/* We now have created the class and we have aquired major numer. But we have not yet tied out created fileops with anything.
We will do that now */
//returnValue = cdev_init(cdev)
printk(KERN_INFO "Now We will create the attribute entry in sysfs\n");
/* the function used is device_create_file(struct device *, struct device_attribute*) */
device_create_file(pmyCharDevice, &dev_attr_ShowData); // The second argumnet is the structure created by the DEVICE_ATTR macro
device_create_file(pmyCharDevice, &dev_attr_Buffer);
return 0;
}
static void __exit charDriverExit()
{
device_remove_file(pmyCharDevice, &dev_attr_Buffer);
device_remove_file(pmyCharDevice, &dev_attr_ShowData);
device_destroy(pmyCharClass, MKDEV(majorNumber,0));
class_unregister(pmyCharClass);
class_destroy(pmyCharClass);
//unregister_chrdev(majorNumber,DEVICE_NAME);
cdev_del(myChrDevCdev);
unregister_chrdev_region(myChrDevid, 1);
kfree(myChrDevCdev);
printk(KERN_INFO "Unmounting module done !\n");
}
static int charDriverOpen(struct inode *inodep, struct file *filep)
{
if ((filep->f_flags & O_ACCMODE) != O_RDWR)
{
printk(KERN_ALERT "WARNING : This driver can only be opened in both read and write mode\n");
return -1;
}
printk(KERN_INFO "INFO : CHARATER DRIVER OPENED\n");
bufferMemory = kmalloc(bufferSize,GFP_KERNEL);
bufferPointer = 0;
return 0;
}
static int charDriverClose(struct inode *inodep, struct file *filep)
{
kfree(bufferMemory);
printk(KERN_INFO "INFO : CHARACTER DRIVER CLOSED\n");
return 0;
}
static ssize_t charDriverWrite(struct file *filep, const char *buffer, size_t len, loff_t *offset)
{
// Here we will only allow to write one byte of data
if (len > bufferSize)
{
printk(KERN_WARNING "Attempted to write data larger than 15 byte!\n");
return 0;
}
//bufferMemory[bufferPointer] = *buffer;
copy_from_user(bufferMemory, buffer, len);
bufferPointer += len;
return len;
}
static ssize_t charDriverRead(struct file *filep, char *buffer, size_t len, loff_t *offset)
{
if(len > bufferSize || len > bufferPointer)
{
printk(KERN_WARNING "Attempting to read more than buffer size ! Deny\n");
return 0;
}
copy_to_user(buffer, bufferMemory, len);
// buffer[0] = bufferMemory[0];
bufferPointer -= len;
return len;
}
module_init(charDriverEntry);
module_exit(charDriverExit);
module_param(bufferSize, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(bufferSize, "Buffer Memory Size [15]");
Now if I replace the while alloc_chrdev_region, cdev_init and cdev_add with just register_chrdev(), The entry in /dev pops up. I am unable to figure out what more does register_chrdev() do that the former combination does not.
Thank you
Edit : Found the issue.
it was due to using MKDEV(majorNumber, 0); Without actually storing major number in the majorNumber variable using MAJOR();
Not deleting the question as someone can find it useful

What is the modern version of `procfs1.c` for Linux 2.6.x kernels?

I have been following the Linux 2.6 Kernel Module Programming Guide, when I ran into the first example from Chapter 5, called procfs1.c.
It would not compile out of the box, and after checking various related questions, it still took me quite some time to figure out the correct changes to make it compile and work as intended.
Therefore, I am posting my updated code for people in the future. I am running kernel 2.6.32-279.el6.x86_64.
Here is an updated version of the example that works with kernel version 2.6.32-279.el6.x86_64.
/*
* procfs1.c - create a "file" in /proc
*
*/
#include <linux/module.h> /* Specifically, a module */
#include <linux/kernel.h> /* We're doing kernel work */
#include <linux/proc_fs.h> /* Necessary because we use the proc fs */
#define procfs_name "helloworld"
/**
* This structure hold information about the /proc file
*
*/
struct proc_dir_entry *Our_Proc_File;
static ssize_t procfile_read(struct file *filp,
char *buffer,
size_t length,
loff_t * offset)
{
int ret;
printk(KERN_INFO "procfile_read (/proc/%s) called\n", procfs_name);
if (*offset > 0) {
/* we have finished to read, return 0 */
ret = 0;
} else {
/* fill the buffer, return the buffer size */
ret = sprintf(buffer, "HelloWorld!\n");
*offset = ret;
}
return ret;
}
static struct file_operations fops = {
.owner = THIS_MODULE,
.read = procfile_read,
};
int init_module()
{
Our_Proc_File = proc_create(procfs_name, S_IFREG | S_IRUGO, NULL, &fops);
if (Our_Proc_File == NULL) {
remove_proc_entry(procfs_name, NULL);
printk(KERN_ALERT "Error: Could not initialize /proc/%s\n",
procfs_name);
return -ENOMEM;
}
Our_Proc_File->uid = 0;
Our_Proc_File->gid = 0;
Our_Proc_File->size = 37;
printk(KERN_INFO "/proc/%s created\n", procfs_name);
return 0; /* everything is ok */
}
void cleanup_module()
{
remove_proc_entry(procfs_name, NULL);
printk(KERN_INFO "/proc/%s removed\n", procfs_name);
}

Resources