Using an old device file for char device driver - linux-kernel

I have two questions as I'm trying device drivers as a beginner.
I created one module , loaded it, it dynamically took major number 251 say. Number of minor devices is kept 1 only i.e minor number 0. For testing , I tried echo and cat on the device file (created using mknod) and it works as expected. Now if I unload the module but don't remove /dev entry and again load the module with same major number and try writing/reading to same device file which was used previously, kernel crashes. I know we shouldn't do this but just want to understand what happens in this scenario which causes this crash. I think something that VFS does.
When I do cat on device file, the read keeps on happening indefinitely. why? To stop that needed to use offset manipulation. This looks to be because buffer length is coming as 32768 as default to read?
EDIT: further in this I added one ioctl function as below, then I'm getting error regarding the storage class of init and cleanup function, which work well if no ioctl is defined. Not getting the link between ioctl and the init/cleanup functions' storage class. Updated code is posted. Errors are below:
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:95:12: error: invalid storage class for function ‘flow_init’
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c: In function ‘flow_init’:
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:98:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c: In function ‘flow_ioctl’:
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:112:13: error: invalid storage class for function ‘flow_terminate’
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:119:1: error: invalid storage class for function ‘__inittest’
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:119:1: warning: ‘alias’ attribute ignored [-Wattributes]
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:120:1: error: invalid storage class for function ‘__exittest’
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:120:1: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:120:1: warning: ‘alias’ attribute ignored [-Wattributes]
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:120:1: error: expected declaration or statement at end of input
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c: At top level:
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:73:13: warning: ‘flow_ioctl’ defined but not used [-Wunused-function]
Below is the code:
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <asm/uaccess.h>
#include <linux/cdev.h>
#include <linux/kdev_t.h>
#include <linux/errno.h>
#include <linux/ioctl.h>
#define SUCCESS 0
#define BUF_LEN 80
#define FLOWTEST_MAGIC 'f'
#define FLOW_QUERY _IOR(FLOWTEST_MAGIC,1,int)
MODULE_LICENSE("GPL");
int minor_num=0,i;
int num_devices=1;
int fopen=0,counter=0,ioctl_test;
static struct cdev ms_flow_cd;
static char c;
///// Open , close and rest of the things
static int flow_open(struct inode *f_inode, struct file *f_file)
{
printk(KERN_ALERT "flowtest device: OPEN\n");
return SUCCESS;
}
static ssize_t flow_read(struct file *f_file, char __user *buf, size_t
len, loff_t *off)
{
printk(KERN_INFO "flowtest Driver: READ()\nlength len=%d, Offset = %d\n",len,*off);
/* Check to avoid the infinitely printing on screen. Return 1 on first read, and 0 on subsequent read */
if(*off==1)
return 0;
printk(KERN_INFO "Copying...\n");
copy_to_user(buf,&c,1);
printk(KERN_INFO "Copied : %s\n",buf);
*off = *off+1;
return 1; // Return 1 on first read
}
static ssize_t flow_write(struct file *f_file, const char __user *buf,
size_t len, loff_t *off)
{
printk(KERN_INFO "flowtest Driver: WRITE()\n");
if (copy_from_user(&c,buf+len-2,1) != 0)
return -EFAULT;
else
{
printk(KERN_INFO "Length len = %d\n\nLast character written is - %c\n",len,*(buf+len-2));
return len;
}
}
static int flow_close(struct inode *i, struct file *f)
{
printk(KERN_INFO "ms_tty Device: CLOSE()\n");
return 0;
}
///* ioctl commands *///
static long flow_ioctl (struct file *filp,unsigned int cmd, unsigned long arg)
{
switch(cmd) {
case FLOW_QUERY:
ioctl_test=51;
return ioctl_test;
default:
return -ENOTTY;
}
///////////////////File operations structure below/////////////////////////
struct file_operations flow_fops = {
.owner = THIS_MODULE,
.llseek = NULL,
.read = flow_read,
.write = flow_write,
.unlocked_ioctl = flow_ioctl,
.open = flow_open,
.release = flow_close
};
static int flow_init(void)
{
printk(KERN_ALERT "Here with flowTest module ... loading...\n");
int result=0;
dev_t dev=0;
result = alloc_chrdev_region(&dev, minor_num,
num_devices,"mod_flowtest"); // allocate major number dynamically.
i=MAJOR(dev);
printk(KERN_ALERT "Major allocated = %d",i);
cdev_init(&ms_flow_cd,&flow_fops);
cdev_add(&ms_flow_cd,dev,1);
return 0;
}
static void flow_terminate(void)
{
dev_t devno=MKDEV(i,0); // wrap major/minor numbers in a dev_t structure , to pass for deassigning.
printk(KERN_ALERT "Going out... exiting...\n");
unregister_chrdev_region(devno,num_devices); //remove entry from the /proc/devices
}
module_init(flow_init);
module_exit(flow_terminate);

1- You're missing cdev_del() in your cleanup function. Which means the device stays registered, but the functions to handle it are unloaded, thus the crash. Also, cdev_add probably fails on the next load, but you don't know because you're not checking return values.
2- It looks ok... you modify offset, return the correct number of bytes, and then return 0 if offset is 1, which indicates EOF. But you should really check for *off >= 1.
EDIT-
The length passed into your read handler function comes all the way from user-land read(). If the user opens the device file and calls read(fd, buf, 32768);, that just means the user wants to read up to 32768 bytes of data. That length gets passed all the way to your read handler. If you don't have 32768 bytes of data to supply, you supply what you have, and return the length. Now, the user code isn't sure if that's the end of the file or not, so it tries for another 32768 read. You really have no data now, so you return 0, which tells the user code that it has hit EOF, so it stops.
In summary, what you're seeing as some sort of default value at the read handler is just the block size that the utility cat uses to read anything. If you want to see a different number show up at your read function, try using dd instead, since it lets you specify the block size.
dd if=/dev/flowtest of=/dev/null bs=512 count=1
In addition, this should read one block and stop, since you're specifying count=1. If you omit count=1, it will look more like cat, and try to read until EOF.

For 2, make sure you start your module as a char device when using mknod.
mknod /dev/you_device c major_number minor_number

Related

How to trigger fops poll function from the kernel driver

I am working on a kernel driver which logs some spi data in a virtual file using debugfs.
My main goal is to be able to "listen" for incomming data from userspace using for example $ tail -f /sys/kernel/debug/spi-logs which is using select to wait for new data on the debugfs file.
I've implemented the fops poll function in the driver and when I am trying to get the data from the userspace, the poll function is never called even though there is new data available in the kernel to be read.
I assume that the poll function never gets called because the debugfs file never gets actually written.
My question is, is there a way to trigger the poll function from the kernel space when new data is available?
EDIT: Added an example
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/debugfs.h>
#include <linux/wait.h>
#include <linux/poll.h>
struct module_ctx {
struct wait_queue_head wq;
};
struct module_ctx module_ctx;
static ssize_t debugfs_read(struct file *filp, char __user *buff, size_t count, loff_t *off)
{
// simulate no data left to read for now
return 0;
}
static __poll_t debugfs_poll(struct file *filp, struct poll_table_struct *wait) {
struct module_ctx *module_hdl;
__poll_t mask = 0;
module_hdl = filp->f_path.dentry->d_inode->i_private;
pr_info("CALLED!!!");
poll_wait(filp, &module_hdl->wq, wait);
if (is_data_available_from_an_external_ring_buffer())
mask |= POLLIN | POLLRDNORM;
return mask;
}
loff_t debugfs_llseek(struct file *filp, loff_t offset, int orig)
{
loff_t pos = filp->f_pos;
switch (orig) {
case SEEK_SET:
pos = offset;
break;
case SEEK_CUR:
pos += offset;
break;
case SEEK_END:
pos = 0; /* Going to the end => to the beginning */
break;
default:
return -EINVAL;
}
filp->f_pos = pos;
return pos;
}
static const struct file_operations debugfs_fops = {
.owner = THIS_MODULE,
.read = debugfs_read,
.poll = debugfs_poll,
.llseek = debugfs_llseek,
};
static int __init rb_example_init(void)
{
struct dentry *file;
init_waitqueue_head(&module_ctx.wq);
file = debugfs_create_file("spi_logs", 0666, NULL, &module_ctx,
&debugfs_fops);
if (!file) {
pr_err("qm35: failed to create /sys/kernel/debug/spi_logs\n");
return 1;
}
return 0;
}
static void __exit
rb_example_exit(void) {
}
module_init(rb_example_init);
module_exit(rb_example_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mihai Pop");
MODULE_DESCRIPTION("A simple example Linux module.");
MODULE_VERSION("0.01");
Using tail -f /sys/kernel/debug/spi_logs, the poll function never gets called
Semantic of poll is to return whenever encoded operations (read and/or write) on a file would return without block. In case of read operation, "block" means:
If read is called in nonblocking mode (field f_flags of the struct file has flag O_NONBLOCK set), then it returns -EAGAIN.
If read is called in blocking mode, then it puts a thread into the waiting state.
As you can see, your read function doesn't follow that convention and returns 0, which means EOF. So the caller has no reason to call poll after that.
Semantic of -f option for tail:
... not stop when end of file is reached, but rather to wait ...
is about the situation, when read returns 0, but the program needs to wait.
As you can see, poll semantic is not suitable for such wait. Instead, such programs use inotify mechanism.

How to dump/list all kernel symbols with addresses from Linux kernel module?

In a kernel module, how to list all the kernel symbols with their addresses?
The kernel should not be re-compiled.
I know "cat /proc/kallsyms" in an interface, but how to get them directly from kernel data structures, using functions like kallsyms_lookup_name.
Example
Working module code:
#include <linux/module.h>
#include <linux/kallsyms.h>
static int prsyms_print_symbol(void *data, const char *namebuf,
struct module *module, unsigned long address)
{
pr_info("### %lx\t%s\n", address, namebuf);
return 0;
}
static int __init prsyms_init(void)
{
kallsyms_on_each_symbol(prsyms_print_symbol, NULL);
return 0;
}
static void __exit prsyms_exit(void)
{
}
module_init(prsyms_init);
module_exit(prsyms_exit);
MODULE_AUTHOR("Sam Protsenko");
MODULE_DESCRIPTION("Module for printing all kernel symbols");
MODULE_LICENSE("GPL");
Explanation
kernel/kallsyms.c implements /proc/kallsyms. Some of its functions are available for external usage. They are exported via EXPORT_SYMBOL_GPL() macro. Yes, your module should have GPL license to use it. Those functions are:
kallsyms_lookup_name()
kallsyms_on_each_symbol()
sprint_symbol()
sprint_symbol_no_offset()
To use those functions, include <linux/kallsyms.h> in your module. It should be mentioned that CONFIG_KALLSYMS must be enabled (=y) in your kernel configuration.
To print all the symbols you obviously have to use kallsyms_on_each_symbol() function. The documentation says next about it:
/* Call a function on each kallsyms symbol in the core kernel */
int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
unsigned long), void *data);
where fn is your callback function that should be called for each symbol found, and data is a pointer to some private data of yours (will be passed as first parameter to your callback function).
Callback function must have next signature:
int fn(void *data, const char *namebuf, struct module *module,
unsigned long address);
This function will be called for each kernel symbol with next parameters:
data: will contain pointer to your private data you passed as last argument to kallsyms_on_each_symbol()
namebuf: will contain name of current kernel symbol
module: will always be NULL, just ignore that
address: will contain address of current kernel symbol
Return value should always be 0 (on non-zero return value the iteration through symbols will be interrupted).
Supplemental
Answering the questions in your comment.
Also, is there a way to output the size of each function?
Yes, you can use sprint_symbol() function I mentioned above to do that. It will print symbol information in next format:
symbol_name+offset/size [module_name]
Example:
psmouse_poll+0x0/0x30 [psmouse]
Module name part can be omitted if symbol is built-in.
I tried the module and see the result with "dmesg". But a lot of symbols are missing such as "futex_requeue". The output symbol number is about 10K, while it is 100K when I use "nm vmlinux".
This is most likely because your printk buffer size is insufficient to store all the output of module above.
Let's improve above module a bit, so it provides symbols information via miscdevice. Also let's add function size to the output, as requested. The code as follows:
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/sizes.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#define DEVICE_NAME "prsyms2"
/* 16 MiB is sufficient to store information about approx. 200K symbols */
#define SYMBOLS_BUF_SIZE SZ_16M
struct symbols {
char *buf;
size_t pos;
};
static struct symbols symbols;
/* ---- misc char device definitions ---- */
static ssize_t prsyms2_read(struct file *file, char __user *buf, size_t count,
loff_t *pos)
{
return simple_read_from_buffer(buf, count, pos, symbols.buf,
symbols.pos);
}
static const struct file_operations prsyms2_fops = {
.owner = THIS_MODULE,
.read = prsyms2_read,
};
static struct miscdevice prsyms2_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = DEVICE_NAME,
.fops = &prsyms2_fops,
};
/* ---- module init/exit definitions ---- */
static int prsyms2_store_symbol(void *data, const char *namebuf,
struct module *module, unsigned long address)
{
struct symbols *s = data;
int count;
/* Append address of current symbol */
count = sprintf(s->buf + s->pos, "%lx\t", address);
s->pos += count;
/* Append name, offset, size and module name of current symbol */
count = sprint_symbol(s->buf + s->pos, address);
s->pos += count;
s->buf[s->pos++] = '\n';
if (s->pos >= SYMBOLS_BUF_SIZE)
return -ENOMEM;
return 0;
}
static int __init prsyms2_init(void)
{
int ret;
ret = misc_register(&prsyms2_misc);
if (ret)
return ret;
symbols.pos = 0;
symbols.buf = vmalloc(SYMBOLS_BUF_SIZE);
if (symbols.buf == NULL) {
ret = -ENOMEM;
goto err1;
}
dev_info(prsyms2_misc.this_device, "Populating symbols buffer...\n");
ret = kallsyms_on_each_symbol(prsyms2_store_symbol, &symbols);
if (ret != 0) {
ret = -EINVAL;
goto err2;
}
symbols.buf[symbols.pos] = '\0';
dev_info(prsyms2_misc.this_device, "Symbols buffer is ready!\n");
return 0;
err2:
vfree(symbols.buf);
err1:
misc_deregister(&prsyms2_misc);
return ret;
}
static void __exit prsyms2_exit(void)
{
vfree(symbols.buf);
misc_deregister(&prsyms2_misc);
}
module_init(prsyms2_init);
module_exit(prsyms2_exit);
MODULE_AUTHOR("Sam Protsenko");
MODULE_DESCRIPTION("Module for printing all kernel symbols");
MODULE_LICENSE("GPL");
And here is how to use it:
$ sudo insmod prsyms2.ko
$ sudo cat /dev/prsyms2 >symbols.txt
$ wc -l symbols.txt
$ sudo rmmod prsyms2
File symbols.txt will contain all kernel symbols (both built-in and from loaded modules) in next format:
ffffffffc01dc0d0 psmouse_poll+0x0/0x30 [psmouse]
It seems that I can use kallsyms_lookup_name() to find the address of the function, can then use a function pointer to call the function?
Yes, you can. If I recall correctly, it's called reflection. Below is an example how to do so:
typedef int (*custom_print)(const char *fmt, ...);
custom_print my_print;
my_print = (custom_print)kallsyms_lookup_name("printk");
if (my_print == 0) {
pr_err("Unable to find printk\n");
return -EINVAL;
}
my_print(KERN_INFO "### printk found!\n");

copy_from_user is fetching unexpected data

I want to use the write sycall for copying a struct
from userspace to kernel.
In both user and kernel space, the struct is defined as
struct packet{
unsigned char packet[256];
int length;
}__attribute__ ((packed));
User space uses a local variable of type struct packet and passes it to the write syscall.
struct packet p;
/* ... (fill in data) */
printf("packet.length: %d\n",packet.length); /* looks correct */
result = write(uartFD, &p, sizeof(struct packet));
The kernel side looks like this, checking for correct length is done, just removed from example.
/* write syscall */
ssize_t packet_write(
struct file *file_ptr,
const char __user *user_buffer,
size_t count, loff_t *position)
{
struct packet p;
int retval;
if (copy_from_user((void*)&p, user_buffer, sizeof(struct packet))){
retval = -EACCES;
goto err;
}
/* looks wrong - different numbers like 96373062 or 96373958 */
printk("packet length: %d\n",p.length);
The opposite direction using read sycall is working as expected:
/* read syscall */
struct packet p;
/* ... (fill in data) */
copy_to_user(user_buffer, (void*)&p, sizeof(struct packet));
/* userspace */
read(uartFD, (void*)&packet, sizeof(struct packet));
What am I doing wrong with write syscall?
(Posted on behalf of the OP).
This is solved - it was my own silly. Both copying an integer and an unsigned char buffer separately was working, so it had to be something about the struct.
One site was packed, the other was not... reusing old code...

Retrieving inode struct given the path to a file

I've seen lots of questions about getting a file's path from it's inode, but almost none about doing the reverse. My kernel module needs to do this to get further information about the subjects of requests passed to open(), such as its file flags or whether or not it's a device. From what I was able to scrounge together from mailing lists, manual pages, and the Linux source code, I came up with this small function:
struct inode* get_inode_from_pathname(const char *pathname) {
struct path path;
kern_path(pathname, LOOKUP_FOLLOW, &path);
return path.dentry->d_inode;
}
Trying to use it in my replacement system call makes kernel messages get printed to the console, though:
struct inode *current_inode;
...
asmlinkage int custom_open(char const *__user file_name, int flags, int mode) {
current_inode = get_inode_from_pathname(file_name);
printk(KERN_INFO "intercepted: open(\"%s\", %X, %X)\n", file_name, flags, mode);
printk(KERN_INFO "i_mode of %s:%hu\n", file_name, current_inode->i_mode);
return real_open(file_name, flags, mode);
}
Is there a better way to do this? I'm almost positive my way is wrong.
You can use the kern_path kernel API to get the inode information from the path string. This function in turn calls the do_path_lookup() function which performs the path look up operation. You can verify the results of the kern_path function by printing the inode number (i_ino field of the inode structure) of the inode you get from your get_inode_from_pathname function and matching it with the inode number from an ls command (ls -i <path of the file>)
I made the following kernel module and it's not crashing the kernel. I am using 2.6.39 kernel.
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include <linux/namei.h>
char *path_name = "/home/shubham/test_prgs/temp.c";
int myinit(void)
{
struct inode *inode;
struct path path;
kern_path(path_name, LOOKUP_FOLLOW, &path);
inode = path.dentry->d_inode;
printk("Path name : %s, inode :%lu\n", path_name, inode->i_ino);
return 0;
}
void myexit(void)
{
return;
}
module_init(myinit);
module_exit(myexit);
//MODULE_AUTHOR("Shubham");
//MODULE_DESCRIPTION("Module to get inode from path");
MODULE_LICENSE("GPL");
MODULE_LICENSE("GPL v2");
Can you send the crash stack trace?
I guess the author has already fixed his problem, but this question was the first link in google's search results, so I'll explain it further.
The problem with the code from the question was using __user pointer.
When you hook a function that deals with __user pointers, first thing you have to make is to copy content to your own kernel buffer where you will deal with it or make sure that pointer won't become invalid while you are dealing with it.
To copy it to your buffer you could use copy_from_user function
char path[MAX_PATH] = {0};
if (copy_from_user(path, user_path, strlen_user(user_path))
{
//error
}
//success
If you hook sys_open, you can use getname/putname functions, as it is done in do_sys_open function:
1010 long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
1011 {
1012 struct open_flags op;
1013 int fd = build_open_flags(flags, mode, &op);
1014 struct filename *tmp;
1015
1016 if (fd)
1017 return fd;
1018
1019 tmp = getname(filename);
1020 if (IS_ERR(tmp))
1021 return PTR_ERR(tmp);
1022
1023 fd = get_unused_fd_flags(flags);
1024 if (fd >= 0) {
1025 struct file *f = do_filp_open(dfd, tmp, &op);
1026 if (IS_ERR(f)) {
1027 put_unused_fd(fd);
1028 fd = PTR_ERR(f);
1029 } else {
1030 fsnotify_open(f);
1031 fd_install(fd, f);
1032 }
1033 }
1034 putname(tmp);
1035 return fd;
1036 }
ps: code from S_S's answer won't crash because in fact it allocated buffer for path inside kernel, so it couldn't become invalid while the module is working with it.

Regarding how the parameters to the read function is passed in simple char driver

I am newbei to driver programming i am started writing the simple char driver . Then i created special file for my char driver mknod /dev/simple-driver c 250 0 .when it type cat /dev/simple-driver. it shows the string "Hello world from Kernel mode!". i know that function
static const char g_s_Hello_World_string[] = "Hello world tamil_vanan!\n\0";
static const ssize_t g_s_Hello_World_size = sizeof(g_s_Hello_World_string);
static ssize_t device_file_read(
struct file *file_ptr
, char __user *user_buffer
, size_t count
, loff_t *possition)
{
printk( KERN_NOTICE "Simple-driver: Device file is read at offset =
%i, read bytes count = %u", (int)*possition , (unsigned int)count );
if( *possition >= g_s_Hello_World_size )
return 0;
if( *possition + count > g_s_Hello_World_size )
count = g_s_Hello_World_size - *possition;
if( copy_to_user(user_buffer, g_s_Hello_World_string + *possition, count) != 0 )
return -EFAULT;
*possition += count;
return count;
}
is get called . This is mapped to (*read) in file_opreation structure of my driver .My question is how this function is get called , how the parameters like struct file,char,count, offset are passed bcoz is i simply typed cat command ..Please elabroate how this happening
In Linux all are considered as files. The type of file, whether it is a driver file or normal file depends upon the mount point where it is mounted.
For Eg: If we consider your case : cat /dev/simple-driver traverses back to the mount point of device files.
From the device file name simple-driver it retrieves Major and Minor number.
From those number(especially from minor number) it associates the driver file for your character driver.
From the driver it uses struct file ops structure to find the read function, which is nothing but your read function:
static ssize_t device_file_read(struct file *file_ptr, char __user *user_buffer, size_t count, loff_t *possition)
User_buffer will always take sizeof(size_t count).It is better to keep a check of buffer(In some cases it throws warning)
String is copied to User_buffer(copy_to_user is used to check kernel flags during copy operation).
postion is 0 for first copy and it increments in the order of count:position+=count.
Once read function returns the buffer to cat. and cat flushes the buffer contents on std_out which is nothing but your console.
cat will use some posix version of read call from glibc. Glibc will put the arguments on the stack or in registers (this depends on your hardware architecture) and will switch to kernel mode. In the kernel the values will be copied to the kernel stack. And in the end your read function will be called.

Resources