Couldn't link kernel provided function to my user defined kernel module - linux-kernel

I'm writing a kernel module that works with page table entries, in order to implement a new page replacement policy. I want to get a list of page table entries owned by a specific process. So far I have retrieved a linked list of vma structures which basically carry information about pages owned by a process. There is a function called walk_page_vma which takes the pointer to the vma and gives back the page tables, it has been defined in mm/pagewalk.c also declared in linux/mm.h. Therefore, I have included linux/mm.h in my code.
process_pid = -1;
struct task_struct* task_list;
size_t process_counter = 0;
for_each_process(task_list) {
if (strcmp(task_list->comm, process_name) == 0){
process_pid = task_list->pid;
pr_info("found %s pid = %d \n", process_name, process_pid);
struct vm_area_struct *mmap = task_list->mm->mmap;
while(mmap != NULL){
struct mm_walk walk;
int res = walk_page_vma(mmap, &walk);
if (res == 0) {
printk("walked successfully\n");
} else {
printk("failed to walk!\n");
}
mmap = mmap->vm_next;
}
// break;
}
// pr_info("== %s [%d]\n", task_list->comm, task_list->pid);
++process_counter;
}
if (process_pid){
// pr_info("found %s pid = %d \n", process_name, process_pid);
} else {
pr_info("couldn't find %s pid. exiting! \n", process_name);
}
// printk(KERN_INFO "== Number of process: %zu\n", process_counter);
At building time, it throws a warning saying
WARNING: "walk_page_vma" [/home/myusername/Projects/ProjectModule/my_module.ko] undefined!
and the module cannot be loaded with insmod.

walk_page_vma is not exported and so it can't be used in a dynamically loadable module.
You must export it by patching the kernel (be aware that such a change will be refused by upstream devs) or compile your code as "built-in".

Related

Copy structure with included user pointers from user space to kernel space (copy_from_user)

I want to transfer a transaction structure, which contains an user space pointer to an array, to kernel by using copy_from_user.
The goal is, to get access to the array elements in kernel space.
User space side:
I allocate an array of _sg_param structures in user space. Now i put the address of this array in a transaction structure (line (*)).
Then i transfer the transaction structure to the kernel via ioctl().
Kernel space side:
On executing this ioctl, the complete transaction structure is copied to kernel space (line (**)). Now kernel space is allocated for holding the array (line (***)). Then I try to copy the array from user space to the newly allocated kernel space (line (****)), and here my problems start:
The kernel is corrupted during execution of this copy. dmesg shows following output:
[ 54.443106] Unhandled fault: page domain fault (0x01b) at 0xb6f09738
[ 54.448067] pgd = ee5ec000
[ 54.449465] [b6f09738] *pgd=2e9d7831, *pte=2d56875f, *ppte=2d568c7f
[ 54.454411] Internal error: : 1b [#1] PREEMPT SMP ARM
Any ideas ???
Following an simplified extract of my code:
// structure declaration
/* Element type of the array that transaction.pbuf_list points to. */
typedef struct _sg_param {
void *seg_buf;
int seg_len;
int received;
} sg_param_t;
/*
 * Transaction passed to the driver through ioctl().
 * The "..." lines presumably stand for members left out of this excerpt.
 */
struct transaction {
...
int num_of_elements;
sg_param_t *pbuf_list; // Array of sg_param structure
...
} trans;
// user space side:
/* Allocate NR_OF_STRUCTS descriptors in user memory; bail out with -ENOMEM on failure. */
if ((pParam = (sg_param_t *) malloc(NR_OF_STRUCTS * sizeof(sg_param_t))) == NULL) {
return -ENOMEM;
}
else {
trans.num_of_elements = NR_OF_STRUCTS;
/* The transaction carries the user-space address of the array, not the array itself. */
trans.pbuf_list = pParam; // (*)
}
/* Only the address of trans is passed to the driver. */
rc = ioctl(dev->fd, MY_CMD, &trans);
if (rc < 0) {
return rc;
}
// kernel space side
/*
 * ioctl handler (kernel side). For MY_CMD it copies the caller's
 * struct transaction into the kernel-side `trans`, allocates a kernel buffer
 * for num_of_elements sg_param_t entries and tries to fill it from user space.
 * Returns -EFAULT when a copy fails and -ENOMEM when the allocation fails.
 * The excerpt ends before the function's closing brace.
 */
static long ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
arg_ptr = (void __user *)arg;
// Perform the specified command
switch (cmd) {
case MY_CMD:
{
/* user_trans aliases the same user-space address as arg_ptr. */
struct transaction *__user user_trans;
user_trans = (struct transaction *__user)arg_ptr;
if (copy_from_user(&trans, arg_ptr, sizeof(trans)) != 0) { // (**)
k_err("Unable to copy transfer info from userspace for "
"AXIDMA_DMA_START_DMA.\n");
return -EFAULT;
}
/* NOTE(review): num_of_elements comes from user space and is not range-checked before this multiplication. */
int size = trans.num_of_elements * sizeof(sg_param_t);
if (trans.pbuf_list != NULL) {
// Allocate kernel memory for buf_list
/* NOTE(review): this overwrites trans.pbuf_list, i.e. the user-space array address copied in above, with the new kernel buffer. */
trans.pbuf_list = (sg_param_t *) kmalloc(size, GFP_KERNEL); // (***)
if (trans.pbuf_list == NULL) {
k_err("Unable to allocate array for buffers.\n");
return -ENOMEM;
}
// Now copy pbuf_list from user space to kernel space
/* NOTE(review): user_trans points into user space, so user_trans->pbuf_list is read by a direct dereference rather than through copy_from_user(). */
if (copy_from_user(trans.pbuf_list, user_trans->pbuf_list, size) != 0) { // (****)
kfree(trans.pbuf_list);
return -EFAULT;
}
}
break;
}
}
You're directly accessing userspace data (user_trans->pbuf_list). You should use the one that you've already copied to kernel (trans.pbuf_list).
Code for this would normally be something like:
sg_param_t *local_copy = kmalloc(size, ...);
// TODO check it succeeded
if (copy_from_user(local_copy, trans.pbuf_list, size) ...)
trans.pbuf_list = local_copy;
// use trans.pbuf_list
Note that you also need to check trans.num_of_elements to be valid (0 would make kmalloc return ZERO_SIZE_PTR, and too big value might be a way for DoS).

Why pam_loginuid module fails on writing to /proc/self/loginuid with -EPERM?

I found that application using pam library to authenticate fails on error:
Error writing /proc/self/loginuid: Operation not permitted
By strace i found that fail is on write to the /proc/self/loginuid file.
Further inspection and adding some debug code to kernel (code below):
/*
 * Write handler for /proc/<pid>/loginuid, instrumented with debug output.
 *
 * Only the task that owns the proc entry may set its own loginuid; any other
 * writer gets -EPERM. Partial writes (*ppos != 0) and values that do not map
 * to a valid kuid are rejected with -EINVAL. Returns count on success or the
 * negative error from parsing / audit_set_loginuid().
 */
static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
				   size_t count, loff_t *ppos)
{
	struct inode * inode = file_inode(file);
	struct task_struct *owner;
	uid_t loginuid;
	kuid_t kloginuid;
	int rv;

	/*
	 * pid_task() must be called, and its result used, inside an RCU
	 * read-side section. Look the owner up once and guard against NULL
	 * (the task may already be gone) before touching its fields.
	 */
	rcu_read_lock();
	owner = pid_task(proc_pid(inode), PIDTYPE_PID);
	printk(KERN_DEBUG "proc_loginuid_write\n");
	printk(KERN_DEBUG "a+++ %s\n", current->comm);
	if (owner)
		printk(KERN_DEBUG "b+++ %s\n", owner->comm);
	printk(KERN_DEBUG "+++2++ pid = %d\n", current->pid);
	if (owner)
		printk(KERN_DEBUG "+++3++ pid = %d\n", owner->pid);
	if (current != owner) {
		rcu_read_unlock();
		printk(KERN_ERR "proc_loginuid_write failed by permission!\n");
		return -EPERM;
	}
	rcu_read_unlock();

	if (*ppos != 0) {
		/* No partial writes. */
		return -EINVAL;
	}

	rv = kstrtou32_from_user(buf, count, 10, &loginuid);
	if (rv < 0)
		return rv;

	/* is userspace trying to explicitly UNSET the loginuid? */
	if (loginuid == AUDIT_UID_UNSET) {
		kloginuid = INVALID_UID;
	} else {
		kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
		if (!uid_valid(kloginuid))
			return -EINVAL;
	}

	rv = audit_set_loginuid(kloginuid);
	if (rv < 0)
		return rv;
	return count;
}
showed in dmesg that:
[ 30.672242] proc_loginuid_write
[ 30.672249] a+++ testapp
[ 30.672251] b+++ testapp
[ 30.672254] +++2++ pid = 2920
[ 30.672257] +++3++ pid = 2451
[ 30.672259] proc_loginuid_write failed by permission!
Name testapp is intentionally changed name. So it looks like the file /proc/self/loginuid is file created by parent, and it is read by child thread.
I tested the same code on kernels 3.14 and 4.9: it works on 3.14 but fails on 4.9. Why?
I found the solution for the problem.
The old 3.14 kernel had the CONFIG_AUDITSYSCALL option turned off in its config. So there was no /proc/self/loginuid file, and the PAM module simply doesn't care when that file is absent.
On newer kernel 4.9 option is automatically selected by CONFIG_AUDIT=y.
So simplest solution is to turn off CONFIG_AUDIT option, but why in process of kernel evolution CONFIG_AUDITSYSCALL became a non controllable option is matter for other question.
Thanks!

How do cdev and its associated file operation work?

Actually working on a PCI driver.
I have two PCIe cards with same device ID and vendor ID.
So to make a difference, I assign these two cards with two different MINOR numbers.
//request for device numbers
/*
 * Reserve cards_found consecutive minor numbers under a dynamically assigned
 * major and attach the driver's file operations to each of them.
 * On any failure the device-number region and the PCI driver registration
 * are released and the error code is returned.
 *
 * NOTE(review): every cdev_add() call below reuses the same cdev; a single
 * cdev_add(cdevs, MKDEV(major, 0), cards_found) is the usual way to cover a
 * minor range -- confirm before relying on the per-minor loop.
 */
error = alloc_chrdev_region(&devt, 0, cards_found, DEVICE_NAME);
if (error == 0)
{
	major = MAJOR(devt);
	printk(KERN_INFO "(drv_init): MAJOR number is %d\n", major);
	printk(KERN_INFO "(drv_init): MINOR number range from 0 to %d\n", cards_found-1);
	cdevs = cdev_alloc();
	if (cdevs == NULL)
	{
		/* devt still holds the base number returned by alloc_chrdev_region(). */
		printk(KERN_ALERT "(drv_init): cdev_alloc failed, exit driver\n");
		unregister_chrdev_region(devt, cards_found);
		pci_unregister_driver(&my_pci_driver);
		return(-ENOMEM);
	}
	cdev_init(cdevs, fops);
	/* cdev_init() zeroes the structure, so owner has to be set afterwards. */
	cdevs->owner = THIS_MODULE;
	for(i=0;i<cards_found;i++)
	{
		devt = MKDEV(major, i);
		error = cdev_add(cdevs, devt, 1);
		if (error == 0)
		{
			printk(KERN_INFO "(drv_init): cdev_add success for minor number: %d", i);
		}
		else
		{
			printk(KERN_ALERT "(drv_init): cdev_add failed for minor number: %d,error code: %d, exit driver\n", i, error);
			/* Release the whole region, starting again from minor 0. */
			devt = MKDEV(major, 0);
			unregister_chrdev_region(devt, cards_found);
			pci_unregister_driver(&my_pci_driver);
			return(error);
		}
	}
}
I'm doing this because all the docs I found on the internet suggest using one MINOR number per device.
But I can't understand how could OS know which card is targeted when I do a fops_open, since the fops is bundled to all devices.
Need your help, Thx everyone.
PS: fops = file operations
Signature of .open operation is
int open(struct inode* inode, struct file* file)
Minor number of device opened can be obtained via
iminor(inode)
Other file operations also may obtain device number using file->f_inode as inode.
Alternatively, .open may store some device-specific data in file->private_data, and other operations may access it that way.

How to remove dynamically assigned major number from /proc/devices?

In my kernel driver project I register with a dynamic major number by calling
register_chrdev(0, "xxxxx", &xxxxx);
and unregistered my module with
unregister_chrdev(0. "xxxxx");
When I load my driver with insmod, I received dynamic major number, for example 243, and, after rmmod, success removing module.
But, after removing the module /proc/devices still shows the major number (243).
How do I get removing my driver to also remove its major number from the list in /proc/devices?
When you call register_chrdev() with 0 as the first argument to request the assignment of a dynamic major number, the return value will be the assigned major number, which you should save.
Then, when you call unregister_chrdev(), you should pass the saved major number as the argument rather than the 0 you are passing now. Also make sure that the device name argument matches. And be aware that this function returns a result, which you can check for status/failure - in the latter case you definitely want to printk() a message so that you know that your code has not accomplished its goal.
You can see a complete example at http://www.tldp.org/LDP/lkmpg/2.6/html/x569.html with the key parts being:
static int Major; /* Major number assigned to our device driver */
/*
 * Module entry point: ask the kernel for a dynamically assigned major number
 * (first argument 0) and remember it in Major for later unregistration.
 * Returns SUCCESS, or the negative value from register_chrdev() on failure.
 */
int init_module(void)
{
	Major = register_chrdev(0, DEVICE_NAME, &fops);
	if (Major >= 0)
		return SUCCESS;

	/* Registration failed: Major holds the negative error code. */
	printk(KERN_ALERT "Registering char device failed with %d\n", Major);
	return Major;
}
/*
 * Module exit point: release the major number saved in Major and log an
 * alert if the kernel reports a failure.
 */
void cleanup_module(void)
{
	int ret;

	ret = unregister_chrdev(Major, DEVICE_NAME);
	if (ret >= 0)
		return;

	printk(KERN_ALERT "Error in unregister_chrdev: %d\n", ret);
}
Also be aware that this method of registering a device is considered outdated - you might want to research the newer method.

How do I make sure insmod fails on error?

I developed a peripheral driver for Linux. The .probe function performs the usual error checks like memory allocation failures, and also attempts to communicate with the hardware and in any type of error, deallocates any memory and returns an error code like -ENOMEM or -EIO.
The problem is, although the module probe function return -EIO when the hardware is unreachable, I still see the module is listed in lsmod output. Is it possible to make sure an insmod completely fails when there is a problem during initialization?
Here is my current probe function. All device specific functions return an appropriate error code on failure, usually -EIO.
/*
 * I2C probe callback: allocate the driver state, reset and configure the
 * sensor, then allocate and register the IIO device.
 *
 * Returns 0 on success or a negative error code. On failure, everything
 * allocated in this function is released again.
 */
static int mlx90399_probe(struct i2c_client *client,
			  const struct i2c_device_id *id)
{
	int err;
	struct mlx90399_data *data;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data) {
		dev_err(&client->dev, "Memory allocation fails\n");
		err = -ENOMEM;
		goto exit;
	}
	i2c_set_clientdata(client, data);
	data->client = client;
	mutex_init(&data->lock);
	data->mode = MLX90399_MODE_OFF;

	err = mlx90399_reset(client);
	if (err < 0)
		goto exit;
	msleep(1); /* nominal 0.6ms from reset */

	err = mlx90399_write_register_defaults(client);
	if (err < 0)
		goto exit;
	err = mlx90399_update_scale(client);
	if (err < 0)
		goto exit;

	data->indio_dev = iio_allocate_device(0);
	if (data->indio_dev == NULL) {
		err = -ENOMEM;
		goto exit;
	}
	data->indio_dev->dev.parent = &client->dev;
	data->indio_dev->info = &mlx90399_info;
	data->indio_dev->dev_data = (void *)(data);
	data->indio_dev->modes = INDIO_DIRECT_MODE;

	/* NOTE(review): any status returned by mlx90399_setup_irq() is not checked. */
	mlx90399_setup_irq(client);

	err = iio_device_register(data->indio_dev);
	if (err < 0)
		goto exit_free_iio;
	return 0;

exit_free_iio:
	/* The IIO device was allocated but never registered; free it here. */
	iio_free_device(data->indio_dev);
exit:
	/* kfree(NULL) is a no-op, so this also covers the kzalloc failure path. */
	kfree(data);
	return err;
}
See the comment in __driver_attach():
/*
* Lock device and try to bind to it. We drop the error
* here and always return 0, because we need to keep trying
* to bind to devices and some drivers will return an error
* simply if it didn't support the device.
*
* driver_probe_device() will spit a warning if there
* is an error.
*/
To make the module initialization fail, unregister the driver and return an error code from your init function.
Note that there isn't necessarily a 1:1 relationship between a module and a device. One module may be used for several devices. With the use of device trees, for example, a device tree may declare several on-board UARTs, all using one serial device kernel module. The module's probe function would be called several times, once for each device. Just because one probe call fails, that doesn't necessarily mean the module should be unloaded.

Resources