kernel tried to execute NX-protected page - exploit attempt? - linux-kernel

Look at this very basic linux module:
/*
 * write() handler of the checksec device.
 *
 * Interprets the first sizeof(unsigned long) bytes of the written data as
 * the address of a function taking no arguments, calls that address from
 * kernel context and then reports the whole write as consumed.
 *
 * NOTE(review): buf is a __user pointer read with a plain memcpy(), and len
 * is never compared against sizeof(unsigned long). Both look intentional for
 * this exploit exercise - confirm before reusing the code anywhere else.
 */
static ssize_t checksec_write(struct file *f, const char __user *buf,size_t len, loff_t *off)
{
	unsigned long target;
	void (*entry)(void);

	memcpy(&target, buf, sizeof(target));

	/* Turn the raw address into a callable pointer and jump to it. */
	entry = (void (*)(void))target;
	entry();

	return len;
}
This module works with the /dev/checksec char device.
and look at this basic c userland program:
/* Deliberately empty function; only its address matters to this program. */
void fonction_userland(void);
void fonction_userland(void)
{
	asm ("nop");
}

/*
 * Sends the address of fonction_userland() to /dev/checksec.
 *
 * Returns 0 when the address was written and the stream closed cleanly,
 * 1 when opening, writing or closing the device failed.
 */
int main(void)
{
	/* A function address does not convert to an integer implicitly. */
	unsigned long addr_fonction_userland = (unsigned long)&fonction_userland;
	int rc = 0;
	FILE *f = fopen("/dev/checksec", "w");

	if (f == NULL) {
		perror("fopen /dev/checksec");
		return 1;
	}

	if (fwrite(&addr_fonction_userland, sizeof(addr_fonction_userland), 1, f) != 1) {
		perror("fwrite");
		rc = 1;
	}

	/*
	 * stdio buffers the data, so the device only sees it once the stream
	 * is flushed; closing explicitly makes that point (and any error it
	 * produces) visible.
	 */
	if (fclose(f) != 0) {
		perror("fclose");
		rc = 1;
	}

	return rc;
}
When i run the program, i get this error in dmesg:
kernel tried to execute NX-protected page - exploit attempt?
I understand it is an exploit attempt, because this is what I am trying to learn. But I do not understand why the page is NX-protected. The function is not in a non-executable page; it is a userland function.
SMEP and SMAP are not enabled (their bits are 0 in the CR4 register).
Thanks a lot

Related

How to dump/list all kernel symbols with addresses from Linux kernel module?

In a kernel module, how to list all the kernel symbols with their addresses?
The kernel should not be re-compiled.
I know "cat /proc/kallsyms" in an interface, but how to get them directly from kernel data structures, using functions like kallsyms_lookup_name.
Example
Working module code:
#include <linux/module.h>
#include <linux/kallsyms.h>
/*
 * Callback handed to kallsyms_on_each_symbol(): logs one symbol as
 * "### <hex address><TAB><name>".
 *
 * data and module are accepted to match the callback signature but are not
 * used. Always returns 0.
 */
static int prsyms_print_symbol(void *data, const char *namebuf,
			       struct module *module, unsigned long address)
{
	pr_info("### %lx\t%s\n", address, namebuf);

	return 0;
}
/*
 * Module entry point: runs prsyms_print_symbol() over every symbol that
 * kallsyms_on_each_symbol() visits, then reports success.
 */
static int __init prsyms_init(void)
{
	/* The callback keeps no state, so no private data is passed. */
	kallsyms_on_each_symbol(prsyms_print_symbol, NULL);

	return 0;
}
/* Module exit point: prsyms_init() leaves nothing behind to release. */
static void __exit prsyms_exit(void)
{
	/* intentionally empty */
}
/* Hook the entry/exit functions into module load and unload. */
module_init(prsyms_init);
module_exit(prsyms_exit);
/* Module metadata. */
MODULE_AUTHOR("Sam Protsenko");
MODULE_DESCRIPTION("Module for printing all kernel symbols");
MODULE_LICENSE("GPL");
Explanation
kernel/kallsyms.c implements /proc/kallsyms. Some of its functions are available for external usage. They are exported via EXPORT_SYMBOL_GPL() macro. Yes, your module should have GPL license to use it. Those functions are:
kallsyms_lookup_name()
kallsyms_on_each_symbol()
sprint_symbol()
sprint_symbol_no_offset()
To use those functions, include <linux/kallsyms.h> in your module. It should be mentioned that CONFIG_KALLSYMS must be enabled (=y) in your kernel configuration.
To print all the symbols you have to use the kallsyms_on_each_symbol() function. The documentation says the following about it:
/* Call a function on each kallsyms symbol in the core kernel */
int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
unsigned long), void *data);
where fn is your callback function that should be called for each symbol found, and data is a pointer to some private data of yours (will be passed as first parameter to your callback function).
Callback function must have next signature:
int fn(void *data, const char *namebuf, struct module *module,
unsigned long address);
This function will be called for each kernel symbol with next parameters:
data: will contain pointer to your private data you passed as last argument to kallsyms_on_each_symbol()
namebuf: will contain name of current kernel symbol
module: will always be NULL, just ignore that
address: will contain address of current kernel symbol
Return value should always be 0 (on non-zero return value the iteration through symbols will be interrupted).
Supplemental
Answering the questions in your comment.
Also, is there a way to output the size of each function?
Yes, you can use sprint_symbol() function I mentioned above to do that. It will print symbol information in next format:
symbol_name+offset/size [module_name]
Example:
psmouse_poll+0x0/0x30 [psmouse]
Module name part can be omitted if symbol is built-in.
I tried the module and see the result with "dmesg". But a lot of symbols are missing such as "futex_requeue". The output symbol number is about 10K, while it is 100K when I use "nm vmlinux".
This is most likely because your printk buffer size is insufficient to store all the output of module above.
Let's improve above module a bit, so it provides symbols information via miscdevice. Also let's add function size to the output, as requested. The code as follows:
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/sizes.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#define DEVICE_NAME "prsyms2"
/* 16 MiB is sufficient to store information about approx. 200K symbols */
#define SYMBOLS_BUF_SIZE SZ_16M
/* Text buffer holding the symbol listing that the device hands to readers. */
struct symbols {
/* Start of the buffer (allocated with vmalloc() in prsyms2_init()). */
char *buf;
/* Number of bytes written to buf so far; also the next write offset. */
size_t pos;
};
/* The single, module-wide instance. */
static struct symbols symbols;
/* ---- misc char device definitions ---- */
/*
 * read() handler: serves the collected symbol text.
 *
 * Delegates to simple_read_from_buffer(), exposing the first symbols.pos
 * bytes of symbols.buf, and returns whatever that helper returns.
 */
static ssize_t prsyms2_read(struct file *file, char __user *buf, size_t count,
			    loff_t *pos)
{
	const void *text = symbols.buf;
	size_t text_len = symbols.pos;

	return simple_read_from_buffer(buf, count, pos, text, text_len);
}
/* File operations of the device: only read() is provided. */
static const struct file_operations prsyms2_fops = {
.owner = THIS_MODULE,
.read = prsyms2_read,
};
/* Misc device descriptor: dynamically assigned minor, named DEVICE_NAME. */
static struct miscdevice prsyms2_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = DEVICE_NAME,
.fops = &prsyms2_fops,
};
/* ---- module init/exit definitions ---- */
static int prsyms2_store_symbol(void *data, const char *namebuf,
struct module *module, unsigned long address)
{
struct symbols *s = data;
int count;
/* Append address of current symbol */
count = sprintf(s->buf + s->pos, "%lx\t", address);
s->pos += count;
/* Append name, offset, size and module name of current symbol */
count = sprint_symbol(s->buf + s->pos, address);
s->pos += count;
s->buf[s->pos++] = '\n';
if (s->pos >= SYMBOLS_BUF_SIZE)
return -ENOMEM;
return 0;
}
/*
 * Module entry point: builds the symbol listing, then publishes the device.
 *
 * The buffer is allocated and filled BEFORE misc_register() so that the
 * device never becomes visible while the buffer is missing, half-filled or
 * about to be freed on an error path.
 *
 * Returns 0 on success; -ENOMEM if the buffer cannot be allocated; otherwise
 * the error reported by the symbol callback or by misc_register() (the
 * original replaced the callback's error with -EINVAL).
 */
static int __init prsyms2_init(void)
{
	int ret;

	symbols.pos = 0;
	symbols.buf = vmalloc(SYMBOLS_BUF_SIZE);
	if (symbols.buf == NULL)
		return -ENOMEM;

	/* The device does not exist yet, so dev_info() cannot be used here. */
	pr_info(DEVICE_NAME ": Populating symbols buffer...\n");
	ret = kallsyms_on_each_symbol(prsyms2_store_symbol, &symbols);
	if (ret != 0)
		goto err_free;
	symbols.buf[symbols.pos] = '\0';

	ret = misc_register(&prsyms2_misc);
	if (ret)
		goto err_free;

	dev_info(prsyms2_misc.this_device, "Symbols buffer is ready!\n");
	return 0;

err_free:
	vfree(symbols.buf);
	symbols.buf = NULL;
	return ret;
}
/*
 * Module exit point: releases the symbol buffer and removes the misc device.
 */
static void __exit prsyms2_exit(void)
{
	vfree(symbols.buf);

	misc_deregister(&prsyms2_misc);
}
/* Hook the entry/exit functions into module load and unload. */
module_init(prsyms2_init);
module_exit(prsyms2_exit);
/* Module metadata. */
MODULE_AUTHOR("Sam Protsenko");
MODULE_DESCRIPTION("Module for printing all kernel symbols");
MODULE_LICENSE("GPL");
And here is how to use it:
$ sudo insmod prsyms2.ko
$ sudo cat /dev/prsyms2 >symbols.txt
$ wc -l symbols.txt
$ sudo rmmod prsyms2
File symbols.txt will contain all kernel symbols (both built-in and from loaded modules) in next format:
ffffffffc01dc0d0 psmouse_poll+0x0/0x30 [psmouse]
It seems that I can use kallsyms_lookup_name() to find the address of the function, can then use a function pointer to call the function?
Yes, you can. If I recall correctly, it's called reflection. Below is an example how to do so:
/* printk-compatible function pointer type, resolved by name at run time. */
typedef int (*custom_print)(const char *fmt, ...);
custom_print my_print = (custom_print)kallsyms_lookup_name("printk");

/* A zero result from the lookup is treated as "symbol not found". */
if (!my_print) {
	pr_err("Unable to find printk\n");
	return -EINVAL;
}
my_print(KERN_INFO "### printk found!\n");

using system call in Linux kernel file

I am implementing a custom process scheduler in Linux. And I want to use a system call to record my program so that I can debug easily.
The file I write is
source code : linux-x.x.x/kernel/sched_new_scheduler.c
In sched_new_scheduler.c could I use syscall(the id of the system call, parameter); directly? It seems syscall(); is used with #include<sys/syscalls.h> in C program, but the ".h" can not be found in the kernel/.
I just want to know how my program executes by recording something, so could I directly write printk("something"); in sched_new_scheduler.c ? Or try a correct way to use system call?
A system call is essentially a wrapper around other kernel functions. One way to get the effect of a system call inside the kernel is to find the underlying kernel function that implements it. For example:
int open(const char *pathname, int flags, mode_t mode); -> filp_open
////////////////////////////////////////////////////////////////////////////////////////////////
/*
 * Opens a file from kernel context.
 *
 * @path:   pathname, a kernel-space string
 * @flags:  open flags, passed to filp_open() unchanged
 * @rights: creation mode, passed to filp_open() unchanged
 *
 * Returns the opened struct file, or NULL on any failure (the specific
 * error encoded in the ERR_PTR is not reported to the caller).
 *
 * filp_open() takes a kernel-space pathname, so no get_fs()/set_fs()
 * address-limit switch is needed around it; the original one was dropped
 * together with the assigned-but-never-read err variable.
 */
struct file* file_open(const char* path, int flags, int rights)
{
	struct file *filp = filp_open(path, flags, rights);

	if (IS_ERR(filp))
		return NULL;

	return filp;
}
ssize_t write(int fd, const void *buf, size_t count); -> vfs_write
////////////////////////////////////////////////////////////////////////////////////////////////
int file_write(struct file* file, unsigned long long offset, unsigned char* data, unsigned int size)
{
mm_segment_t oldfs;
int ret;
oldfs = get_fs();
set_fs(get_ds());
ret = vfs_write(file, data, size, &offset);
set_fs(oldfs);
return ret;
}
A system call is supposed to be used by an application program to obtain a service from the kernel. You can implement a system call in your kernel module, but it should be called from an application program. If you just want to expose the statistics of your new scheduler to userspace for debugging, you can use interfaces like proc, sys, debugfs, etc. That would be much easier than implementing a system call and writing a userspace application to use it.

Is a spinlock necessary in this Linux device driver code?

Is the following Linux device driver code safe, or do I need to protect access to interrupt_flag with a spinlock?
/* Wait queue on which my_write() sleeps until the interrupt arrives. */
static DECLARE_WAIT_QUEUE_HEAD(wq_head);
/* Cleared by my_write() before it sleeps, set to 1 by the IRQ handler. */
static int interrupt_flag = 0;
/*
 * write() handler: clears the completion flag and sleeps until the IRQ
 * handler sets it again.
 *
 * Returns length once the flag has been set, or the non-zero value from
 * wait_event_interruptible() when the sleep was cut short. The original had
 * no return statement at all, although the function returns ssize_t.
 */
static ssize_t my_write(struct file* filp, const char* __user buffer, size_t length, loff_t* offset)
{
	int rc;

	interrupt_flag = 0;

	rc = wait_event_interruptible(wq_head, interrupt_flag != 0);
	if (rc)
		return rc;

	return length;
}
/*
 * Interrupt handler: marks the event as complete, then wakes whoever is
 * sleeping on wq_head. Always reports the interrupt as handled.
 */
static irqreturn_t handler(int irq, void* dev_id)
{
	interrupt_flag = 1;
	wake_up_interruptible(&wq_head);

	return IRQ_HANDLED;
}
Basically, I kick off some event in my_write() and wait for the interrupt to indicate that it completes.
If so, which form of spin_lock() do I need to use? I thought spin_lock_irq() was appropriate, but when I tried that I got a warning about the IRQ handler enabling interrupts.
Doesn't wait_event_interruptible evaluate the interrupt_flag != 0 condition? That would imply that the lock should be held while it reads the flag, right?
No lock is needed in the example given. Memory barriers are needed after the store of the flag, and before the load -- to ensure visibility to the flag -- but the wait_event_* and wake_up_* functions provide those. See the section entitled "Sleep and wake-up functions" in this document: https://www.kernel.org/doc/Documentation/memory-barriers.txt
Before adding a lock, consider what is being protected. Generally locks are needed if you're setting two or more separate pieces of data and you need to ensure that another cpu/core doesn't see an incomplete intermediate state (after you started but before you finished). In this case, there's no point in protecting the storing / loading of the flag value because stores and loads of a properly aligned integer are always atomic.
So, depending on what else your driver is doing, it's quite possible you do need a lock, but it isn't needed for the snippet you've provided.
Yes you need a lock. With the given example (that uses int and no specific arch is mentioned), the process context may be interrupted while accessing the interrupt_flag. Upon return from the IRQ, it may continue and interrupt_flag may be left in inconsistent state.
Try this:
/* Wait queue on which my_write() sleeps until the interrupt arrives. */
static DECLARE_WAIT_QUEUE_HEAD(wq_head);
/* Cleared by my_write() before it sleeps, set to 1 by the IRQ handler. */
static int interrupt_flag = 0;
/* Taken around every store to interrupt_flag in this variant. */
DEFINE_SPINLOCK(lock);
/*
 * write() handler: clears the completion flag under the spinlock and sleeps
 * until the IRQ handler sets it again.
 *
 * Returns length once the flag has been set, or the non-zero value from
 * wait_event_interruptible() when the sleep was cut short. The original had
 * no return statement at all, although the function returns ssize_t.
 */
static ssize_t my_write(struct file* filp, const char* __user buffer, size_t length, loff_t* offset)
{
	int rc;

	/* spin_lock_irq() or spin_lock_irqsave() is OK here */
	spin_lock_irq(&lock);
	interrupt_flag = 0;
	spin_unlock_irq(&lock);

	rc = wait_event_interruptible(wq_head, interrupt_flag != 0);
	if (rc)
		return rc;

	return length;
}
/*
 * Interrupt handler: sets the completion flag while holding the spinlock
 * (saving and restoring the interrupt state), then wakes whoever is sleeping
 * on wq_head. Always reports the interrupt as handled.
 */
static irqreturn_t handler(int irq, void* dev_id)
{
	unsigned long irq_state;

	spin_lock_irqsave(&lock, irq_state);
	interrupt_flag = 1;
	spin_unlock_irqrestore(&lock, irq_state);

	wake_up_interruptible(&wq_head);

	return IRQ_HANDLED;
}
IMHO, the code has to be written without making any arch or compiler-related assumptions (like the 'properly aligned integer' in Gil Hamilton answer).
Now if we can change the code and use atomic_t instead of the int flag, then no locks should be needed.

CUDA constant memory issue: invalid device symbol with cudaGetSymbolAddress

I am trying to set constant values on my GPU's constant memory before launching a kernel which needs these values.
My code (simplified):
// Device constant the host wants to fill in before launching a kernel.
__constant__ size_t con_N;
int main()
{
size_t N;
size_t* dev_N = NULL;
cudaError_t cudaStatus;
//[...]
// Ask the runtime for the device address of con_N.
// NOTE(review): the symbol is passed by address (&con_N) here; this is the
// call that reports "invalid device symbol". The answers below pass con_N
// itself instead.
cudaStatus = cudaGetSymbolAddress((void **)&dev_N, &con_N);
if (cudaStatus != cudaSuccess) {
cout<<"cudaGetSymbolAddress (dev_N) failed: "<<cudaGetErrorString(cudaStatus)<<endl;
}
I planned to cudaMemcpy my N to dev_N afterwards.
However, all I get at this point in the code is:
cudaGetSymbolAddress (dev_N) failed: invalid device symbol
I'm working with CUDA 6.5 so it's not a quoted symbol issue, as it is in most of the Q&A I've been checking so far.
I tried to replace con_N with con_N[1] (and remove the & before con_N in cudaGetSymbolAddress parameters): same result.
As the prototype of this function is cudaGetSymbolAddress(void **devPtr , const void* symbol ), I guessed it wanted to be given my symbol's address. However, I tried with cudaStatus = cudaGetSymbolAddress((void **)&dev_N, (const void*) con_N); and I got the same message.
I'm also getting the very same error message when I remove cudaGetSymbolAddress((void **)&dev_N, &con_N) and go directly with cudaMemcpyToSymbol(&con_N, &N, sizeof(size_t)) instead.
I'm afraid I missed something essential. Any help will be greatly appreciated.
The correct usage of cudaGetSymbolAddress is
cudaGetSymbolAddress((void **)&dev_N, con_N)
I'm showing this with the simple example below.
As the documentation explains, the symbol should physically reside on the device. Accordingly, using &con_N in the API call appears to be meaningless: since cudaGetSymbolAddress is a host API, accessing the address of something residing on the device directly from the host should not be possible. I'm not sure whether the prototype appearing in the CUDA Runtime API document should instead read as
template<class T>
cudaError_t cudaGetSymbolAddress (void **devPtr, const T symbol)
with device symbol reference instead of device symbol address.
#include <stdio.h>
// Device constant whose address is queried from both host and device below.
__constant__ int const_symbol;
/********************/
/* CUDA ERROR CHECK */
/********************/
// Wraps a CUDA runtime call and reports a failure with the call site.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Prints "GPUassert: <error string> <file> <line>" to stderr when code is
// not cudaSuccess and, unless abort is false, exits with code as the status.
// Does nothing on success.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess)
        return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);

    if (abort)
        exit(code);
}
/***************/
/* TEST KERNEL */
/***************/
// Prints the address of const_symbol as seen from device code.
__global__ void kernel() {
    const int *symbol_address = &const_symbol;

    printf("Address of symbol from device = %p\n", symbol_address);
}
/********/
/* MAIN */
/********/
// Prints the address of const_symbol twice: once from a kernel running on
// the device and once as returned to the host by cudaGetSymbolAddress().
// Any CUDA error aborts through gpuErrchk; otherwise returns 0.
int main()
{
    int *pointer = NULL;

    gpuErrchk(cudaGetSymbolAddress((void**)&pointer, const_symbol));

    kernel<<<1,1>>>();
    // A launch reports nothing by itself: pick up launch errors explicitly.
    gpuErrchk(cudaPeekAtLastError());
    // Wait for the kernel so that its printf output is flushed before the
    // program exits; without this the device line may never appear.
    gpuErrchk(cudaDeviceSynchronize());

    printf("Address of symbol from host = %p\n", (void *)pointer);
    return 0;
}
In my opinion, A line of your code should be fixed like below.
cudaStatus = cudaGetSymbolAddress((void **)&dev_N, con_N);
Hope this helps you.

Using an old device file for char device driver

I have two questions as I'm trying device drivers as a beginner.
I created one module , loaded it, it dynamically took major number 251 say. Number of minor devices is kept 1 only i.e minor number 0. For testing , I tried echo and cat on the device file (created using mknod) and it works as expected. Now if I unload the module but don't remove /dev entry and again load the module with same major number and try writing/reading to same device file which was used previously, kernel crashes. I know we shouldn't do this but just want to understand what happens in this scenario which causes this crash. I think something that VFS does.
When I do cat on device file, the read keeps on happening indefinitely. why? To stop that needed to use offset manipulation. This looks to be because buffer length is coming as 32768 as default to read?
EDIT: further in this I added one ioctl function as below, then I'm getting error regarding the storage class of init and cleanup function, which work well if no ioctl is defined. Not getting the link between ioctl and the init/cleanup functions' storage class. Updated code is posted. Errors are below:
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:95:12: error: invalid storage class for function ‘flow_init’
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c: In function ‘flow_init’:
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:98:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c: In function ‘flow_ioctl’:
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:112:13: error: invalid storage class for function ‘flow_terminate’
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:119:1: error: invalid storage class for function ‘__inittest’
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:119:1: warning: ‘alias’ attribute ignored [-Wattributes]
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:120:1: error: invalid storage class for function ‘__exittest’
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:120:1: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:120:1: warning: ‘alias’ attribute ignored [-Wattributes]
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:120:1: error: expected declaration or statement at end of input
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c: At top level:
/home/diwakar/Documents/my_modules/first_test_module/flowTest.c:73:13: warning: ‘flow_ioctl’ defined but not used [-Wunused-function]
Below is the code:
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <asm/uaccess.h>
#include <linux/cdev.h>
#include <linux/kdev_t.h>
#include <linux/errno.h>
#include <linux/ioctl.h>
/* Return value used by flow_open() to report success. */
#define SUCCESS 0
/* NOTE(review): BUF_LEN is not referenced in the code shown here. */
#define BUF_LEN 80
/* ioctl magic and the single query command built from it. */
#define FLOWTEST_MAGIC 'f'
#define FLOW_QUERY _IOR(FLOWTEST_MAGIC,1,int)
MODULE_LICENSE("GPL");
/* First minor number requested; i receives the allocated major number. */
int minor_num=0,i;
/* Number of device numbers requested and released. */
int num_devices=1;
/* ioctl_test holds the value returned for FLOW_QUERY.
 * NOTE(review): fopen and counter are not referenced in the code shown here. */
int fopen=0,counter=0,ioctl_test;
/* Character device object registered in flow_init(). */
static struct cdev ms_flow_cd;
/* The one byte of device state: stored by write(), returned by read(). */
static char c;
///// Open , close and rest of the things
/* open() handler: logs the call and always succeeds. */
static int flow_open(struct inode *f_inode, struct file *f_file)
{
	printk(KERN_ALERT "flowtest device: OPEN\n");

	return SUCCESS;
}
/*
 * read() handler: the device behaves like a one-byte file holding c.
 *
 * @buf: user buffer receiving the byte
 * @len: number of bytes requested
 * @off: file position; advanced by one after a successful read
 *
 * Returns 1 when the byte was delivered, 0 (end of file) when the position
 * is already at or past 1 or when no bytes were requested, and -EFAULT when
 * the user buffer cannot be written.
 *
 * Changes from the original: the copy_to_user() result is checked; the log
 * line prints the kernel copy of the byte instead of treating the __user
 * pointer as a NUL-terminated string; size_t and loff_t are printed with
 * matching format specifiers; and EOF is detected with >= rather than ==.
 */
static ssize_t flow_read(struct file *f_file, char __user *buf, size_t
len, loff_t *off)
{
	printk(KERN_INFO "flowtest Driver: READ()\nlength len=%zu, Offset = %lld\n",
	       len, (long long)*off);

	/* Check to avoid the infinitely printing on screen. */
	if (*off >= 1 || len == 0)
		return 0;

	printk(KERN_INFO "Copying...\n");
	if (copy_to_user(buf, &c, 1) != 0)
		return -EFAULT;
	printk(KERN_INFO "Copied : %c\n", c);

	*off = *off + 1;
	return 1; // Return 1 on first read
}
/*
 * write() handler: remembers one character of the written data in c.
 *
 * As in the original, the character before the final byte is stored when at
 * least two bytes are written. A one-byte write stores that byte, and an
 * empty write stores nothing and returns 0; the original computed
 * buf + len - 2 in both cases and so read outside the user buffer.
 *
 * Returns len on success, or -EFAULT when the user buffer cannot be read.
 *
 * The log line now prints the kernel copy c instead of dereferencing the
 * __user pointer, and prints len with a specifier that matches size_t.
 */
static ssize_t flow_write(struct file *f_file, const char __user *buf,
size_t len, loff_t *off)
{
	size_t index;

	printk(KERN_INFO "flowtest Driver: WRITE()\n");

	if (len == 0)
		return 0;

	index = (len >= 2) ? len - 2 : 0;
	if (copy_from_user(&c, buf + index, 1) != 0)
		return -EFAULT;

	printk(KERN_INFO "Length len = %zu\n\nLast character written is - %c\n", len, c);
	return len;
}
/* release() handler: logs the call and always returns 0. */
static int flow_close(struct inode *i, struct file *f)
{
	printk(KERN_INFO "ms_tty Device: CLOSE()\n");

	return 0;
}
///* ioctl commands *///
/*
 * ioctl handler.
 *
 * FLOW_QUERY sets ioctl_test to 51 and returns it; every other command
 * returns -ENOTTY.
 *
 * The original was missing the closing brace of the function body, so every
 * definition after it was parsed as nested inside flow_ioctl(). That is what
 * produces "invalid storage class for function" and "expected declaration
 * or statement at end of input".
 */
static long flow_ioctl (struct file *filp,unsigned int cmd, unsigned long arg)
{
	switch(cmd) {
	case FLOW_QUERY:
		ioctl_test=51;
		return ioctl_test;
	default:
		return -ENOTTY;
	}
}
///////////////////File operations structure below/////////////////////////
/* Handlers the kernel invokes for operations on the flowtest device. */
struct file_operations flow_fops = {
.owner = THIS_MODULE,
.llseek = NULL,
.read = flow_read,
.write = flow_write,
.unlocked_ioctl = flow_ioctl,
.open = flow_open,
.release = flow_close
};
/*
 * Module entry point: reserves a dynamically chosen major number and
 * registers the character device with flow_fops.
 *
 * Returns 0 on success, or the negative error from alloc_chrdev_region() or
 * cdev_add(). When cdev_add() fails the reserved device numbers are released
 * again. The original ignored both return values and always returned 0.
 */
static int flow_init(void)
{
	dev_t dev = 0;
	int result;

	printk(KERN_ALERT "Here with flowTest module ... loading...\n");

	/* allocate major number dynamically. */
	result = alloc_chrdev_region(&dev, minor_num, num_devices, "mod_flowtest");
	if (result < 0) {
		printk(KERN_ALERT "flowtest: alloc_chrdev_region failed (%d)\n", result);
		return result;
	}

	i = MAJOR(dev);
	printk(KERN_ALERT "Major allocated = %d\n", i);

	cdev_init(&ms_flow_cd, &flow_fops);
	result = cdev_add(&ms_flow_cd, dev, 1);
	if (result < 0) {
		printk(KERN_ALERT "flowtest: cdev_add failed (%d)\n", result);
		unregister_chrdev_region(dev, num_devices);
		return result;
	}

	return 0;
}
static void flow_terminate(void)
{
dev_t devno=MKDEV(i,0); // wrap major/minor numbers in a dev_t structure , to pass for deassigning.
printk(KERN_ALERT "Going out... exiting...\n");
unregister_chrdev_region(devno,num_devices); //remove entry from the /proc/devices
}
/* Hook the entry/exit functions into module load and unload. */
module_init(flow_init);
module_exit(flow_terminate);
1- You're missing cdev_del() in your cleanup function. Which means the device stays registered, but the functions to handle it are unloaded, thus the crash. Also, cdev_add probably fails on the next load, but you don't know because you're not checking return values.
2- It looks ok... you modify offset, return the correct number of bytes, and then return 0 if offset is 1, which indicates EOF. But you should really check for *off >= 1.
EDIT-
The length passed into your read handler function comes all the way from user-land read(). If the user opens the device file and calls read(fd, buf, 32768);, that just means the user wants to read up to 32768 bytes of data. That length gets passed all the way to your read handler. If you don't have 32768 bytes of data to supply, you supply what you have, and return the length. Now, the user code isn't sure if that's the end of the file or not, so it tries for another 32768 read. You really have no data now, so you return 0, which tells the user code that it has hit EOF, so it stops.
In summary, what you're seeing as some sort of default value at the read handler is just the block size that the utility cat uses to read anything. If you want to see a different number show up at your read function, try using dd instead, since it lets you specify the block size.
dd if=/dev/flowtest of=/dev/null bs=512 count=1
In addition, this should read one block and stop, since you're specifying count=1. If you omit count=1, it will look more like cat, and try to read until EOF.
For 2, make sure you start your module as a char device when using mknod.
mknod /dev/your_device c major_number minor_number

Resources