I develop a kernel module using DMA dma_alloc_coherent() and remap_pfn_range().
Sometimes, when I close the app that opened the character device, I get the following message in dmesg. That leads to a kernel panic a few seconds (a random delay) later.
[ 3275.772330] BUG: Bad page map in process gnome-shell pte:b3e05275201 pmd:238adf067
[ 3275.772337] addr:00007f20bce00000 vm_flags:08000070 anon_vma: (null) mapping:ffff969f236dcdd0 index:b8
[ 3275.772375] vma->vm_ops->fault: xfs_filemap_fault+0x0/0x30 [xfs]
[ 3275.772400] vma->vm_file->f_op->mmap: xfs_file_mmap+0x0/0x80 [xfs]
[ 3275.772413] CPU: 5 PID: 4809 Comm: gnome-shell Kdump: loaded Tainted: G OE ------------ 3.10.0-1127.19.1.el7.x86_64 #1
[ 3275.772416] Hardware name: System manufacturer System Product Name/PRIME H370M-PLUS, BIOS 1801 10/17/2019
[ 3275.772417] Call Trace:
[ 3275.772425] [<ffffffffbb97ffa5>] dump_stack+0x19/0x1b
[ 3275.772432] [<ffffffffbb3ee311>] print_bad_pte+0x1f1/0x290
[ 3275.772436] [<ffffffffbb3f0676>] vm_normal_page+0xa6/0xb0
[ 3275.772440] [<ffffffffbb3f0ccb>] unmap_page_range+0x64b/0xc80
[ 3275.772444] [<ffffffffbb3f1381>] unmap_single_vma+0x81/0xf0
[ 3275.772448] [<ffffffffbb3f2db9>] unmap_vmas+0x49/0x90
[ 3275.772454] [<ffffffffbb3fcdbc>] exit_mmap+0xac/0x1a0
[ 3275.772458] [<ffffffffbb454db5>] ? flush_old_exec+0x3b5/0x950
[ 3275.772463] [<ffffffffbb298667>] mmput+0x67/0xf0
[ 3275.772467] [<ffffffffbb454f00>] flush_old_exec+0x500/0x950
[ 3275.772472] [<ffffffffbb4b38d0>] load_elf_binary+0x340/0xdb0
[ 3275.772476] [<ffffffffbb52cd53>] ? ima_get_action+0x23/0x30
[ 3275.772479] [<ffffffffbb52c26e>] ? process_measurement+0x8e/0x250
[ 3275.772482] [<ffffffffbb52c729>] ? ima_bprm_check+0x49/0x50
[ 3275.772486] [<ffffffffbb45454a>] search_binary_handler+0x9a/0x1c0
[ 3275.772490] [<ffffffffbb455c56>] do_execve_common.isra.24+0x616/0x880
[ 3275.772493] [<ffffffffbb456159>] SyS_execve+0x29/0x30
[ 3275.772498] [<ffffffffbb993478>] stub_execve+0x48/0x80
Here the process is gnome-shell but that doesn't mean anything, I saw lots of different processes, it can be anything.
In BUG: Bad page map in process gnome-shell pte:b3e05275201 pmd:238adf067
238adf067 is the base physical address of a coherent memory allocated by my driver, with an offset of 0x67 (0x238adf000)
All those messages always come with the physical address and 0x67 offset!
Those prints are generated from here: https://github.com/torvalds/linux/blob/7cf726a59435301046250c42131554d9ccc566b8/mm/memory.c#L536
I tried to remove everything useless, here is the code showing the order I call API functions:
/* Callbacks wired up for the character device node. */
const struct file_operations pcie_fops = {
	.owner   = THIS_MODULE,
	.mmap    = chr_mmap,
	.open    = chr_open,
	.release = chr_release,
};
static int chr_open(struct inode *inode, struct file *filp) {
struct custom_data *custom_data;
struct chr_dev_bookkeep *chr_dev_bk;
chr_dev_bk = container_of(inode->i_cdev, struct chr_dev_bookkeep, cdev);
custom_data = kzalloc(sizeof(*custom_data), GFP_KERNEL);
filp->private_data = custom_data;
return 0;
}
static int chr_release(struct inode *inode, struct file *filp) {
struct custom_data *custom_data;
custom_data = filp->private_data;
// ==========================> FREED HERE <================================
dma_free_coherent(&pdev->dev, size, virt_addr, bus_addr);
filp->private_data = NULL;
kfree(custom_data);
return 0;
}
static int chr_mmap(struct file *filp, struct vm_area_struct *vma) {
int ret;
struct custom_data *custom_data;
custom_data = filp->private_data;
chr_dev_bk = custom_data->chr_dev_bk;
vm_len = PAGE_ALIGN(vma->vm_end - vma->vm_start);
vma->vm_flags |= VM_PFNMAP | VM_DONTCOPY | VM_DONTEXPAND;
vma->vm_private_data = custom_data;/*not really used because no vm_operations_struct.close*/
virt_addr = dma_alloc_coherent(&pdev->dev, vm_len, &bus_addr, GFP_KERNEL | __GFP_ZERO);
set_memory_uc((unsigned long)virt_addr, (vm_len / PAGE_SIZE));
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
ret = remap_pfn_range(vma, vma->vm_start,
bus_addr >> PAGE_SHIFT,
vm_len,
vma->vm_page_prot);
return ret;
}
I believe that bug can't come from my application code.
mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, filehandler, 0);.
Edit:
@Ian Abbott's comment made me look at the kernel source code, and I found a comment at dma_mmap_attrs() (= dma_mmap_coherent()): https://elixir.bootlin.com/linux/v4.7/source/include/linux/dma-mapping.h#L309
The coherent DMA buffer must not be freed by the
driver until the user space mapping has been released.
"Has been released": I guess that means the buffer must not be freed during the release itself.
I believe that's also true for memory mapped with remap_pfn_range().
I'll write an answer if that was the problem.
Related
I have this device tree.
reserved-memory {
	#address-cells = <2>;
	#size-cells = <2>;
	ranges;

	/* 0x30000000 bytes at 0x90000000, excluded from the kernel's normal mapping */
	axpu_reserved_mem: axpursvd@90000000 {
		no-map;
		reg = <0x0 0x90000000 0x0 0x30000000>;
	};
};

axpu@50000000 {
	compatible = "ab21-axpu";
	reg = <0 0x50000000 0 0x10000000>;
	...
	memory-region = <&axpu_reserved_mem>;
};
With simple test, in the probe function, I did something like this (reduced).
/* Per-device state filled in by axpu_probe(). */
struct axpu_dev {
	struct device *dev;	/* owning platform device's struct device */
	void __iomem *base;	/* ioremapped register window (MEM resource 0) */
	u64 paddr;		/* start of the "memory-region" carve-out */
	u64 vaddr;		/* memremap() result for that carve-out, stored as an integer */
};
static int axpu_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *res;
struct axpu_dev *axpu;
struct device_node *np;
int rc;
int ret;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
axpu = devm_kzalloc(dev, sizeof(*axpu), GFP_KERNEL);
axpu->dev = dev;
axpu->base = devm_ioremap(dev, res->start, resource_size(res));
res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
devm_request_irq(dev, res->start, axpu_irq_handler, IRQF_TRIGGER_HIGH, "axpu_irq", axpu);
np = of_parse_phandle(dev->of_node, "memory-region", 0);
rc = of_address_to_resource(np, 0, res);
axpu->paddr = res->start;
axpu->vaddr = memremap(res->start, resource_size(res), MEMREMAP_WB);
platform_set_drvdata(pdev, axpu);
axpu_init(axpu);
return sysfs_create_group(&dev->kobj, &axpu_attr_group);
}
and in the axpu_init function, I access it with virtual address.
static void axpu_init(struct axpu_dev *axpu)
{
printk("testing reserved memory ..\n");
writel_relaxed(0x12345678, axpu->vaddr + 0);
writel_relaxed(0x23456789, axpu->vaddr + 8);
printk("read-back data = %llx, %llx\n", readl_relaxed(axpu->vaddr + 0), readl_relaxed(axpu->vaddr + 8));
}
When I execute it, it runs ok.
/ # insmod axpu.ko
axpu_probe called!
MEM : res->start = 50000000, res->end = 5fffffff, res->name = axpu#50000000
axpu->base = ffffffc010000000
IRQ : res->start = 15, res->end = 15, res->name = axpu#50000000
axpu_dev 50000000.axpu: Allocated reserved memory, vaddr: 0xFFFFFFC080000000, paddr: 0x90000000
writing 0x12345678 at non-mapped reserved memory 0x90000000, 0x90000008 ..
read-back data = 12345678, 23456789
I have put no-map in the reserved-memory sub-node, but the region is still accessible through the virtual address.
But if I try it with physical address, it crashes as below.
------------[ cut here ]------------
Ignoring spurious kernel translation fault at virtual address 0000000090000000
WARNING: CPU: 0 PID: 27 at arch/arm64/mm/fault.c:311 __do_kernel_fault+0x108/0x150
odules linked in: axpu(+)
CPU: 0 PID: 27 Comm: insmod Not tainted 5.10.0-rc5 #548
Hardware name: ETRI ab21m (DT)
pstate: 60400009 (nZCv daif +PAN -UAO -TCO BTYPE=--)
pc : __do_kernel_fault+0x108/0x150
lr : __do_kernel_fault+0x108/0x150
The document says about no-map :
no-map (optional) - empty property
- Indicates the operating system must not create a virtual mapping
of the region as part of its standard mapping of system memory,
nor permit speculative access to it under any circumstances other
than under the control of the device driver using the region.
Then what is wrong with my device tree?
I want to transfer a transaction structure, which contains a user-space pointer to an array, to the kernel by using copy_from_user.
The goal is, to get access to the array elements in kernel space.
User space side:
I allocate an array of _sg_param structures in user space. Then I put the address of this array in a transaction structure (line (*)).
Then i transfer the transaction structure to the kernel via ioctl().
Kernel space side:
On executing this ioctl, the complete transaction structure is copied to kernel space (line (**)). Kernel memory is then allocated to hold the array (line (***)). Then I try to copy the array from user space into the newly allocated kernel memory (line (****)), and this is where my problems start:
The kernel is corrupted during execution of this copy. dmesg shows following output:
[ 54.443106] Unhandled fault: page domain fault (0x01b) at 0xb6f09738
[ 54.448067] pgd = ee5ec000
[ 54.449465] [b6f09738] *pgd=2e9d7831, *pte=2d56875f, *ppte=2d568c7f
[ 54.454411] Internal error: : 1b [#1] PREEMPT SMP ARM
Any ideas ???
The following is a simplified extract of my code:
// structure declaration

/* One entry of the buffer list passed through the ioctl. */
typedef struct _sg_param {
	void *seg_buf;
	int   seg_len;
	int   received;
} sg_param_t;

/* Request block handed to the driver; `trans` is the instance used below. */
struct transaction {
	...
	int num_of_elements;     /* number of entries in pbuf_list */
	sg_param_t *pbuf_list;   // Array of sg_param structure
	...
} trans;
// user space side:
if ((pParam = (sg_param_t *) malloc(NR_OF_STRUCTS * sizeof(sg_param_t))) == NULL) {
return -ENOMEM;
}
else {
trans.num_of_elements = NR_OF_STRUCTS;
trans.pbuf_list = pParam; // (*)
}
rc = ioctl(dev->fd, MY_CMD, &trans);
if (rc < 0) {
return rc;
}
// kernel space side
/*
 * ioctl handler (simplified extract; the end of the function is not shown).
 * For MY_CMD it copies the caller's struct transaction into the kernel-side
 * `trans`, then allocates a kernel array and tries to fill it with the
 * caller's pbuf_list entries.
 * Returns -EFAULT when a user copy fails and -ENOMEM when kmalloc() fails.
 */
static long ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
arg_ptr = (void __user *)arg;
// Perform the specified command
switch (cmd) {
case MY_CMD:
{
struct transaction *__user user_trans;
user_trans = (struct transaction *__user)arg_ptr;
/* After this copy trans.pbuf_list holds the user-space address of the array. */
if (copy_from_user(&trans, arg_ptr, sizeof(trans)) != 0) { // (**)
k_err("Unable to copy transfer info from userspace for "
"AXIDMA_DMA_START_DMA.\n");
return -EFAULT;
}
/* NOTE(review): num_of_elements comes from user space and is not range-checked here. */
int size = trans.num_of_elements * sizeof(sg_param_t);
if (trans.pbuf_list != NULL) {
// Allocate kernel memory for buf_list
/* This assignment overwrites the user pointer that was just copied into trans. */
trans.pbuf_list = (sg_param_t *) kmalloc(size, GFP_KERNEL); // (***)
if (trans.pbuf_list == NULL) {
k_err("Unable to allocate array for buffers.\n");
return -ENOMEM;
}
// Now copy pbuf_list from user space to kernel space
/*
 * user_trans is a __user pointer, so user_trans->pbuf_list reads user
 * memory directly instead of going through copy_from_user().
 */
if (copy_from_user(trans.pbuf_list, user_trans->pbuf_list, size) != 0) { // (****)
kfree(trans.pbuf_list);
return -EFAULT;
}
}
break;
}
}
You're directly accessing userspace data (user_trans->pbuf_list). You should use the one that you've already copied to kernel (trans.pbuf_list).
Code for this would normally be something like:
sg_param_t *local_copy = kmalloc(size, ...);
// TODO check it succeeded
if (copy_from_user(local_copy, trans.pbuf_list, size) ...)
trans.pbuf_list = local_copy;
// use trans.pbuf_list
Note that you also need to check trans.num_of_elements to be valid (0 would make kmalloc return ZERO_SIZE_PTR, and too big value might be a way for DoS).
I want to reserve some memory to save kernel information. I copied reserve_crashkernel function to arm64 and modified it:
/* Crash kernel regions are sized and aligned in 16M units */
#define CRASH_ALIGN (16 << 20)

/*
 * Resource describing the area reserved for the crash kernel; start/end
 * stay 0 until reserve_crashkernel() has picked a range.
 */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.flags = IORESOURCE_MEM,
	.start = 0,
	.end   = 0
};
/*
 * reserve_crashkernel() - carve CRASH_ALIGN bytes out of memblock and
 * publish the range as the "Crash kernel" resource.
 *
 * Looks for a CRASH_ALIGN-aligned range between CRASH_ALIGN and the end of
 * DRAM, reserves it in memblock and inserts crashk_res into iomem_resource.
 * Failures are only logged.
 */
static void __init reserve_crashkernel(void)
{
	unsigned long long size = CRASH_ALIGN;
	unsigned long long total = memblock_phys_mem_size();
	unsigned long long base;

	pr_info("crashkernel find memory %x - %llx.\n", CRASH_ALIGN, memblock_end_of_DRAM());

	base = memblock_find_in_range(CRASH_ALIGN, memblock_end_of_DRAM(),
				      size, CRASH_ALIGN);
	if (base == 0) {
		pr_info("crashkernel reservation failed - No suitable area found.\n");
		return;
	}

	if (memblock_reserve(base, size)) {
		pr_err("%s: Error reserving crashkernel memblock.\n", __func__);
		return;
	}

	pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
		(unsigned long)(size >> 20),
		(unsigned long)(base >> 20),
		(unsigned long)(total >> 20));

	crashk_res.start = base;
	crashk_res.end = base + size - 1;
	/*
	 * NOTE(review): the range is inserted directly under iomem_resource;
	 * if "System RAM" is registered over the same range afterwards, that
	 * later registration may conflict - confirm the ordering.
	 */
	insert_resource(&iomem_resource, &crashk_res);
}
When the kernel started, I can find kernel print like this:
[ 0.000000] crashkernel find memory 1000000 - 210000000.
[ 0.000000] Reserving 16MB of memory at 8272MB for crashkernel (System RAM: 8190MB)
But the /proc/iomem doesn't seem right. Without my code there is a 'System RAM' region:
100000000-20fffffff : System RAM
Now with reserve_crashkernel, the region changed to:
205000000-205ffffff : Crash kernel
I don't know why the 'System RAM' region disappeared, and I'm not sure that my code is correct.
I wrote a simple FS that should only statically contain one file named hello. This file should contain the string Hello, world!. I did this for educational purposes. While the fs is mounted it actually behaves like expected. I can read the file just fine.
However after unmounting I always get
VFS: Busy inodes after unmount of dummyfs. Self-destruct in 5 seconds. Have a nice day...
If I called ls on the rootdir while the fs was mounted I get
BUG: Dentry (ptrval){i=2,n=hello} still in use (-1) [unmount of dummyfs dummyfs]
on top of that.
What does this mean in detail and how can I fix it?
The mount and kill_sb routines call mount_nodev and allocate space for a struct holding the 2 inodes this FS uses.
/*
 * Mount callback: dummyfs has no backing device, so the superblock is set
 * up through mount_nodev() with dummyfs_fill_super(). The result (root
 * dentry or ERR_PTR) is passed straight back; a failure is also logged.
 */
static struct dentry *dummyfs_mount(struct file_system_type* fs_type,
		int flags, const char* dev_name, void* data)
{
	struct dentry *root = mount_nodev(fs_type, flags, data, dummyfs_fill_super);

	if (IS_ERR(root))
		printk(KERN_ERR "dummyfs_mount failed");

	return root;
}
/* Unmount callback: free the per-superblock info, then tear down the superblock. */
static void dummyfs_kill_sb(struct super_block *sb)
{
	void *info = sb->s_fs_info;

	kfree(info);
	kill_litter_super(sb);
}
The fill superblock method creates the 2 inodes and saves them in the struct allocated by mount:
static int dummyfs_fill_super(struct super_block *sb, void *data, int flags)
{
struct dummyfs_info *fsi;
sb->s_magic = DUMMYFS_MAGIC;
sb->s_op = &dummyfs_sops;
fsi = kzalloc(sizeof(struct dummyfs_info), GFP_KERNEL);
sb->s_fs_info = fsi;
fsi->root = new_inode(sb);
fsi->root->i_ino = 1;
fsi->root->i_sb = sb;
fsi->root->i_op = &dummyfs_iops;
fsi->root->i_fop = &dummyfs_dops;
fsi->root->i_atime = fsi->root->i_mtime = fsi->root->i_ctime = current_time(fsi->root);
inode_init_owner(fsi->root, NULL, S_IFDIR);
fsi->file = new_inode(sb);
fsi->file->i_ino = 2;
fsi->file->i_sb = sb;
fsi->file->i_op = &dummyfs_iops;
fsi->file->i_fop = &dummyfs_fops;
fsi->file->i_atime = fsi->file->i_mtime = fsi->file->i_ctime = current_time(fsi->file);
inode_init_owner(fsi->file, fsi->root, S_IFREG);
sb->s_root = d_make_root(fsi->root);
return 0;
}
The lookup method just adds the fsi->file_inode to the dentry if the parent is the root dir:
if (parent_inode->i_ino == fsi->root->i_ino) {
d_add(child_dentry, fsi->file);
}
And the iterate method just emits the dot files and the hello file when called:
if (ctx->pos == 0) {
dir_emit_dots(file, ctx);
ret = 0;
}
if (ctx->pos == 2) {
dir_emit(ctx, "hello", 5, file->f_inode->i_ino, DT_UNKNOWN);
++ctx->pos;
ret = 0;
}
The read method just writes a static string using copy_to_user. The offsets are calculated correctly and on EOF the method just returns 0. However since the problems occur even when the read method was not called I think it is out-of-scope for this already too long question.
For actually running this I use user-mode linux from the git master (4.15+x commit d48fcbd864a008802a90c58a9ceddd9436d11a49). The userland is compiled from scratch and the init process is a derivative of Rich Felker's minimal init to which i added mount calls for /proc, /sys and / (remount).
My command line is ./linux ubda=../uml/image root=/dev/ubda
Any pointers to more thorough documentation are also appreciated.
Using gdb watching the dentry->d_lockref.count I realized that the kill_litter_super call in umount was actually responsible for the dentry issues. Replacing it with kill_anon_super solved that problem.
The busy inode problem mostly vanished too, except when I unmounted immediately after mounting. Allocating the second inode lazily solved that problem too.
I am trying to interface freescale imx6 SoC with mpu92/65 sensor device.
I have taken mpu92/65 device driver from android (https://github.com/NoelMacwan/Kernel-10.4.1.B.0.101/tree/master/drivers/staging/iio/imu ) and have done necessary modifications to the driver and device tree.
Device tree modifications:
&i2c3{
	...
	/* I2C slave at address 0x68; the unit address follows '@' and matches reg */
	extaccelerometer: mpu9250@68{
		compatible = "mpu9250";
		reg = <0x68>;
		interrupt-parent = <&gpio2>;
		interrupts = <9>;
		int_config = /bits/ 8 <0x00>;
		level_shifter = /bits/ 8 <0>;
		orientation = [ 01 00 00 00 01 00 00 00 01 ];
		sec_slave_type = <2>;
		sec_slave_id = <0x12>;
		secondary_i2c_addr = /bits/ 16 <0x0C>;
		secondary_orientation = [ 00 01 00 ff 00 00 00 00 01 ];
	};
};
inv_mpu_iio driver modifications:
/*
 * get_platdata() - fill st->plat_data from the device tree node of @dev.
 * @dev: device whose of_node carries the properties.
 * @st:  driver state whose plat_data member is populated.
 *
 * A property that cannot be read leaves its field untouched; a single
 * warning is logged if any read fails.
 */
static void get_platdata(struct device *dev, struct inv_mpu_iio_s *st){
	struct device_node *np = dev->of_node;
	int err = 0;

	/* of_property_read_*() return 0 on success, a negative errno otherwise. */
	err |= of_property_read_u8(np, "int_config", &st->plat_data.int_config);
	err |= of_property_read_u8(np, "level_shifter", &st->plat_data.level_shifter);
	err |= of_property_read_u8_array(np, "orientation", &st->plat_data.orientation,9);
	err |= of_property_read_u32(np, "sec_slave_type", &st->plat_data.sec_slave_type);
	err |= of_property_read_u32(np, "sec_slave_id", &st->plat_data.sec_slave_id);
	err |= of_property_read_u16(np, "secondary_i2c_addr", &st->plat_data.secondary_i2c_addr);
	err |= of_property_read_u8_array(np, "secondary_orientation", &st->plat_data.secondary_orientation,9);

	if (err)
		dev_warn(dev, "incomplete platform data in device tree\n");
}
/*
 * Probe callback (excerpt; the elided parts are marked with dots).
 * Picks the source of st->plat_data: the device tree node when the client
 * has one, otherwise the platform data attached to the device.
 */
static int inv_mpu_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
.....
if (client->dev.of_node) {
get_platdata(&client->dev, st);
} else {
/* NOTE(review): dev_get_platdata() is dereferenced without a NULL check here. */
st->plat_data = *(struct mpu_platform_data *)dev_get_platdata(&client->dev);
}
.....
}
I have retrieved the platform data from device tree in the above manner. In probe function I am getting client->irq=0. But I have mentioned about the IRQ in the device tree. Please can someone tell me what else I need to do to mention gpio2-9 (linux pad) as an interrupt line for this i2c device.
0x68 is the slave address of the i2c device. Driver probe functionality is trying to write on to the device for verifying the chip type initially. So the data and the address of the slave is sent to the adapter driver where in the adapter driver start function writes onto and reads from control and status registers is successfully executed.
/*
 * i2c_imx_start() - enable the controller and take the bus as master.
 *
 * Programs the clock divider, enables the controller, sets MSTA and waits
 * for the bus to become busy; then enables interrupts and transmit mode.
 * Returns 0 on success, or the error from clk_prepare_enable() /
 * i2c_imx_bus_busy().
 */
static int i2c_imx_start(struct imx_i2c_struct *i2c_imx)
{
	unsigned int i2cr = 0;
	int err;

	dev_dbg(&i2c_imx->adapter.dev, "<%s>\n", __func__);

	i2c_imx_set_clk(i2c_imx);

	err = clk_prepare_enable(i2c_imx->clk);
	if (err)
		return err;

	imx_i2c_write_reg(i2c_imx->ifdr, i2c_imx, IMX_I2C_IFDR,__func__);

	/* Clear the status register, then switch the controller on */
	imx_i2c_write_reg(i2c_imx->hwdata->i2sr_clr_opcode, i2c_imx, IMX_I2C_I2SR,__func__);
	imx_i2c_write_reg(i2c_imx->hwdata->i2cr_ien_opcode, i2c_imx, IMX_I2C_I2CR,__func__);

	/* Give the controller time to settle */
	udelay(50);

	/* Setting MSTA starts the transaction */
	i2cr = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
	i2cr |= I2CR_MSTA;
	imx_i2c_write_reg(i2cr, i2c_imx, IMX_I2C_I2CR,__func__);

	err = i2c_imx_bus_busy(i2c_imx, 1);
	if (err)
		return err;

	i2c_imx->stopped = 0;

	/* Interrupts on, transmit mode, TXAK set, DMA off */
	i2cr |= I2CR_IIEN | I2CR_MTX | I2CR_TXAK;
	i2cr &= ~I2CR_DMAEN;
	imx_i2c_write_reg(i2cr, i2c_imx, IMX_I2C_I2CR,__func__);

	return err;
}
Then the adapter driver writes on to the data register
imx_i2c_write_reg(msgs->addr << 1, i2c_imx, IMX_I2C_I2DR,__func__);
After this the adapter interrupt is generated ( bus interrupt got i2c3: 291).
/*
 * i2c_imx_isr() - interrupt handler of the I2C adapter.
 *
 * When the IIF flag is set in the status register, the status is saved in
 * i2c_imx->i2csr, the flag is acknowledged and the waiter on
 * i2c_imx->queue is woken. Returns IRQ_NONE if IIF was not set.
 */
static irqreturn_t i2c_imx_isr(int irq, void *dev_id)
{
	struct imx_i2c_struct *i2c_imx = dev_id;
	unsigned int status;

	printk("irq:%d\n",irq);

	status = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2SR);
	if (!(status & I2SR_IIF))
		return IRQ_NONE;

	/* keep the raw status for the transfer code */
	i2c_imx->i2csr = status;

	status &= ~I2SR_IIF;
	printk("temp=%d\n",status);
	status |= (i2c_imx->hwdata->i2sr_clr_opcode & I2SR_IIF);
	imx_i2c_write_reg(status, i2c_imx, IMX_I2C_I2SR,__func__);

	wake_up(&i2c_imx->queue);
	return IRQ_HANDLED;
}
In ISR after reading status register the value should be 162 (last bit should be 0 to indicate acknowledged) but for my device I am getting this value as 163 (last bit is 1 so it is not acknowledged). Then in acknowledge success function -EIO error is thrown. For all the other device connected to this bus the status register after writing onto the data register is 162.
I don't know why I am getting the above behavior. And one more thing is that even if I don't connect the device the start function is able to write into and read from the status and control registers. I am not sure which status register is being read and writing into. If I assume that this writes and reads the adapter registers, then can I also assume that the adapter h/w automatically reads and writes onto the device connected. If so then how am I getting the same behavior if I don't connect the device?
Please help me out.
In probe function I am getting client->irq=0. But I have mentioned about the IRQ in the device tree. Please can someone tell me what else I need to do to mention gpio2-9 (linux pad) as an interrupt line for this i2c device.
Wrong definition of interrupts property
Your interrupts definition seems incorrect:
interrupts = <9>;
It should be in "two cells" format (see Documentation/devicetree/bindings/interrupt-controller/interrupts.txt for details).
I ran next command:
$ find arch/arm/boot/dts/ -name '*imx6*' -exec grep -Hn interrupt {} \; | grep cell
and I see that most of imx6 SoCs have two-cell format for GPIO interrupts. So your definition of interrupts should look like that:
interrupts = <9 IRQ_TYPE_EDGE_FALLING>;
or if your kernel version still doesn't have named constants for IRQ types:
interrupts = <9 2>;
Refer to the datasheet or driver code for MPU9250 to figure out the type of IRQ (falling/rising).
Missing of_match_table
I'm not 100% sure that what explained next is the cause of your issue, but at least that's worth to be checked.
As I see it, the problem is that OF (device tree) matching is not happening. To fix this, in addition to .id_table you need to define and assign .of_match_table in your driver struct. So for now you have next driver definition in your driver:
static const struct i2c_device_id inv_mpu_id[] = {
...
{"mpu9250", INV_MPU9250},
...
{}
};
static struct i2c_driver inv_mpu_driver = {
...
.id_table = inv_mpu_id,
...
};
And you need to add something like this:
#include <linux/of.h>
#ifdef CONFIG_OF
static const struct of_device_id inv_mpu_of_table[] = {
...
{ .compatible = "invensense,mpu9250" },
...
{ }
};
MODULE_DEVICE_TABLE(of, inv_mpu_of_table);
#endif
static struct i2c_driver inv_mpu_driver = {
.driver = {
.of_match_table = of_match_ptr(inv_mpu_of_table),
...
},
...
};
Be sure that your compatible strings have exactly "vendor,product" format (which is "invensense,mpu9250" in your case).
Now in your device tree you can describe your device using "invensense,mpu9250" as a value for compatible property:
&i2c3 {
	...
	extaccelerometer: mpu9250@68 {
		compatible = "invensense,mpu9250";
		...
	};
};
After these steps OF matching should happen correctly and you should see your client->irq assigned appropriately (so it's not 0).
Run the following command to list all I2C/IIO drivers that have device tree support, and you'll see that they all have both tables in the driver struct:
$ git grep --all-match -e of_match_table -e '\i2c_driver' -e '\.id_table\b' drivers/iio/* | sed 's/:.*//g' | sort -u
Under the hood
Look into drivers/i2c/i2c-core.c, i2c_device_probe() function to see how IRQ number is being read from device tree for I2C device:
static int i2c_device_probe(struct device *dev)
{
...
if (dev->of_node) {
...
irq = of_irq_get(dev->of_node, 0);
}
...
client->irq = irq;
...
status = driver->probe(client, i2c_match_id(driver->id_table, client));
}
This function is being executed when device/driver match happens. Devices information is read from device tree on your I2C adapter probe. So on i2c_add_driver() call for your driver there can be match (by compatible string) with device from device tree, and i2c_device_probe() called, populating client->irq and calling your driver probe function next.
of_irq_get() function obtains IRQ number from device tree interrupts property
Also, there was an attempt to get rid of .id_table and use .of_match_table exclusively for device matching: commit. But then it was reverted further in this commit, due to some side effects. So for now we must define both .id_table AND .of_match_table for I2C driver to work correctly.