How can I use the 'no-map' property of reserved-memory in device tree? Still accessed with virtual address - memory-management

I have this device tree.
/* Carve-outs described to the kernel as reserved memory. */
reserved-memory {
	#address-cells = <2>;
	#size-cells = <2>;
	ranges;

	/* 768 MiB starting at 0x90000000; node names take "@unit-address", not "#". */
	axpu_reserved_mem: axpursvd@90000000 {
		no-map;
		reg = <0x0 0x90000000 0x0 0x30000000>;
	};
};
/* AXPU device: 256 MiB register window plus a phandle to its reserved buffer. */
axpu@50000000 {
	compatible = "ab21-axpu";
	reg = <0 0x50000000 0 0x10000000>;
	...
	memory-region = <&axpu_reserved_mem>;
};
With simple test, in the probe function, I did something like this (reduced).
/* Per-device state for the AXPU driver. */
struct axpu_dev {
struct device *dev; /* owning struct device, set in axpu_probe() */
void __iomem *base; /* ioremapped register window (IORESOURCE_MEM 0) */
u64 paddr; /* physical start address of the "memory-region" carve-out */
/* NOTE(review): axpu_probe() stores the memremap() result here and axpu_init() hands it to writel_relaxed(); a pointer type may be a better fit than u64 -- confirm. */
u64 vaddr;
};
static int axpu_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *res;
struct axpu_dev *axpu;
struct device_node *np;
int rc;
int ret;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
axpu = devm_kzalloc(dev, sizeof(*axpu), GFP_KERNEL);
axpu->dev = dev;
axpu->base = devm_ioremap(dev, res->start, resource_size(res));
res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
devm_request_irq(dev, res->start, axpu_irq_handler, IRQF_TRIGGER_HIGH, "axpu_irq", axpu);
np = of_parse_phandle(dev->of_node, "memory-region", 0);
rc = of_address_to_resource(np, 0, res);
axpu->paddr = res->start;
axpu->vaddr = memremap(res->start, resource_size(res), MEMREMAP_WB);
platform_set_drvdata(pdev, axpu);
axpu_init(axpu);
return sysfs_create_group(&dev->kobj, &axpu_attr_group);
}
and in the axpu_init function, I access it with virtual address.
static void axpu_init(struct axpu_dev *axpu)
{
printk("testing reserved memory ..\n");
writel_relaxed(0x12345678, axpu->vaddr + 0);
writel_relaxed(0x23456789, axpu->vaddr + 8);
printk("read-back data = %llx, %llx\n", readl_relaxed(axpu->vaddr + 0), readl_relaxed(axpu->vaddr + 8));
}
When I execute it, it runs ok.
/ # insmod axpu.ko
axpu_probe called!
MEM : res->start = 50000000, res->end = 5fffffff, res->name = axpu@50000000
axpu->base = ffffffc010000000
IRQ : res->start = 15, res->end = 15, res->name = axpu@50000000
axpu_dev 50000000.axpu: Allocated reserved memory, vaddr: 0xFFFFFFC080000000, paddr: 0x90000000
writing 0x12345678 at non-mapped reserved memory 0x90000000, 0x90000008 ..
read-back data = 12345678, 23456789
I have put no-map in the reserved-memory sub-node, but the region is still accessible through a virtual address.
But if I try it with physical address, it crashes as below.
------------[ cut here ]------------
Ignoring spurious kernel translation fault at virtual address 0000000090000000
WARNING: CPU: 0 PID: 27 at arch/arm64/mm/fault.c:311 __do_kernel_fault+0x108/0x150
Modules linked in: axpu(+)
CPU: 0 PID: 27 Comm: insmod Not tainted 5.10.0-rc5 #548
Hardware name: ETRI ab21m (DT)
pstate: 60400009 (nZCv daif +PAN -UAO -TCO BTYPE=--)
pc : __do_kernel_fault+0x108/0x150
lr : __do_kernel_fault+0x108/0x150
The document says about no-map :
no-map (optional) - empty property
- Indicates the operating system must not create a virtual mapping
of the region as part of its standard mapping of system memory,
nor permit speculative access to it under any circumstances other
than under the control of the device driver using the region.
Then what is wrong with my device tree?

Related

Linux device drivers: mapping MMIO with `devm_of_iomap()` not working

I am writing a kernel platform device driver for booting a group of remoteprocs. I iterate over the remoteprocs in the group (child device nodes) with the for_each_child_of_node() macro and iomap their register ranges by index. The driver fails on the first devm_of_iomap() call. I suspected that the platform device framework does not recognize the resources inside the child device nodes (and I was right!), so I printed the number of device resources from platform_get_resource() in platform.c, and the result was 1 (the share-buffer resource).
The Q: Maintaining the current DT format, how can I retrieve (or make them visible to the platform framework) these inner resources (adsp_dtcm, adsp_conf...etc) inside for loop?
// Device Tree
// ----------------------------
// Not the real values of course
// ----------------------------
/*
 * NOTE(review): this node has no "ranges" property, so the reg entries of
 * its children are not translated to CPU addresses -- confirm whether an
 * empty "ranges;" is intended here.
 */
dsp-cluster {
	#address-cells = <2>;
	#size-cells = <2>;
	compatible = "xxxxxx,dsp_remoteproc";
	dsp_count = <2>;
	reg = <0x0 0x10000000 0x0 0x100000>; // the only one recognized by the platform framework
	reg-names = "share-buffer";

	/* Child node names take "@unit-address", not "#". */
	dsp@0 {
		reg = <0x0 0xfff00000 0x0 0x40000>,
		      <0x0 0xfffc0000 0x0 0x20000>,
		      <0x0 0xfff00000 0x0 0x20000>,
		      <0x0 0xfffa0000 0x0 0x20000>,
		      <0x0 0xfffc0000 0x0 0x4000>,
		      <0x0 0xfffc8000 0x0 0x8000>,
		      <0x0 0xfffd0000 0x0 0x8000>;
		reg-names = "adsp_dtcm", "adsp_conf", "vdsp_dtcm", "vdsp_conf",
			    "cdsp_dtcm", "cdsp_itcm", "cdsp_conf";
	};

	dsp@1 {
		reg = <0x0 0x10400000 0x0 0x40000>,
		      <0x0 0xxxx0000 0x0 0x20000>,
		      <0x0 0xyyy00000 0x0 0x20000>,
		      <0x0 0xzzza0000 0x0 0x20000>,
		      <0x0 0xxxxc0000 0x0 0x4000>,
		      <0x0 0xyyyc8000 0x0 0x8000>,
		      <0x0 0xeeed0000 0x0 0x8000>;
		reg-names = "adsp_dtcm", "adsp_conf", "vdsp_dtcm", "vdsp_conf",
			    "cdsp_dtcm", "cdsp_itcm", "cdsp_conf";
	};
};
My driver code (only the section that trying to iterate and iomap these all resources inside dsp#0 and dsp#1):
/* Probe: walks the child nodes of the cluster node and iomaps a register range on each pass. (Excerpt: the function is not closed within this snippet.) */
static int dsp_remoteproc_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
u32 dsp_count = 1;
int ret;
struct device_node *dev_node = dev->of_node;
struct device_node *child = NULL;
unsigned j = 0;
/* NOTE(review): the property name literal ends with a space ("dsp_count "), so it would not match a property named dsp_count -- confirm. */
if (of_property_read_u32(dev->of_node, "dsp_count ", &dsp_count))
dev_warn(dev, "dsp_count property not exist, defaulting to 1\n");
for_each_child_of_node(dev_node, child) {
/* NOTE(review): post-increment combined with '>' lets dsp_count + 1 children through before the break -- confirm intent. */
if (!child || (j++ > dsp_count))
break;
void __iomem *iomap_ret;
// iomap adsp conf regs
/* NOTE(review): this passes the parent node (dev_node), not the loop's child, so index 1 of the parent's reg is requested on every pass -- verify whether child was intended. */
iomap_ret = devm_of_iomap(dev, dev_node, 1, NULL);
if (IS_ERR(iomap_ret))
return -ENODEV;
}
Translation of addresses in a node's reg property relies on the presence of a ranges property in the parent node. This is checked by the of_translate_one function in "drivers/of/address.c", an extract of which is shown below:
/*
* Normally, an absence of a "ranges" property means we are
* crossing a non-translatable boundary, and thus the addresses
* below the current cannot be converted to CPU physical ones.
* Unfortunately, while this is very clear in the spec, it's not
* what Apple understood, and they do have things like /uni-n or
* /ht nodes with no "ranges" property and a lot of perfectly
* useable mapped devices below them. Thus we treat the absence of
* "ranges" as equivalent to an empty "ranges" property which means
* a 1:1 translation at that level. It's up to the caller not to try
* to translate addresses that aren't supposed to be translated in
* the first place. --BenH.
*
* As far as we know, this damage only exists on Apple machines, so
* This code is only enabled on powerpc. --gcl
*
* This quirk also applies for 'dma-ranges' which frequently exist in
* child nodes without 'dma-ranges' in the parent nodes. --RobH
*/
ranges = of_get_property(parent, rprop, &rlen);
if (ranges == NULL && !of_empty_ranges_quirk(parent) &&
strcmp(rprop, "dma-ranges")) {
pr_debug("no ranges; cannot translate\n");
return 1;
}
Here, parent is the parent node, rprop is the string "ranges" for a normal range (the same function is also used for translating DMA addresses, where rprop would be the string "dma-ranges"), and the non-zero return value indicates failure. (Don't worry about the !of_empty_ranges_quirk(parent). That is just for some weird special cases.) If the parent node doesn't have the ranges property (for a normal range) then the ranges variable will be NULL and the function will return 1 to indicate failure to translate the address.
You may wonder why the code doesn't search up the tree until it finds a ranges property. The reason is that not all reg properties are used for translating physical addresses. This is explained in Device Tree Usage # Ranges (Address Translation) when discussing the reg property for the rtc@58 node (an I2C device) whose parent is the i2c@1,0 node:
You should also notice that there is no ranges property in the i2c@1,0 node. The reason for this is that unlike the external bus, devices on the i2c bus are not memory mapped on the CPU's address domain. Instead, the CPU indirectly accesses the rtc@58 device via the i2c@1,0 device. The lack of a ranges property means that a device cannot be directly accessed by any device other than its parent.

Why? BUG: Bad page map in process *process* pte:b3e05275201 pmd:238adf067

I develop a kernel module using DMA dma_alloc_coherent() and remap_pfn_range().
Sometimes, when I close the app that opened the character device, I get the following message in dmesg. This leads to a kernel panic a few (random) seconds later.
[ 3275.772330] BUG: Bad page map in process gnome-shell pte:b3e05275201 pmd:238adf067
[ 3275.772337] addr:00007f20bce00000 vm_flags:08000070 anon_vma: (null) mapping:ffff969f236dcdd0 index:b8
[ 3275.772375] vma->vm_ops->fault: xfs_filemap_fault+0x0/0x30 [xfs]
[ 3275.772400] vma->vm_file->f_op->mmap: xfs_file_mmap+0x0/0x80 [xfs]
[ 3275.772413] CPU: 5 PID: 4809 Comm: gnome-shell Kdump: loaded Tainted: G OE ------------ 3.10.0-1127.19.1.el7.x86_64 #1
[ 3275.772416] Hardware name: System manufacturer System Product Name/PRIME H370M-PLUS, BIOS 1801 10/17/2019
[ 3275.772417] Call Trace:
[ 3275.772425] [<ffffffffbb97ffa5>] dump_stack+0x19/0x1b
[ 3275.772432] [<ffffffffbb3ee311>] print_bad_pte+0x1f1/0x290
[ 3275.772436] [<ffffffffbb3f0676>] vm_normal_page+0xa6/0xb0
[ 3275.772440] [<ffffffffbb3f0ccb>] unmap_page_range+0x64b/0xc80
[ 3275.772444] [<ffffffffbb3f1381>] unmap_single_vma+0x81/0xf0
[ 3275.772448] [<ffffffffbb3f2db9>] unmap_vmas+0x49/0x90
[ 3275.772454] [<ffffffffbb3fcdbc>] exit_mmap+0xac/0x1a0
[ 3275.772458] [<ffffffffbb454db5>] ? flush_old_exec+0x3b5/0x950
[ 3275.772463] [<ffffffffbb298667>] mmput+0x67/0xf0
[ 3275.772467] [<ffffffffbb454f00>] flush_old_exec+0x500/0x950
[ 3275.772472] [<ffffffffbb4b38d0>] load_elf_binary+0x340/0xdb0
[ 3275.772476] [<ffffffffbb52cd53>] ? ima_get_action+0x23/0x30
[ 3275.772479] [<ffffffffbb52c26e>] ? process_measurement+0x8e/0x250
[ 3275.772482] [<ffffffffbb52c729>] ? ima_bprm_check+0x49/0x50
[ 3275.772486] [<ffffffffbb45454a>] search_binary_handler+0x9a/0x1c0
[ 3275.772490] [<ffffffffbb455c56>] do_execve_common.isra.24+0x616/0x880
[ 3275.772493] [<ffffffffbb456159>] SyS_execve+0x29/0x30
[ 3275.772498] [<ffffffffbb993478>] stub_execve+0x48/0x80
Here the process is gnome-shell but that doesn't mean anything, I saw lots of different processes, it can be anything.
In BUG: Bad page map in process gnome-shell pte:b3e05275201 pmd:238adf067
238adf067 is the base physical address of a coherent memory allocated by my driver, with an offset of 0x67 (0x238adf000)
All those messages always come with the physical address and 0x67 offset!
Those prints are generated from here: https://github.com/torvalds/linux/blob/7cf726a59435301046250c42131554d9ccc566b8/mm/memory.c#L536
I tried to remove everything useless, here is the code showing the order I call API functions:
/* File operations for the character device. */
const struct file_operations pcie_fops = {
.owner = THIS_MODULE,
.open = chr_open, /* allocates the per-open custom_data */
.release = chr_release, /* frees the DMA buffer and the per-open custom_data */
.mmap = chr_mmap, /* allocates a coherent buffer and maps it into the caller's VMA */
};
static int chr_open(struct inode *inode, struct file *filp) {
struct custom_data *custom_data;
struct chr_dev_bookkeep *chr_dev_bk;
chr_dev_bk = container_of(inode->i_cdev, struct chr_dev_bookkeep, cdev);
custom_data = kzalloc(sizeof(*custom_data), GFP_KERNEL);
filp->private_data = custom_data;
return 0;
}
/* Installed as .release in pcie_fops: frees the coherent DMA buffer and the per-open state, then returns 0. */
static int chr_release(struct inode *inode, struct file *filp) {
struct custom_data *custom_data;
custom_data = filp->private_data;
// ==========================> FREED HERE <================================
/* NOTE(review): pdev, size, virt_addr and bus_addr are not declared in this excerpt, and the free is unconditional -- confirm they describe a live allocation on every path, including when chr_mmap() was never called. */
dma_free_coherent(&pdev->dev, size, virt_addr, bus_addr);
filp->private_data = NULL;
kfree(custom_data);
return 0;
}
static int chr_mmap(struct file *filp, struct vm_area_struct *vma) {
int ret;
struct custom_data *custom_data;
custom_data = filp->private_data;
chr_dev_bk = custom_data->chr_dev_bk;
vm_len = PAGE_ALIGN(vma->vm_end - vma->vm_start);
vma->vm_flags |= VM_PFNMAP | VM_DONTCOPY | VM_DONTEXPAND;
vma->vm_private_data = custom_data;/*not really used because no vm_operations_struct.close*/
virt_addr = dma_alloc_coherent(&pdev->dev, vm_len, &bus_addr, GFP_KERNEL | __GFP_ZERO);
set_memory_uc((unsigned long)virt_addr, (vm_len / PAGE_SIZE));
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
ret = remap_pfn_range(vma, vma->vm_start,
bus_addr >> PAGE_SHIFT,
vm_len,
vma->vm_page_prot);
return ret;
}
I believe that bug can't come from my application code.
mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, filehandler, 0);.
Edit:
@Ian Abbott's comment made me look at the kernel source code, and I found this comment on dma_mmap_attrs() (= dma_mmap_coherent()): https://elixir.bootlin.com/linux/v4.7/source/include/linux/dma-mapping.h#L309
The coherent DMA buffer must not be freed by the
driver until the user space mapping has been released.
Has been released, I guess that means not during the release.
I believe that's also true for memory mapped with remap_pfn_range().
I'll write an answer if that was the problem.

How to reserve physical memory in kernel (arm64)

I want to reserve some memory to save kernel information. I copied reserve_crashkernel function to arm64 and modified it:
/* 16M alignment for crash kernel regions */
#define CRASH_ALIGN (16 << 20)
/* Location of the reserved area for the crash kernel */
/* start and end are zero here; reserve_crashkernel() fills them in once a range has been reserved. */
struct resource crashk_res = {
.name = "Crash kernel",
.start = 0,
.end = 0,
.flags = IORESOURCE_MEM
};
/*
 * reserve_crashkernel() - reserve one CRASH_ALIGN-sized, CRASH_ALIGN-aligned
 * block of memory and publish it as the "Crash kernel" resource.
 *
 * Searches between CRASH_ALIGN and the end of DRAM, reserves the block in
 * memblock, records its bounds in crashk_res and inserts crashk_res into
 * iomem_resource. Returns early, after logging, if no range is found or the
 * reservation fails.
 */
static void __init reserve_crashkernel(void)
{
	unsigned long long crash_size, crash_base, total_mem;
	int ret;

	crash_size = CRASH_ALIGN;
	total_mem = memblock_phys_mem_size();

	/* Cast so the argument matches %llx whatever the width of phys_addr_t. */
	pr_info("crashkernel find memory %x - %llx.\n", CRASH_ALIGN,
		(unsigned long long)memblock_end_of_DRAM());

	crash_base = memblock_find_in_range(CRASH_ALIGN, memblock_end_of_DRAM(),
					    crash_size, CRASH_ALIGN);
	if (!crash_base) {
		pr_info("crashkernel reservation failed - No suitable area found.\n");
		return;
	}

	ret = memblock_reserve(crash_base, crash_size);
	if (ret) {
		pr_err("%s: Error reserving crashkernel memblock.\n", __func__);
		return;
	}

	/* The arguments are unsigned long, so the conversions are %lu, not %ld. */
	pr_info("Reserving %luMB of memory at %luMB for crashkernel (System RAM: %luMB)\n",
		(unsigned long)(crash_size >> 20),
		(unsigned long)(crash_base >> 20),
		(unsigned long)(total_mem >> 20));

	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	/*
	 * NOTE(review): this places the entry directly under iomem_resource. If
	 * the architecture registers its "System RAM" ranges afterwards, that
	 * registration may conflict with this entry -- check the call order.
	 */
	insert_resource(&iomem_resource, &crashk_res);
}
When the kernel started, I can find kernel print like this:
[ 0.000000] crashkernel find memory 1000000 - 210000000.
[ 0.000000] Reserving 16MB of memory at 8272MB for crashkernel (System RAM: 8190MB)
But the /proc/iomem doesn't seem right. Without my code there is a 'System RAM' region:
100000000-20fffffff : System RAM
Now with reserve_crashkernel, the region changed to:
205000000-205ffffff : Crash kernel
I don't know why the 'System RAM' region disappeared, and I'm not sure that my code is correct.

Get device tree node properties from tty_struct structure

Is it possible to get device tree node properties of a tty device from its tty_struct structure?
The goal would be to retrieve some custom properties of the serial port from the device tree and use them in a line discipline that I'm going to link to the serial port.
This is the device tree node of the uart I'm using.
/* UART2 node, extended with two custom properties (master, baudrate) for the line discipline. */
uart2: serial@021e8000 {
	compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
	reg = <0x021e8000 0x4000>;
	interrupts = <0 27 IRQ_TYPE_LEVEL_HIGH>;
	clocks = <&clks IMX6QDL_CLK_UART_IPG>,
		 <&clks IMX6QDL_CLK_UART_SERIAL>;
	clock-names = "ipg", "per";
	dmas = <&sdma 27 4 0>, <&sdma 28 4 0>;
	dma-names = "rx", "tx";
	pinctrl-names = "default";
	pinctrl-0 = <&pinctrl_uart2>;
	status = "okay";
	master = <0>;
	/* Integer cells must be wrapped in <>; a bare number is not valid DTS. */
	baudrate = <9600>;
};
This is part of the c code in the line discipline I'm working on.
/* Line-discipline operations table named "custom_ldisc"; further callbacks are elided in this excerpt. */
static struct tty_ldisc_ops my_ldisc = {
.owner = THIS_MODULE,
.magic = TTY_LDISC_MAGIC,
.name = "custom_ldisc",
.open = my_ldisc_open, /* called with the tty the discipline is attached to */
...
};
/*
 * my_ldisc_open() - .open callback of the custom line discipline.
 * @tty: tty the discipline is being attached to.
 *
 * Placeholder body.
 *
 * Return: 0.
 */
static int my_ldisc_open(struct tty_struct *tty)
{
	// here I would like to access "master" and
	// "baudrate" properties

	/* A non-void function must return a value; report success for now. */
	return 0;
}
Using ldattach on ttymxc1 (tty device on uart2) with my_ldisc as discipline,
my_ldisc_open() is called, but the parameter *tty seems to have no references
to the uart2 device tree node.

How to work with reserved CMA memory?

I would like to allocate a piece of physically contiguous reserved memory (at predefined physical addresses) for my device with DMA support.
As I see CMA has three options:
1. To reserve memory via kernel config file. 2. To reserve memory via kernel cmdline. 3. To reserve memory via device-tree memory node.
In the first case: size and number of areas could be reserved.
CONFIG_DMA_CMA=y
CONFIG_CMA_AREAS=7
CONFIG_CMA_SIZE_MBYTES=8
So I could use:
start_cma_virt = dma_alloc_coherent(dev->cmadev, (size_t)size_cma, &start_cma_dma, GFP_KERNEL);
in my driver to allocate contiguous memory. I could use it max 7 times and it will be possible allocate up to 8M. But unfortunately
dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));
from arch/arm/mm/init.c:
void __init arm_memblock_init(struct meminfo *mi,const struct machine_desc *mdesc)
it is impossible to set predefined physical addresses for contiguous allocation.
Of Course I could use kernel cmdline:
mem=cma=cmadevlabel=8M@32M cma_map=mydevname=cmadevlabel
//struct device *dev = cmadev->dev; /*dev->name is mydevname*/
After that dma_alloc_coherent() should alloc memory in physical memory area from 32M + 8M (0x2000000 + 0x800000) up to 0x27FFFFF.
But unfortunately I have problem with this solution. Maybe my cmdline has error?
Next one try was device tree implementation.
/* 1 MiB region at 0x02000000, labelled cmadev_region so other nodes can reference it by phandle. */
cmadev_region: mycma {
/*no-map;*/ /*DMA coherent memory*/
/*reusable;*/
reg = <0x02000000 0x00100000>;
};
And phandle in some node:
memory-region = <&cmadev_region>;
As I saw in kernel usual it should be used like:
of_find_node_by_name(); //find needed node
of_parse_phandle(); //resolve a phandle property to a device_node pointer
of_get_address(); //get DT __be32 physical addresses
of_translate_address(); //DT represent local (bus, device) addresses so translate it to CPU physical addresses
request_mem_region(); //reserve IOMAP memory (cat /proc/iomem)
ioremap(); //alloc entry in page table for reserved memory and return kernel logical addresses.
But I want to use DMA through dma_alloc_coherent() (as far as I know, the only external API function for this) instead of I/O mapping with ioremap(). But how do I call
start_cma_virt = dma_alloc_coherent(dev->cmadev, (size_t)size_cma, &start_cma_dma, GFP_KERNEL);
so that it associates the memory from the device tree (reg = <0x02000000 0x00100000>;) with dev->cmadev? In the cmdline case it is clear: it has a device name and an address region.
After calling of_parse_phandle(), is the reserved memory automatically booked for the driver that parses the DT? And will the next dma_alloc_coherent() call then allocate the DMA area inside the memory from cmadev_region: mycma?
To use dma_alloc_coherent() on reserved memory node, you need to declare that area as dma_coherent. You can do some thing like:
In dt:
/* Reserved region matched by the driver through its compatible string. */
cmadev_region: mycma {
	compatible = "compatible-name";	/* every property must end with ';' */
	no-map;
	reg = <0x02000000 0x00100000>;
};
In your driver:
/* Device on which the reserved region is declared as coherent DMA memory. */
struct device *cma_dev;
/* device_init hook: declares the range starting at rmem->base, rmem->size bytes long, as coherent memory for cma_dev. Returns 0, or the non-zero value from dma_declare_coherent_memory() after logging. */
static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
{
int ret;
/* NOTE(review): `mem` is not declared anywhere in this snippet; cma_dev is never assigned here and the `dev` argument is unused -- confirm what was intended. */
if (!mem) {
ret = dma_declare_coherent_memory(cma_dev, rmem->base, rmem->base,
rmem->size, DMA_MEMORY_EXCLUSIVE);
if (ret) {
pr_err("Error");
return ret;
}
}
return 0;
}
/*
 * device_release hook: clears the device's coherent-memory pointer.
 * Does nothing when no device is supplied.
 */
static void rmem_dma_device_release(struct reserved_mem *rmem,
				    struct device *dev)
{
	if (!dev)
		return;

	dev->dma_mem = NULL;
}
/* Callbacks attached to the reserved region by cma_setup(). */
static const struct reserved_mem_ops rmem_dma_ops = {
.device_init = rmem_dma_device_init, /* declares the region as coherent memory */
.device_release = rmem_dma_device_release, /* clears dev->dma_mem */
};
int __init cma_setup(struct reserved_mem *rmem)
{
rmem->ops = &rmem_dma_ops;
return 0;
}
RESERVEDMEM_OF_DECLARE(some-name, "compatible-name", cma_setup);
Now on this cma_dev you can perform dma_alloc_coherent and get memory.

Resources