I want to count and sample the memory load operations that happen in a chunk of code while running it on an AMD machine. I know that the following code can be used to count the memory loads happening in a code chunk on an Intel machine:
/*
 * Count one raw hardware event over a region of user-space code.
 *
 * A perf event is opened for the calling thread on any CPU (pid 0, cpu -1),
 * created disabled, and restricted to user mode (kernel and hypervisor
 * excluded).  The counter is reset and enabled immediately before the
 * profiled region and disabled immediately after it, then read back as a
 * single 64-bit value.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if the event cannot be
 * opened, controlled, or read.
 */
int
main(int argc, char **argv)
{
	struct perf_event_attr pe;
	long long count = 0;
	int fd;

	(void)argc;
	(void)argv;

	memset(&pe, 0, sizeof(pe));
	pe.type = PERF_TYPE_RAW;
	pe.size = sizeof(pe);
	/* event number of MEM_UOPS_RETIRED.ALL_LOADS
	   event is 81d0 */
	pe.config = 0x81d0;
	pe.disabled = 1;
	pe.exclude_kernel = 1;
	pe.exclude_hv = 1;

	fd = perf_event_open(&pe, 0, -1, -1, 0);
	if (fd == -1) {
		fprintf(stderr, "Error opening %llx\n", pe.config);
		exit(EXIT_FAILURE);
	}

	if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1 ||
	    ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
		perror("ioctl(PERF_EVENT_IOC_RESET/ENABLE)");
		close(fd);
		exit(EXIT_FAILURE);
	}

	// code chunk to be profiled is here

	if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
		perror("ioctl(PERF_EVENT_IOC_DISABLE)");
		close(fd);
		exit(EXIT_FAILURE);
	}

	/* A short or failed read would leave `count` meaningless; do not print it. */
	if (read(fd, &count, sizeof(count)) != (ssize_t)sizeof(count)) {
		perror("read");
		close(fd);
		exit(EXIT_FAILURE);
	}

	printf("Number of load operations: %lld\n", count);
	close(fd);
	return 0;
}
However, I don't know how to get the count of the same event on an AMD machine. I have read AMD's documentation on performance counters and IBS in https://www.amd.com/system/files/TechDocs/24593.pdf (sections 13.2 and 13.3), yet there is no mention of any hardware event number for memory loads that can be passed to pe.config in the above code.
So, how can I count or sample memory load operations in an AMD machine by using perf_event_open?
PS: The AMD processor that I am using is AMD EPYC 7551 32-Core.
I am currently using a Kontron smarc-samx6i board running a Yocto kernel. The image does not provide the i2cget command for reading I2C data from a sensor. Is there any other way to read I2C data from the sensors?
You can add "i2c-tools" to your image:
IMAGE_INSTALL_append = " i2c-tools"
They include i2cget.c.
The recipe can be found in: /meta/recipes-devtools/i2c-tools/
You might also want to look into writing your own application using the i2c dev interface. See the kernel documentation on i2c.
read:
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/i2c-dev.h>

#define I2C_ADDR 0x20

/*
 * Read one byte from the I2C slave at I2C_ADDR on bus /dev/i2c-0.
 *
 * The byte 0xFF is written to the device first and then a single byte is
 * read back and printed as two hex digits.
 * (presumably the 0xFF write prepares the device for reading -- confirm
 * against the datasheet of the attached part)
 *
 * Returns 0 on success, 1 if the bus cannot be opened, the slave address
 * cannot be selected, or either transfer fails.
 */
int main(void)
{
	/* unsigned: a plain (possibly signed) char >= 0x80 would be
	 * sign-extended by the %02X conversion below. */
	unsigned char buffer[1];
	int rc = 1;
	int fd;

	fd = open("/dev/i2c-0", O_RDWR);
	if (fd < 0) {
		printf("Error opening file: %s\n", strerror(errno));
		return 1;
	}

	if (ioctl(fd, I2C_SLAVE, I2C_ADDR) < 0) {
		printf("ioctl error: %s\n", strerror(errno));
		goto out;
	}

	buffer[0] = 0xFF;
	if (write(fd, buffer, 1) != 1) {
		printf("Error writing file: %s\n", strerror(errno));
		goto out;
	}

	if (read(fd, buffer, 1) != 1) {
		printf("Error reading file: %s\n", strerror(errno));
		goto out;
	}

	printf("0x%02X\n", buffer[0]);
	rc = 0;
out:
	close(fd);
	return rc;
}
write:
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/i2c-dev.h>

#define I2C_ADDR 0x20

/*
 * Write every byte value 0..255 to the I2C slave at I2C_ADDR on bus
 * /dev/i2c-0, one byte per transfer, pausing 100 ms between transfers.
 *
 * A failed transfer is reported and the sweep continues with the next value.
 *
 * Returns 0 after the sweep, 1 if the bus cannot be opened or the slave
 * address cannot be selected.
 */
int main(void)
{
	int value;
	int fd;

	fd = open("/dev/i2c-0", O_RDWR);
	if (fd < 0) {
		printf("Error opening file: %s\n", strerror(errno));
		return 1;
	}

	if (ioctl(fd, I2C_SLAVE, I2C_ADDR) < 0) {
		printf("ioctl error: %s\n", strerror(errno));
		close(fd);
		return 1;
	}

	for (value = 0; value <= 255; value++) {
		/* Send an explicit byte: writing the first byte of an int would
		 * transmit 0 for every value on a big-endian machine. */
		unsigned char byte = (unsigned char)value;

		if (write(fd, &byte, 1) != 1) {
			printf("Error writing file: %s\n", strerror(errno));
		}
		usleep(100000);
	}

	close(fd);
	return 0;
}
I would like to know the CR0-CR4 register values on x86. Can I write inline assembly to read it out? Are there any other methods? (e.g., does OS keep any file structures to record these values)
The Linux kernel has functions to read and write the control registers: the read_crX and write_crX functions for the standard CRs, and xgetbv/xsetbv for the extended CRs.
User mode applications need a LKM to indirectly use these functions.
In theory you just need to create a LKM with one or more devices and handle IO requests by reading or writing from CR. In practice you usually have more than one CPU, so you need to handle MP.
I used the kernel module for CPUID as a template and created this LKM.
CODE IS WITHOUT ANY WARRANTY, TESTED ON DEBIAN 8 ON 64 bit VM ONLY
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#include <linux/fs.h> /* Needed for KERN_INFO */
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/smp.h>
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/uaccess.h>
#include <linux/gfp.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/xcr.h>
/*
 * Device minor number layout: the low 8 bits select the register and the
 * bits above them select the CPU.  Extended control registers (XCRn) use
 * register numbers starting at XCR_MINOR_BASE.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. `a | b`) expand with the intended precedence.
 */
#define MAKE_MINOR(cpu, reg) (((cpu) << 8) | (reg))
#define GET_MINOR_REG(minor) ((minor) & 0xff)
#define GET_MINOR_CPU(minor) ((minor) >> 8)
#define XCR_MINOR_BASE 0x80
/* Character-device major number; assigned in ctrlreg_init() from the value returned by __register_chrdev(). */
static int major_n = 0;
/* Device class under which the per-CPU register nodes are created; set in ctrlreg_init(). */
static struct class *ctrlreg_class;
/*
 * Request/response record passed to the per-CPU helpers
 * ctrlreg_smp_do_read() and ctrlreg_smp_do_write().
 */
struct ctrlreg_info
{
unsigned int reg; /* register selector taken from the device minor (0, 2, 3, 4, 8 or XCR_MINOR_BASE) */
unsigned long value; /* value read from, or to be written to, the register */
unsigned int error; /* 0 on success; the helpers store -EINVAL here (in an unsigned field) for an unknown selector */
};
static void ctrlreg_smp_do_read(void* p)
{
struct ctrlreg_info* info = p;
info->error = 0;
printk(KERN_INFO "ctrlreg: do read of reg%u\n", info->reg);
switch (info->reg)
{
case 0: info->value = read_cr0(); break;
case 2: info->value = read_cr2(); break;
case 3: info->value = read_cr3(); break;
case 4: info->value = read_cr4(); break;
#ifdef CONFIG_X86_64
case 8: info->value = read_cr8(); break;
#endif
case XCR_MINOR_BASE: info->value = xgetbv(0); break;
default:
info->error = -EINVAL;
}
}
/*
 * Per-CPU write helper, run on the target CPU through
 * smp_call_function_single().
 *
 * @p: points at a struct ctrlreg_info.  ->value is written to the register
 *     named by ->reg and ->error is cleared; for an unrecognised selector
 *     nothing is written and ->error is set to -EINVAL.
 *
 * The CR8 and XCR0 cases now really write the value (they previously
 * performed a read and discarded the result).
 */
static void ctrlreg_smp_do_write(void* p)
{
	struct ctrlreg_info* info = p;

	info->error = 0;
	switch (info->reg)
	{
	case 0: write_cr0(info->value); break;
	case 2: write_cr2(info->value); break;
	case 3: write_cr3(info->value); break;
	case 4: write_cr4(info->value); break;
#ifdef CONFIG_X86_64
	case 8: write_cr8(info->value); break;
#endif
	case XCR_MINOR_BASE: xsetbv(0, info->value); break;
	default:
		info->error = -EINVAL;
	}
}
/*
 * read() handler: fetch the control register selected by the device minor
 * from the CPU selected by the device minor and copy it to user space.
 *
 * @file:  open device file; its inode minor encodes CPU and register.
 * @buf:   user buffer receiving one unsigned long.
 * @count: size of @buf; must be at least sizeof(unsigned long).
 * @ppos:  unused.
 *
 * Returns sizeof(unsigned long) on success, -EINVAL if @count is too small
 * or the register selector is unknown, -EFAULT if the user buffer cannot be
 * written, or the error returned by smp_call_function_single().
 */
static ssize_t ctrlreg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned int minor = iminor(file_inode(file));
	unsigned int cpu = GET_MINOR_CPU(minor);
	unsigned int reg = GET_MINOR_REG(minor);
	struct ctrlreg_info info = {.reg = reg};
	unsigned long not_copied;
	int err;

	printk(KERN_INFO "ctrlreg: read for cpu%u reg%u\n", cpu, reg);
	printk(KERN_INFO "ctrlreg: read of %zu bytes\n", count);

	if (count < sizeof(unsigned long))
		return -EINVAL;

	printk(KERN_INFO "ctrlreg: scheduling read\n");
	err = smp_call_function_single(cpu, ctrlreg_smp_do_read, &info, 1);
	if (err < 0)
		return err;

	printk(KERN_INFO "ctrlreg: read success: %x\n", info.error);

	/*
	 * The helper stores a negative errno in an unsigned int; convert it
	 * back to int before testing, and report that status rather than the
	 * (zero) result of the cross-CPU call.
	 */
	err = (int)info.error;
	if (err < 0)
		return err;

	/* copy_to_user() returns the number of bytes NOT copied, not an errno. */
	not_copied = copy_to_user(buf, &info.value, sizeof(unsigned long));
	printk(KERN_INFO "ctrlreg: read copy result: %lx ( %zu )\n", not_copied, sizeof(unsigned long));
	if (not_copied)
		return -EFAULT;

	printk(KERN_INFO "ctrlreg: read done\n");
	return sizeof(unsigned long);
}
/*
 * write() handler: take one unsigned long from user space and store it in
 * the control register selected by the device minor on the CPU selected by
 * the device minor.
 *
 * @file:  open device file; its inode minor encodes CPU and register.
 * @buf:   user buffer holding the new register value.
 * @count: size of @buf; must be at least sizeof(unsigned long).
 * @ppos:  unused.
 *
 * Returns sizeof(unsigned long) on success, -EINVAL if @count is too small
 * or the register selector is unknown, -EFAULT if the user buffer cannot be
 * read, or the error returned by smp_call_function_single().
 */
static ssize_t ctrlreg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
	unsigned int minor = iminor(file_inode(file));
	unsigned int cpu = GET_MINOR_CPU(minor);
	unsigned int reg = GET_MINOR_REG(minor);
	struct ctrlreg_info info = {.reg = reg};
	unsigned long not_copied;
	int err;

	printk(KERN_INFO "ctrlreg: write for cpu%u reg%u\n", cpu, reg);
	printk(KERN_INFO "ctrlreg: write of %zu bytes\n", count);

	if (count < sizeof(unsigned long))
		return -EINVAL;

	printk(KERN_INFO "ctrlreg: scheduling write\n");

	/*
	 * Destination is the kernel-side value, source is the user buffer
	 * (the arguments were previously swapped).  The return value is the
	 * number of bytes NOT copied, not an errno.
	 */
	not_copied = copy_from_user(&info.value, buf, sizeof(unsigned long));
	printk(KERN_INFO "ctrlreg: write copy data: %lx ( %zu )\n", not_copied, sizeof(unsigned long));
	if (not_copied)
		return -EFAULT;

	err = smp_call_function_single(cpu, ctrlreg_smp_do_write, &info, 1);
	if (err < 0)
		return err;

	printk(KERN_INFO "ctrlreg: write success: %x\n", info.error);

	/* The helper stores a negative errno in an unsigned int field. */
	err = (int)info.error;
	if (err < 0)
		return err;

	printk(KERN_INFO "ctrlreg: write done\n");
	return sizeof(unsigned long);
}
/*
 * Per-CPU open check, run on the target CPU through
 * smp_call_function_single().
 *
 * @p: in/out unsigned int.  On entry it holds the register selector; on
 *     return it holds 0 if the open is allowed, or -EIO (stored in the
 *     unsigned slot) if it is denied.
 *
 * CR0/CR2/CR3/CR4 (and CR8 on 64-bit builds) are always allowed.  Any other
 * selector is allowed only when both CPUID feature bits tested below are set.
 */
static void ctrlreg_can_open(void *p)
{
	unsigned int *status = p;
	const unsigned int requested = *status;
	unsigned int eax, ebx, ecx, edx;
	unsigned int has_xgetbv, has_ia32e;

	*status = 0; /* assume success until proven otherwise */
	printk(KERN_INFO "ctrlreg: can open reg %u\n", requested);

	switch (requested)
	{
	case 0:
	case 2:
	case 3:
	case 4:
#ifdef CONFIG_X86_64
	case 8:
#endif
		return;
	default:
		break;
	}

	/* The outputs of this leaf query are not consulted below. */
	cpuid_count(0x0d, 1, &eax, &ebx, &ecx, &edx);

	has_xgetbv = cpuid_ecx(1) & 0x04000000;         /* CPUID.1:ECX bit 26 */
	has_ia32e = cpuid_edx(0x80000001) & 0x20000000; /* CPUID.80000001H:EDX bit 29 */
	printk(KERN_INFO "ctrlreg: xgetbv = %d\n", has_xgetbv);
	printk(KERN_INFO "ctrlreg: ia32e = %d\n", has_ia32e);

	if (!(has_xgetbv && has_ia32e))
	{
		printk(KERN_INFO "ctrlreg: open denied");
		*status = -EIO;
	}
}
/*
 * open() handler: verify that the CPU encoded in the device minor exists and
 * is online, then ask that CPU (via ctrlreg_can_open) whether the register
 * encoded in the minor may be accessed.
 *
 * Returns 0 on success, -ENXIO if the CPU is out of range or offline, the
 * error from smp_call_function_single(), or the status produced by
 * ctrlreg_can_open() (-EIO when access is denied).
 */
static int ctrlreg_open(struct inode *inode, struct file *file)
{
	unsigned int cpu;
	unsigned int reg;
	unsigned int minor;
	int err;

	minor = iminor(file_inode(file));
	cpu = GET_MINOR_CPU(minor);
	reg = GET_MINOR_REG(minor);
	printk(KERN_INFO "ctrlreg: open device for cpu%u reg%u\n", cpu, reg);

	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
		return -ENXIO; /* No such CPU */

	/* `reg` is in/out: selector on entry, 0 or negative errno on return. */
	err = smp_call_function_single(cpu, ctrlreg_can_open, &reg, 1);
	if (err < 0)
		return err;

	return (int)reg;
}
/* File operations shared by every control-register device node. */
static const struct file_operations ctrlreg_fops =
{
	.owner = THIS_MODULE,
	.open  = ctrlreg_open,
	.read  = ctrlreg_read,
	.write = ctrlreg_write,
};
/*
 * Create the device nodes for one CPU: cr0, cr2, cr3, cr4, cr8 and xcr0.
 *
 * @cpu: CPU number encoded into each node's minor and name.
 *
 * Returns 0 on success or the error from device_create(); nodes created
 * before a failure are left in place for the caller to remove.
 */
static int ctrlreg_device_create(int cpu)
{
	/* The control registers that exist: CR0, CR2-CR4, CR8. */
	static const int cr_numbers[] = { 0, 2, 3, 4, 8 };
	struct device *node = NULL;
	unsigned int idx;

	printk(KERN_INFO "ctrlreg: device create for cpu %d\n", cpu);

	for (idx = 0; idx < sizeof(cr_numbers) / sizeof(cr_numbers[0]); idx++)
	{
		const int cr = cr_numbers[idx];

		printk(KERN_INFO "ctrlreg: device cpu%dcr%d\n", cpu, cr);
		node = device_create(ctrlreg_class, NULL, MKDEV(major_n, MAKE_MINOR(cpu, cr)), NULL, "cpu%dcr%d", cpu, cr);
		if (IS_ERR(node))
			return PTR_ERR(node);
	}

	/* XCR0 is the only extended control register exposed. */
	printk(KERN_INFO "ctrlreg: device cpu%dxcr%d\n", cpu, 0);
	node = device_create(ctrlreg_class, NULL, MKDEV(major_n, MAKE_MINOR(cpu, (XCR_MINOR_BASE+0))), NULL, "cpu%dxcr%d", cpu, 0);
	if (IS_ERR(node))
		return PTR_ERR(node);

	return 0;
}
/*
 * Remove the device nodes of one CPU: cr0, cr2, cr3, cr4, cr8 and xcr0.
 *
 * @cpu: CPU number whose nodes are destroyed.
 */
static void ctrlreg_device_destroy(int cpu)
{
	/* The control registers that exist: CR0, CR2-CR4, CR8. */
	static const int cr_numbers[] = { 0, 2, 3, 4, 8 };
	unsigned int idx;

	for (idx = 0; idx < sizeof(cr_numbers) / sizeof(cr_numbers[0]); idx++)
		device_destroy(ctrlreg_class, MKDEV(major_n, MAKE_MINOR(cpu, cr_numbers[idx])));

	/* XCR0 */
	device_destroy(ctrlreg_class, MKDEV(major_n, MAKE_MINOR(cpu, (XCR_MINOR_BASE+0))));
}
/*
 * CPU hotplug notifier callback: create a CPU's device nodes when it is
 * being brought up, and remove them when bring-up is cancelled or the CPU
 * is dead.  Other actions are ignored.
 *
 * @nfb:    notifier block (unused).
 * @action: hotplug action code.
 * @hcpu:   CPU number carried in a pointer-sized value.
 *
 * Returns the creation status converted with notifier_from_errno().
 */
static int ctrlreg_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	const unsigned int cpu = (unsigned long)hcpu;
	int status = 0;

	if (action == CPU_UP_PREPARE)
		status = ctrlreg_device_create(cpu);
	else if (action == CPU_UP_CANCELED ||
		 action == CPU_UP_CANCELED_FROZEN ||
		 action == CPU_DEAD)
		ctrlreg_device_destroy(cpu);

	return notifier_from_errno(status);
}
/* Notifier block that routes CPU hotplug notifications to ctrlreg_class_cpu_callback(). */
static struct notifier_block __refdata ctrlreg_class_cpu_notifier =
{
.notifier_call = ctrlreg_class_cpu_callback,
};
/*
 * Class devnode hook: build the /dev-relative path for a device from its
 * minor number -- "crs/cpu<N>/cr<R>" for ordinary control registers and
 * "crs/cpu<N>/xcr<R>" for selectors at or above XCR_MINOR_BASE.
 *
 * @dev:  device whose devt supplies the minor.
 * @mode: unused.
 *
 * Returns the string allocated by kasprintf().
 */
static char* ctrlreg_devnode(struct device *dev, umode_t *mode)
{
	const unsigned int minor = MINOR(dev->devt);
	const unsigned int cpu = GET_MINOR_CPU(minor);
	const unsigned int reg = GET_MINOR_REG(minor);

	if (reg >= XCR_MINOR_BASE)
		return kasprintf(GFP_KERNEL, "crs/cpu%u/xcr%u", cpu, reg-XCR_MINOR_BASE);

	return kasprintf(GFP_KERNEL, "crs/cpu%u/cr%u", cpu, reg);
}
/*
 * Module entry point: register the character device region with a
 * dynamically allocated major, create the device class, create the device
 * nodes of every online CPU and register the CPU hotplug notifier.
 *
 * Returns 0 on success or a negative errno; on failure everything set up so
 * far is torn down again.
 */
int __init ctrlreg_init(void)
{
	int err = 0, i = 0;

	printk(KERN_INFO "ctrlreg: init\n");

	/*
	 * NOTE(review): the region spans NR_CPUS minors while device minors
	 * are built as (cpu << 8) | reg -- confirm the count covers every
	 * CPU.  The count must stay equal to the one used when unregistering.
	 */
	major_n = __register_chrdev(0, 0, NR_CPUS, "crs", &ctrlreg_fops);
	if (major_n < 0)
		return major_n;
	printk(KERN_INFO "ctrlreg: major number is %d\n", major_n);

	/* The class name must not carry a trailing newline. */
	ctrlreg_class = class_create(THIS_MODULE, "ctrlreg");
	if (IS_ERR(ctrlreg_class))
	{
		err = PTR_ERR(ctrlreg_class);
		goto out_chrdev;
	}
	printk(KERN_INFO "ctrlreg: class created\n");
	ctrlreg_class->devnode = ctrlreg_devnode;

	cpu_notifier_register_begin();
	for_each_online_cpu(i)
	{
		err = ctrlreg_device_create(i);
		if (err < 0)
			goto out_class;
	}
	__register_hotcpu_notifier(&ctrlreg_class_cpu_notifier);
	cpu_notifier_register_done();

	printk(KERN_INFO "ctrlreg: init success\n");
	return 0;

out_class:
	/* Remove whatever nodes were created before the failure. */
	for_each_online_cpu(i)
	{
		ctrlreg_device_destroy(i);
	}
	cpu_notifier_register_done();
	class_destroy(ctrlreg_class);
out_chrdev:
	/* Release OUR major (major_n), not the cpuid driver's CPUID_MAJOR. */
	__unregister_chrdev(major_n, 0, NR_CPUS, "crs");
	return err;
}
/*
 * Module exit: unregister the hotplug notifier, remove the device nodes of
 * every online CPU, destroy the class and release the character device
 * region registered by ctrlreg_init().
 */
static void __exit ctrlreg_exit(void)
{
	int cpu = 0;

	cpu_notifier_register_begin();
	__unregister_hotcpu_notifier(&ctrlreg_class_cpu_notifier);
	for_each_online_cpu(cpu)
		ctrlreg_device_destroy(cpu);
	cpu_notifier_register_done();

	class_destroy(ctrlreg_class);

	/*
	 * Release the dynamically allocated major stored in major_n; passing
	 * CPUID_MAJOR here would target the cpuid driver's region instead of
	 * ours.
	 */
	__unregister_chrdev(major_n, 0, NR_CPUS, "crs");
}
/* Register the module's load/unload entry points and its metadata. */
module_init(ctrlreg_init);
module_exit(ctrlreg_exit);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Kee Nemesis 241");
MODULE_DESCRIPTION("Read and write Control Registers");
This module creates the following dev nodes:
/dev/crs/cpu0/cr0
/dev/crs/cpu0/cr2
/dev/crs/cpu0/cr3
/dev/crs/cpu0/cr4
/dev/crs/cpu0/cr8
/dev/crs/cpu0/xcr0
/dev/crs/cpu1/cr0
/dev/crs/cpu1/cr2
/dev/crs/cpu1/cr3
/dev/crs/cpu1/cr4
/dev/crs/cpu1/cr8
/dev/crs/cpu1/xcr0
...
You can read/write these dev nodes. The minimum read/write length is 4 bytes on 32-bit systems and 8 bytes on 64-bit ones (Linux does some buffering anyway).
To compile this LKM, save the code above as ctrlreg.c and create this Makefile
# Out-of-tree build of ctrlreg.ko against the running kernel's build tree.
# Recipe lines must start with a TAB character.
obj-m += ctrlreg.o

.PHONY: all clean

all:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

clean:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
then use make to get ctrlreg.ko.
To load the module use sudo insmod ctrlreg.ko, to remove it sudo rmmod ctrlreg.
I have also written a small user mode utility to read CR:
CODE IS WITHOUT ANY WARRANTY, TESTED ON DEBIAN 8 ON 64 bit VM ONLY
#include <stdio.h>
#include <stdlib.h>
#define MAX_PATH 256
/*
 * Read or write one control register through the /dev/crs device nodes.
 *
 * Usage: cr cpu<N> cr<R>|xcr<R> [value]
 *   With two arguments the register is read and printed as 16 hex digits.
 *   With a third (decimal) argument that value is written to the register.
 *
 * Exit status: 0 on success, 1 usage error, 2 bad cpu argument, 3 bad
 * register argument, 4 device cannot be opened (or path too long), 5 write
 * failed, 6 bad value argument, 7 read failed.
 */
int main(int argc, char* argv[])
{
	/* unsigned int so the %u conversions below match their arguments */
	unsigned int cpu, reg;
	unsigned long data = 0;
	char device[MAX_PATH];
	FILE* fin;
	int writing;
	int len;

	if (argc < 3 || argc > 4)
		return fprintf(stderr, "Usage:\n\t\t cr cpu reg [value]\n"), 1;

	if (sscanf(argv[1], "cpu%u", &cpu) != 1)
		return fprintf(stderr, "Invalid value '%s' for cpu\n", argv[1]), 2;

	if (sscanf(argv[2], "cr%u", &reg) != 1 && sscanf(argv[2], "xcr%u", &reg) != 1)
		return fprintf(stderr, "Invalid value '%s' for reg\n", argv[2]), 3;

	writing = (argc == 4);
	if (writing && sscanf(argv[3], "%lu", &data) != 1)
		return fprintf(stderr, "Invalid numeric value '%s'\n", argv[3]), 6;

	len = snprintf(device, sizeof device, "/dev/crs/cpu%u/%s", cpu, argv[2]);
	if (len < 0 || (size_t)len >= sizeof device)
		return fprintf(stderr, "Device path too long for '%s'\n", argv[2]), 4;

	fin = fopen(device, writing ? "wb" : "rb");
	if (!fin)
		return fprintf(stderr, "Cannot open device %s\n", device), 4;

	if (writing)
	{
		if (fwrite(&data, sizeof(data), 1, fin) != 1)
		{
			fprintf(stderr, "Cannot write device %s (%d)\n", device, ferror(fin));
			fclose(fin);
			return 5;
		}
		/* stdio buffers the data: the device write happens on flush/close. */
		if (fclose(fin) != 0)
			return fprintf(stderr, "Cannot write device %s (flush failed)\n", device), 5;
		return 0;
	}

	if (fread(&data, sizeof(data), 1, fin) != 1)
	{
		fprintf(stderr, "Cannot read device %s (%d)\n", device, ferror(fin));
		fclose(fin);
		return 7;
	}
	printf("%016lx\n", data);

	fclose(fin);
	return 0;
}
Save the code as cr.c and compile it.
To read cr0 of the second CPU you can use:
cr cpu1 cr0
To write the value 0 (be careful) into it
cr cpu1 cr0 0
Consider the following fragment of OpenMP code which transfers private data between two threads using an intermediate shared variable
#pragma omp parallel shared(x) private(a,b)
{
...
a = somefunction(b);
if (omp_get_thread_num() == 0) {
x = a;
}
}
#pragma omp parallel shared(x) private(a,b)
{
if (omp_get_thread_num() == 1) {
a = x;
}
b = anotherfunction(a);
...
}
I need to do the equivalent (in pseudocode): transfer private data from one process to another using a single-sided message-passing library.
Any ideas?
This is possible, but there's a lot more "scaffolding" involved -- after all, you are communicating data between potentially completely different computers.
The coordination for this sort of thing is done between windows of data which are accessible from other processors, and with lock/unlock operations which coordinate the access of this data. The locks aren't really locks in the sense of being mutexes, but they are more like synchronization points coordinating data access to the window.
I don't have time right now to explain this in the detail I'd like, but below is an example of using MPI2 to do something like shared memory flagging in a system that doesn't have shared memory:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"
int main(int argc, char** argv)
{
int rank, size, *a, geta;
int x;
int ierr;
MPI_Win win;
const int RCVR=0;
const int SENDER=1;
ierr = MPI_Init(&argc, &argv);
ierr |= MPI_Comm_rank(MPI_COMM_WORLD, &rank);
ierr |= MPI_Comm_size(MPI_COMM_WORLD, &size);
if (ierr) {
fprintf(stderr,"Error initializing MPI library; failing.\n");
exit(-1);
}
if (rank == RCVR) {
MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &a);
*a = 0;
} else {
a = NULL;
}
MPI_Win_create(a, 1, sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
if (rank == SENDER) {
/* Lock recievers window */
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, RCVR, 0, win);
x = 5;
/* put 1 int (from &x) to 1 int rank RCVR, at address 0 in window "win"*/
MPI_Put(&x, 1, MPI_INT, RCVR, 0, 1, MPI_INT, win);
/* Unlock */
MPI_Win_unlock(0, win);
printf("%d: My job here is done.\n", rank);
}
if (rank == RCVR) {
for (;;) {
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, RCVR, 0, win);
MPI_Get(&geta, 1, MPI_INT, RCVR, 0, 1, MPI_INT, win);
MPI_Win_unlock(0, win);
if (geta == 0) {
printf("%d: a still zero; sleeping.\n",rank);
sleep(2);
} else
break;
}
printf("%d: a now %d!\n",rank,geta);
printf("a = %d\n", *a);
MPI_Win_free(&win);
if (rank == RCVR) MPI_Free_mem(a);
MPI_Finalize();
return 0;
}
I am trying to benchmark file system I/O on Mac OS X using mmap.
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <math.h>
/* Global sink for the bytes read from the mapping in main(). */
char c;
/*
 * Map the file named by argv[1] in chunks of up to 0x100000 bytes and read
 * one byte from every page of each chunk.
 *
 * Prints the page size, the file size and each mapping's offset/size.
 * Calls exit(1) when the argument count is wrong; returns 0 otherwise,
 * including when a mapping attempt is treated as failed.
 */
int main(int argc, char ** argv)
{
if (argc != 2)
{
printf("no files\n");
exit(1);
}
/* NOTE(review): the result of open() is used without being checked. */
int fd = open(argv[1], O_RDONLY);
/* presumably asks the OS not to cache this descriptor's data, so that reads reach the disk -- confirm */
fcntl(fd, F_NOCACHE, 1);
int offset=0;
int size=0x100000;
int pagesize = getpagesize();
struct stat stats;
fstat(fd, &stats);
/* The file size is narrowed from st_size to int here. */
int filesize = stats.st_size;
printf("%d byte pages\n", pagesize);
printf("file %s # %d bytes\n", argv[1], filesize);
while(offset < filesize)
{
/* Final chunk: shrink the mapping to the remaining bytes, rounded up to whole pages. */
if(offset + size > filesize)
{
int pages = ceil((filesize-offset)/(double)pagesize);
size = pages*pagesize;
}
printf("mapping offset %x with size %x\n", offset, size);
/* NOTE(review): the flags argument is 0 here; mmap requires MAP_PRIVATE or MAP_SHARED to be specified -- confirm against the platform man page. */
void * mem = mmap(0, size, PROT_READ, 0, fd, offset);
/* NOTE(review): compares the pointer with the integer -1; MAP_FAILED is the documented failure value. */
if(mem == -1)
return 0;
offset+=size;
int i=0;
/* Read the first byte of each page of the mapping. */
for(; i<size; i+=pagesize)
{
c = *((char *)mem+i);
}
munmap(mem, size);
}
return 0;
}
The idea is that I'll map a file or portion of it and then cause a page fault by dereferencing it. I am slowly losing my sanity since this doesn't at all work and I've done similar things on Linux before.
Change this line
void * mem = mmap(0, size, PROT_READ, 0, fd, offset);
to
void * mem = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
And, don't compare mem with -1. Use this instead:
if(mem == MAP_FAILED) { ... }
It's both more readable and more portable.
General advice: if you're on a different UNIX platform from what you're used to, it's a good idea to open the man page. For mmap on OS X, it can be found here. It says
Conforming applications must specify either MAP_PRIVATE or MAP_SHARED.
So, specifying 0 on the fourth
argument is not OK in OS X. I believe
this is true for BSD in general.