How to read i2c with out i2cget command? - embedded-linux

I am currently using Kontron smarc-samx6i board and I am running this board with Yocto kernel. Here the kernel is not supporting the command i2cget to check i2c data to read from sensor. So, is there any other possible ways to read i2c data from the sensors?

You can add "i2c-tools" to your image:
IMAGE_INSTALL_append = " i2c-tools"
They include i2cget.c.
The recipe can be found in: /meta/recipes-devtools/i2c-tools/

You might also want to look into writing your own application using the i2c dev interface. See the kernel documentation on i2c.

read:
#include <stdio.h>
#include <fcntl.h>
#include <linux/i2c-dev.h>
#include <errno.h>
#define I2C_ADDR 0x20
int main (void) {
char buffer[1];
int fd;
fd = open("/dev/i2c-0", O_RDWR);
if (fd < 0) {
printf("Error opening file: %s\n", strerror(errno));
return 1;
}
if (ioctl(fd, I2C_SLAVE, I2C_ADDR) < 0) {
printf("ioctl error: %s\n", strerror(errno));
return 1;
}
buffer[0]=0xFF;
write(fd, buffer, 1);
read(fd, buffer, 1);
printf("0x%02X\n", buffer[0]);
return 0;
}
write:
#include <stdio.h>
#include <fcntl.h>
#include <linux/i2c-dev.h>
#include <errno.h>
#define I2C_ADDR 0x20
int main (void) {
int value;
int fd;
fd = open("/dev/i2c-0", O_RDWR);
if (fd < 0) {
printf("Error opening file: %s\n", strerror(errno));
return 1;
}
if (ioctl(fd, I2C_SLAVE, I2C_ADDR) < 0) {
printf("ioctl error: %s\n", strerror(errno));
return 1;
}
for (value=0; value<=255; value++) {
if (write(fd, &value, 1) != 1) {
printf("Error writing file: %s\n", strerror(errno));
}
usleep(100000);
}
return 0;
}

Related

Userfaultfd write protection appears unsupported when checking through the UFFDIO_API ioctl

I am trying to use the write protection feature of Linux's userfaultfd, but it does not appear to be enabled in my kernel even though I am using version 5.13 (write protection should be fully supported in 5.10+).
When I run
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#define errExit(msg) \
do { \
perror(msg); \
exit(EXIT_FAILURE); \
} while (0)
static int has_bit(uint64_t val, uint64_t bit) {
return (val & bit) == bit;
}
int main() {
long uffd; /* userfaultfd file descriptor */
struct uffdio_api uffdio_api;
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd == -1)
errExit("userfaultfd");
uffdio_api.api = UFFD_API;
uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
errExit("ioctl-UFFDIO_API");
printf("UFFDIO_API: %d\n", has_bit(uffdio_api.ioctls, 1UL << _UFFDIO_API));
printf("UFFDIO_REGISTER: %d\n", has_bit(uffdio_api.ioctls, 1UL << _UFFDIO_REGISTER));
printf("UFFDIO_UNREGISTER: %d\n", has_bit(uffdio_api.ioctls, 1UL << _UFFDIO_UNREGISTER));
printf("UFFDIO_WRITEPROTECT: %d\n", has_bit(uffdio_api.ioctls, 1UL << _UFFDIO_WRITEPROTECT));
printf("UFFD_FEATURE_PAGEFAULT_FLAG_WP: %d\n", has_bit(uffdio_api.features, UFFD_FEATURE_PAGEFAULT_FLAG_WP));
}
The output is
UFFDIO_API: 1
UFFDIO_REGISTER: 1
UFFDIO_UNREGISTER: 1
UFFDIO_WRITEPROTECT: 0
UFFD_FEATURE_PAGEFAULT_FLAG_WP: 1
The UFFD_FEATURE_PAGEFAULT_FLAG_WP feature is enabled, but the UFFDIO_WRITEPROTECT ioctl is marked as not supported, which is necessary to enable write protection.
What might lead to this feature being disabled, and how can I enable it?
I am using Ubuntu MATE 21.10 with Linux kernel version 5.13.0-30-generic.
EDIT:
It seems like despite the man page section on the UFFD_API ioctl (https://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html), this might be the intended behavior for a system where write protection is enabled. However, when I run a full program that spawns a poller thread and writes to the protected memory, the poller thread does not receive any notification.
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#define errExit(msg) \
do { \
perror(msg); \
exit(EXIT_FAILURE); \
} while (0)
static int page_size;
static void* fault_handler_thread(void* arg) {
long uffd; /* userfaultfd file descriptor */
uffd = (long) arg;
/* Loop, handling incoming events on the userfaultfd
file descriptor. */
for (;;) {
/* See what poll() tells us about the userfaultfd. */
struct pollfd pollfd;
int nready;
pollfd.fd = uffd;
pollfd.events = POLLIN;
nready = poll(&pollfd, 1, -1);
if (nready == -1)
errExit("poll");
printf("\nfault_handler_thread():\n");
printf(
" poll() returns: nready = %d; "
"POLLIN = %d; POLLERR = %d\n",
nready, (pollfd.revents & POLLIN) != 0,
(pollfd.revents & POLLERR) != 0);
// received fault, exit the program
exit(EXIT_FAILURE);
}
}
int main() {
long uffd; /* userfaultfd file descriptor */
char* addr; /* Start of region handled by userfaultfd */
uint64_t len; /* Length of region handled by userfaultfd */
pthread_t thr; /* ID of thread that handles page faults */
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
struct uffdio_writeprotect uffdio_wp;
int s;
page_size = sysconf(_SC_PAGE_SIZE);
len = page_size;
/* Create and enable userfaultfd object. */
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd == -1)
errExit("userfaultfd");
uffdio_api.api = UFFD_API;
uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
errExit("ioctl-UFFDIO_API");
addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED)
errExit("mmap");
printf("Address returned by mmap() = %p\n", addr);
/* Register the memory range of the mapping we just created for
handling by the userfaultfd object. */
uffdio_register.range.start = (unsigned long) addr;
uffdio_register.range.len = len;
uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
errExit("ioctl-UFFDIO_REGISTER");
printf("uffdio_register.ioctls = 0x%llx\n", uffdio_register.ioctls);
printf("Have _UFFDIO_WRITEPROTECT? %s\n", (uffdio_register.ioctls & _UFFDIO_WRITEPROTECT) ? "YES" : "NO");
uffdio_wp.range.start = (unsigned long) addr;
uffdio_wp.range.len = len;
uffdio_wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffdio_wp) == -1)
errExit("ioctl-UFFDIO_WRITEPROTECT");
/* Create a thread that will process the userfaultfd events. */
s = pthread_create(&thr, NULL, fault_handler_thread, (void*) uffd);
if (s != 0) {
errno = s;
errExit("pthread_create");
}
/* Main thread now touches memory in the mapping, touching
locations 1024 bytes apart. This will trigger userfaultfd
events for all pages in the region. */
usleep(100000);
size_t l;
l = 0xf; /* Ensure that faulting address is not on a page
boundary, in order to test that we correctly
handle that case in fault_handling_thread(). */
char i = 0;
while (l < len) {
printf("Write address %p in main(): ", addr + l);
addr[l] = i++;
printf("%d\n", addr[l]);
l += 1024;
usleep(100000); /* Slow things down a little */
}
exit(EXIT_SUCCESS);
}
The UFFD_API ioctl does not seem to ever report _UFFD_WRITEPROTECT as can be seen here in the kernel source code (1, 2). I assume that this is because whether this operation is supported or not depends on the kind of underlying mapping.
The feature is in fact reporeted on a per-registered-range basis. You will have to set the API with ioctl(uffd, UFFDIO_API, ...) first, then register a range with ioctl(uffd, UFFDIO_REGISTER, ...) and then check the uffdio_register.ioctls field.
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <unistd.h>
#define errExit(msg) \
do { \
perror(msg); \
exit(EXIT_FAILURE); \
} while (0)
int main(void) {
long uffd;
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd == -1)
errExit("userfaultfd");
struct uffdio_api uffdio_api = { .api = UFFD_API };
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
errExit("ioctl(UFFDIO_API)");
const size_t region_sz = 0x4000;
void *region = mmap(NULL, region_sz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
if (region == MAP_FAILED)
errExit("mmap");
if (posix_memalign((void **)region, sysconf(_SC_PAGESIZE), region_sz))
errExit("posix_memalign");
printf("Region mapped at %p - %p\n", region, region + region_sz);
struct uffdio_register uffdio_register = {
.range = { .start = (unsigned long)region, .len = region_sz },
.mode = UFFDIO_REGISTER_MODE_WP
};
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
errExit("ioctl(UFFDIO_REGISTER)");
printf("uffdio_register.ioctls = 0x%llx\n", uffdio_register.ioctls);
printf("Have _UFFDIO_WRITEPROTECT? %s\n", (uffdio_register.ioctls & _UFFDIO_WRITEPROTECT) ? "YES" : "NO");
if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) != UFFD_API_RANGE_IOCTLS)
errExit("bad ioctl set");
struct uffdio_writeprotect wp = {
.range = { .start = (unsigned long)region, .len = region_sz },
.mode = UFFDIO_WRITEPROTECT_MODE_WP
};
if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp) == -1)
errExit("ioctl(UFFDIO_WRITEPROTECT)");
puts("ioctl(UFFDIO_WRITEPROTECT) successful.");
return EXIT_SUCCESS;
}
Output:
Region mapped at 0x7f45c48fe000 - 0x7f45c4902000
uffdio_register.ioctls = 0x5c
Have _UFFDIO_WRITEPROTECT? YES
ioctl(UFFDIO_WRITEPROTECT) successful.
I found the solution. The write-protected pages must be touched after registering but before marking them as write-protected. This is an undocumented requirement, from what I can tell.
In other words, add
for (size_t i = 0; i < len; i += page_size) {
addr[i] = 0;
}
between registering and write-protecting.
It works if I change the full example to
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#define errExit(msg) \
do { \
perror(msg); \
exit(EXIT_FAILURE); \
} while (0)
static int page_size;
static void* fault_handler_thread(void* arg) {
long uffd; /* userfaultfd file descriptor */
uffd = (long) arg;
/* Loop, handling incoming events on the userfaultfd
file descriptor. */
for (;;) {
/* See what poll() tells us about the userfaultfd. */
struct pollfd pollfd;
int nready;
pollfd.fd = uffd;
pollfd.events = POLLIN;
nready = poll(&pollfd, 1, -1);
if (nready == -1)
errExit("poll");
printf("\nfault_handler_thread():\n");
printf(
" poll() returns: nready = %d; "
"POLLIN = %d; POLLERR = %d\n",
nready, (pollfd.revents & POLLIN) != 0,
(pollfd.revents & POLLERR) != 0);
// received fault, exit the program
exit(EXIT_FAILURE);
}
}
int main() {
long uffd; /* userfaultfd file descriptor */
char* addr; /* Start of region handled by userfaultfd */
uint64_t len; /* Length of region handled by userfaultfd */
pthread_t thr; /* ID of thread that handles page faults */
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
struct uffdio_writeprotect uffdio_wp;
int s;
page_size = sysconf(_SC_PAGE_SIZE);
len = page_size;
/* Create and enable userfaultfd object. */
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd == -1)
errExit("userfaultfd");
uffdio_api.api = UFFD_API;
uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
errExit("ioctl-UFFDIO_API");
addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED)
errExit("mmap");
printf("Address returned by mmap() = %p\n", addr);
/* Register the memory range of the mapping we just created for
handling by the userfaultfd object. */
uffdio_register.range.start = (unsigned long) addr;
uffdio_register.range.len = len;
uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
errExit("ioctl-UFFDIO_REGISTER");
printf("uffdio_register.ioctls = 0x%llx\n", uffdio_register.ioctls);
printf("Have _UFFDIO_WRITEPROTECT? %s\n", (uffdio_register.ioctls & _UFFDIO_WRITEPROTECT) ? "YES" : "NO");
for (size_t i = 0; i < len; i += page_size) {
addr[i] = 0;
}
uffdio_wp.range.start = (unsigned long) addr;
uffdio_wp.range.len = len;
uffdio_wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffdio_wp) == -1)
errExit("ioctl-UFFDIO_WRITEPROTECT");
/* Create a thread that will process the userfaultfd events. */
s = pthread_create(&thr, NULL, fault_handler_thread, (void*) uffd);
if (s != 0) {
errno = s;
errExit("pthread_create");
}
/* Main thread now touches memory in the mapping, touching
locations 1024 bytes apart. This will trigger userfaultfd
events for all pages in the region. */
usleep(100000);
size_t l;
l = 0xf; /* Ensure that faulting address is not on a page
boundary, in order to test that we correctly
handle that case in fault_handling_thread(). */
char i = 0;
while (l < len) {
printf("Write address %p in main(): ", addr + l);
addr[l] = i++;
printf("%d\n", addr[l]);
l += 1024;
usleep(100000); /* Slow things down a little */
}
exit(EXIT_SUCCESS);
}

Using perf_event_open to monitor Docker containers

I wrote a program in C to retrieve performance events such as cpu-cycles of Docker containers.
I mean, a user space program at host level (host level monitoring, not inside docker). I give pid of the docker container as pid entry of perf_event_open(), However, I have always 0 as returned value. I have tested the program for other non-docker pids for example firefox and it works very well.
I set PERF_FLAG_PID_CGROUP as flag, nothing changes!
Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
int ret;
ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
group_fd, flags);
return ret;
}
int
main(int argc, char **argv)
{
struct perf_event_attr pe;
long long count;
int fd;
fd = open("/sys/fs/cgroup/perf_event/docker/f42c13cd9dd700544fe670e30d0b3216bdceaf01ddc370405618fdecfd10b26d", O_RDONLY);
if (fd == -1)
return 0;
memset(&pe, 0, sizeof(struct perf_event_attr));
pe.type = PERF_TYPE_HARDWARE;
pe.size = sizeof(struct perf_event_attr);
pe.config = PERF_COUNT_HW_CPU_CYCLES;
pe.disabled = 1;
pe.exclude_kernel = 0;
pe.exclude_hv = 0;
fd = perf_event_open(&pe, fd, -1, -1, PERF_FLAG_PID_CGROUP);
if (fd == -1) {
fprintf(stderr, "Error opening leader %llx\n", pe.config);
exit(EXIT_FAILURE);
}
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
usleep(100);
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
read(fd, &count, sizeof(long long));
printf("Used %lld instructions\n", count);
close(fd);
}
According to the Man page of perf_event_open(), I also give the fd opened on the directory of docker container in groupfs. Doesn't work !
Would you please help me to solve the problem?
Thanks
Update:
I have checked with other events for example PERF_COUNT_HW_CACHE_REFERENCES,
I see 0 as returned value!
OS: Ubuntu 16.04
Kernel: 4.15.0-041500-generic
Architecture: X86_64
You have not specified which linux kernel version you are using. I will base my answer as per the latest linux kernel version.
The parameters that you passed to perf_event_open syscall look correct, except for one.
In your case, you pass cpu = -1 as a parameter to perf_event_open.
While this usually works in normal perf event filtering (i.e. per-cpu or per-thread), passing cpu = -1 will not work in cgroup based filtering for perf. In cgroup mode, the pid argument is used to pass the fd opened to the cgroup directory in cgroupfs (which you seemed to have passed correctly). The cpu argument designates the cpu on which to monitor threads from that cgroup. And when cpu=-1 it means the perf event measures the specified process/thread on any CPU (without regarding whether that CPU belongs to the cgroup you are measuring on).
This is how it is depicted in the latest Linux code.
if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1))
return -EINVAL;
You can clearly see that if either PID=-1 or CPU=-1, the syscall method will return error.
Working Example
From the perf_event_open documentation, it is quite clear that --
cgroup monitoring is available only for system-wide events and may therefore require extra permissions.
Since we are doing cgroup monitoring in this case, which is already understood as we are monitoring the perf-events of a container, we will have to monitor the event system-wide. This means the monitoring happens for ALL the available CPUs in the system.
Working code
I have 4 cores in my system and hence I use CPUs - 0,1,2,3
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <errno.h>
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
int ret;
ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
group_fd, flags);
return ret;
}
int
main(int argc, char **argv)
{
struct perf_event_attr pe;
long long count, count1, count2, count3;
int fd, fd1, fd2, fd3, fd4;
fd1 = open("/sys/fs/cgroup/perf_event/docker/001706b1a71617b0ce9d340f706d901e00ee398091dd62aded2a1863fc8c274a", O_RDONLY);
if (fd1 == -1)
return 0;
memset(&pe, 0, sizeof(struct perf_event_attr));
pe.type = PERF_TYPE_HARDWARE;
pe.size = sizeof(struct perf_event_attr);
pe.config = PERF_COUNT_HW_INSTRUCTIONS;
pe.disabled = 1;
pe.exclude_kernel = 0;
pe.exclude_hv = 0;
fd = perf_event_open(&pe, fd1, 0, -1, PERF_FLAG_PID_CGROUP|PERF_FLAG_FD_CLOEXEC);
if (fd == -1) {
fprintf(stderr, "Error opening leader: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
fd2 = perf_event_open(&pe, fd1, 1, -1, PERF_FLAG_PID_CGROUP|PERF_FLAG_FD_CLOEXEC);
if (fd2 == -1) {
fprintf(stderr, "Error: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
fd3 = perf_event_open(&pe, fd1, 2, -1, PERF_FLAG_PID_CGROUP|PERF_FLAG_FD_CLOEXEC);
if (fd3 == -1) {
fprintf(stderr, "Error: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
fd4 = perf_event_open(&pe, fd1, 3, -1, PERF_FLAG_PID_CGROUP|PERF_FLAG_FD_CLOEXEC);
if (fd4 == -1) {
fprintf(stderr, "Error: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
ioctl(fd2, PERF_EVENT_IOC_RESET, 0);
ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
ioctl(fd3, PERF_EVENT_IOC_RESET, 0);
ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0);
ioctl(fd4, PERF_EVENT_IOC_RESET, 0);
ioctl(fd4, PERF_EVENT_IOC_ENABLE, 0);
sleep(10); // using sleep(10) to actually observe instructions
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
ioctl(fd4, PERF_EVENT_IOC_DISABLE, 0);
read(fd, &count, sizeof(long long));
read(fd2, &count1, sizeof(long long));
read(fd3, &count2, sizeof(long long));
read(fd4, &count3, sizeof(long long));
printf("Used %lld instructions\n", count+count1+count2+count3);
close(fd);
close(fd2);
close(fd3);
close(fd4);
}
`
Output : Used 55174 instructions

How to know the values of CR registers from linux user and kernel modes

I would like to know the CR0-CR4 register values on x86. Can I write inline assembly to read it out? Are there any other methods? (e.g., does OS keep any file structures to record these values)
The Linux kernel has some function to read and write Control Registers, they are the read_crX and write_crX functions for the standard CR and xgetbv,xsetbv for the extended CR.
User mode applications need a LKM to indirectly use these functions.
In theory you just need to create a LKM with one or more devices and handle IO requests by reading or writing from CR. In practice you usually have more than one CPU, so you need to handle MP.
I used the kernel module for CPUID as a template and create this LKM.
CODE IS WITHOUT ANY WARRANTY, TESTED ON DEBIAN 8 ON 64 bit VM ONLY
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#include <linux/fs.h> /* Needed for KERN_INFO */
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/smp.h>
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/uaccess.h>
#include <linux/gfp.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/xcr.h>
#define MAKE_MINOR(cpu, reg) (cpu<<8 | reg)
#define GET_MINOR_REG(minor) (minor & 0xff)
#define GET_MINOR_CPU(minor) (minor >> 8)
#define XCR_MINOR_BASE 0x80
static int major_n = 0;
static struct class *ctrlreg_class;
struct ctrlreg_info
{
unsigned int reg;
unsigned long value;
unsigned int error;
};
static void ctrlreg_smp_do_read(void* p)
{
struct ctrlreg_info* info = p;
info->error = 0;
printk(KERN_INFO "ctrlreg: do read of reg%u\n", info->reg);
switch (info->reg)
{
case 0: info->value = read_cr0(); break;
case 2: info->value = read_cr2(); break;
case 3: info->value = read_cr3(); break;
case 4: info->value = read_cr4(); break;
#ifdef CONFIG_X86_64
case 8: info->value = read_cr8(); break;
#endif
case XCR_MINOR_BASE: info->value = xgetbv(0); break;
default:
info->error = -EINVAL;
}
}
static void ctrlreg_smp_do_write(void* p)
{
struct ctrlreg_info* info = p;
info->error = 0;
switch (info->reg)
{
case 0: write_cr0(info->value); break;
case 2: write_cr2(info->value); break;
case 3: write_cr3(info->value); break;
case 4: write_cr4(info->value); break;
#ifdef CONFIG_X86_64
case 8: read_cr8(); break;
#endif
case XCR_MINOR_BASE: xgetbv(0); break;
default:
info->error = -EINVAL;
}
}
static ssize_t ctrlreg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
unsigned int minor = iminor(file_inode(file));
unsigned int cpu = GET_MINOR_CPU(minor);
unsigned int reg = GET_MINOR_REG(minor);
struct ctrlreg_info info = {.reg = reg};
int err;
printk(KERN_INFO "ctrlreg: read for cpu%u reg%u\n", cpu, reg);
printk(KERN_INFO "ctrlreg: read of %zu bytes\n", count);
if (count < sizeof(unsigned long))
return -EINVAL;
printk(KERN_INFO "ctrlreg: scheduling read\n");
err = smp_call_function_single(cpu, ctrlreg_smp_do_read, &info, 1);
if (IS_ERR_VALUE(err))
return err;
printk(KERN_INFO "ctrlreg: read success: %x\n", info.error);
if (IS_ERR_VALUE(info.error))
return err;
err = copy_to_user(buf, &info.value, sizeof(unsigned long));
printk(KERN_INFO "ctrlreg: read copy result: %x ( %lu )\n", err, sizeof(unsigned long));
if (IS_ERR_VALUE(err))
return err;
printk(KERN_INFO "ctrlreg: read done\n");
return sizeof(unsigned long);
}
static ssize_t ctrlreg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
unsigned int minor = iminor(file_inode(file));
unsigned int cpu = GET_MINOR_CPU(minor);
unsigned int reg = GET_MINOR_REG(minor);
struct ctrlreg_info info = {.reg = reg};
int err;
printk(KERN_INFO "ctrlreg: write for cpu%u reg%u\n", cpu, reg);
printk(KERN_INFO "ctrlreg: write of %zu bytes\n", count);
if (count < sizeof(unsigned long))
return -EINVAL;
printk(KERN_INFO "ctrlreg: scheduling write\n");
err = copy_from_user((void*)buf, &info.value, sizeof(unsigned long));
printk(KERN_INFO "ctrlreg: write copy data: %x ( %lu )\n", err, sizeof(unsigned long));
if (IS_ERR_VALUE(err))
return err;
err = smp_call_function_single(cpu, ctrlreg_smp_do_write, &info, 1);
if (IS_ERR_VALUE(err))
return err;
printk(KERN_INFO "ctrlreg: write success: %x\n", info.error);
if (IS_ERR_VALUE(info.error))
return err;
printk(KERN_INFO "ctrlreg: write done\n");
return sizeof(unsigned long);
}
static void ctrlreg_can_open(void *p)
{
unsigned int* reg = p;
unsigned int reg_num = *reg;
unsigned int ebx, edx, eax, ecx;
unsigned int support_xgetbv, support_ia32e;
*reg = 0; //Success
printk(KERN_INFO "ctrlreg: can open reg %u\n", reg_num);
if (reg_num <= 4 && reg_num != 1)
return;
#ifdef CONFIG_X86_64
if (reg_num == 8)
return;
#endif
cpuid_count(0x0d, 1, &eax, &ebx, &ecx, &edx);
support_xgetbv = cpuid_ecx(1) & 0x04000000;
support_ia32e = cpuid_edx(0x80000001) & 0x20000000;
printk(KERN_INFO "ctrlreg: xgetbv = %d\n", support_xgetbv);
printk(KERN_INFO "ctrlreg: ia32e = %d\n", support_ia32e);
if (support_xgetbv && support_ia32e)
return;
printk(KERN_INFO "ctrlreg: open denied");
*reg = -EIO;
}
static int ctrlreg_open(struct inode *inode, struct file *file)
{
unsigned int cpu;
unsigned int reg;
unsigned int minor;
int err;
minor = iminor(file_inode(file));
cpu = GET_MINOR_CPU(minor);
reg = GET_MINOR_REG(minor);
printk(KERN_INFO "ctrlreg: open device for cpu%u reg%u\n", cpu, reg);
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
err = smp_call_function_single(cpu, ctrlreg_can_open, &reg, 1);
if (IS_ERR_VALUE(err))
return err;
return reg;
}
static const struct file_operations ctrlreg_fops =
{
.owner = THIS_MODULE,
.read = ctrlreg_read,
.write = ctrlreg_write,
.open = ctrlreg_open
};
static int ctrlreg_device_create(int cpu)
{
struct device *dev = NULL;
int i;
printk(KERN_INFO "ctrlreg: device create for cpu %d\n", cpu);
//CR0, 2-4, 8
for (i = 0; i <= 8; i++)
{
if ((i>4 && i<8) || i == 1)
continue; //Skip non existent regs
printk(KERN_INFO "ctrlreg: device cpu%dcr%d\n", cpu, i);
dev = device_create(ctrlreg_class, NULL, MKDEV(major_n, MAKE_MINOR(cpu, i)), NULL, "cpu%dcr%d", cpu, i);
if (IS_ERR(dev))
return PTR_ERR(dev);
}
//XCR0
for (i = 0; i <= 0; i++)
{
printk(KERN_INFO "ctrlreg: device cpu%dxcr%d\n", cpu, i);
dev = device_create(ctrlreg_class, NULL, MKDEV(major_n, MAKE_MINOR(cpu, (XCR_MINOR_BASE+i))), NULL, "cpu%dxcr%d", cpu, i);
if (IS_ERR(dev))
return PTR_ERR(dev);
}
return 0;
}
static void ctrlreg_device_destroy(int cpu)
{
int i;
//CR0, 2-4, 8
for (i = 0; i <= 8; i++)
{
if ((i>4 && i<8) || i == 1)
continue; //Skip non existent regs
device_destroy(ctrlreg_class, MKDEV(major_n, MAKE_MINOR(cpu, i)));
}
//XCR0
for (i = 0; i <= 0; i++)
device_destroy(ctrlreg_class, MKDEV(major_n, MAKE_MINOR(cpu, (XCR_MINOR_BASE+i))));
}
static int ctrlreg_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
int err = 0;
switch (action)
{
case CPU_UP_PREPARE:
err = ctrlreg_device_create(cpu);
break;
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
ctrlreg_device_destroy(cpu);
break;
}
return notifier_from_errno(err);
}
static struct notifier_block __refdata ctrlreg_class_cpu_notifier =
{
.notifier_call = ctrlreg_class_cpu_callback,
};
static char* ctrlreg_devnode(struct device *dev, umode_t *mode)
{
unsigned int minor = MINOR(dev->devt), cpu = GET_MINOR_CPU(minor), reg = GET_MINOR_REG(minor);
if (reg < XCR_MINOR_BASE)
return kasprintf(GFP_KERNEL, "crs/cpu%u/cr%u", cpu, reg);
else
return kasprintf(GFP_KERNEL, "crs/cpu%u/xcr%u", cpu, reg-XCR_MINOR_BASE);
}
int __init ctrlreg_init(void)
{
int err = 0, i = 0;
printk(KERN_INFO "ctrlreg: init\n");
if ((major_n = __register_chrdev(0, 0, NR_CPUS, "crs", &ctrlreg_fops)) < 0)
return major_n;
printk(KERN_INFO "ctrlreg: major number is %u\n", major_n);
ctrlreg_class = class_create(THIS_MODULE, "ctrlreg\n");
if (IS_ERR(ctrlreg_class))
{
err = PTR_ERR(ctrlreg_class);
goto out_chrdev;
}
printk(KERN_INFO "ctrlreg: class created\n");
ctrlreg_class->devnode = ctrlreg_devnode;
cpu_notifier_register_begin();
for_each_online_cpu(i)
{
err = ctrlreg_device_create(i);
if (IS_ERR_VALUE(err))
goto out_class;
}
__register_hotcpu_notifier(&ctrlreg_class_cpu_notifier);
cpu_notifier_register_done();
printk(KERN_INFO "ctrlreg: init success\n");
err = 0;
goto out;
out_class:
i = 0;
for_each_online_cpu(i)
{
ctrlreg_device_destroy(i);
}
cpu_notifier_register_done();
class_destroy(ctrlreg_class);
out_chrdev:
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "ctrlreg");
out:
return err;
}
static void __exit ctrlreg_exit(void)
{
int cpu = 0;
cpu_notifier_register_begin();
for_each_online_cpu(cpu)
ctrlreg_device_destroy(cpu);
class_destroy(ctrlreg_class);
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "ctrlreg");
__unregister_hotcpu_notifier(&ctrlreg_class_cpu_notifier);
cpu_notifier_register_done();
}
module_init(ctrlreg_init);
module_exit(ctrlreg_exit);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Kee Nemesis 241");
MODULE_DESCRIPTION("Read and write Control Registers");
This module create the following dev nodes:
/dev/crs/cpu0/cr0
/dev/crs/cpu0/cr2
/dev/crs/cpu0/cr3
/dev/crs/cpu0/cr4
/dev/crs/cpu0/cr8
/dev/crs/cpu0/xcr0
/dev/crs/cpu1/cr0
/dev/crs/cpu1/cr2
/dev/crs/cpu1/cr3
/dev/crs/cpu1/cr4
/dev/crs/cpu1/cr8
/dev/crs/cpu1/xcr0
...
You can read/write these dev nodes. The minimum read/write length is 4 bytes on 32 bit system and 8 bytes on 64 bit ones (Linux do some buffering anyway).
To compile this LKM, save the code above as ctrlreg.c and create this Makefile
obj-m += ctrlreg.o
all:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
clean:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
then use make to get ctrlreg.ko.
To load the module use sudo insmod ctrlreg.ko, to remove it sudo rmmod ctrlreg.
I have also written a small user mode utility to read CR:
CODE IS WITHOUT ANY WARRANTY, TESTED ON DEBIAN 8 ON 64 bit VM ONLY
#include <stdio.h>
#include <stdlib.h>
#define MAX_PATH 256
int main(int argc, char* argv[])
{
unsigned long cpu, reg;
FILE* fin;
char device[MAX_PATH];
unsigned long data;
if (argc < 3 || argc > 4)
return fprintf(stderr, "Usage:\n\t\t cr cpu reg [value]\n"), 1;
if (sscanf(argv[1], "cpu%u", &cpu) != 1)
return fprintf(stderr, "Invalid value '%s' for cpu\n", argv[1]), 2;
if (sscanf(argv[2], "cr%u", &reg) != 1 && sscanf(argv[2], "xcr%u", &reg) != 1)
return fprintf(stderr, "Invalid value '%s' for reg\n", argv[2]), 3;
if (argc == 4 && sscanf(argv[3], "%lu", &data) != 1)
return fprintf(stderr, "Invalid numeric value '%s'\n", argv[3]), 6;
snprintf(device, MAX_PATH, "/dev/crs/cpu%u/%s", cpu, argv[2]);
fin = fopen(device, argc == 4 ? "wb" : "rb");
if (!fin)
return fprintf(stderr, "Cannot open device %s\n", device), 4;
if (argc == 4)
{
if (fwrite(&data, sizeof(data), 1, fin) != 1)
return fprintf(stderr, "Cannot write device %s (%d)\n", device, ferror(fin)), 5;
}
else
{
if (fread(&data, sizeof(data), 1, fin) != 1)
return fprintf(stderr, "Cannot read device %s (%d)\n", device, ferror(fin)), 7;
printf("%016x\n", data);
}
fclose(fin);
return 0;
}
Save the code as cr.c and compile it.
To read cr0 of the second CPU you can use:
cr cpu1 cr0
To write the value 0 (be careful) into it
cr cpu1 cr0 0

Missing linux/if.h

hello I am getting this error when using cygwin to try to make,
this is my makefile
flags=-g
all: icmptx
icmptx: it.o icmptx.o tun_dev.o
gcc $(flags) -o icmptx icmptx.o it.o tun_dev.o
it.o: it.c
gcc $(flags) -c it.c
icmptx.o: icmptx.c
gcc $(flags) -c icmptx.c
tun_dev.o: tun_dev.c
gcc $(flags) -c tun_dev.c
clean:
rm -f tun_dev.o it.o icmptx.o icmptx
the error is
$ make
gcc -g -c tun_dev.c
tun_dev.c:35:22: fatal error: linux/if.h: No such file or directory
compilation terminated.
Makefile:15: recipe for target `tun_dev.o' failed
make: *** [tun_dev.o] Error 1
here is my tun_dev.c file
/*
*/
/*
* tun_dev.c,v 1.1.2.4 2001/09/13 05:02:22 maxk Exp
*/
/* #include "config.h" */
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <syslog.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/if.h>
#include "vtun.h"
#include "lib.h"
/*
* Allocate TUN device, returns opened fd.
* Stores dev name in the first arg(must be large enough).
*/
int tun_open_old(char *dev)
{
char tunname[14];
int i, fd;
if( *dev ) {
sprintf(tunname, "/dev/%s", dev);
return open(tunname, O_RDWR);
}
for(i=0; i < 255; i++){
sprintf(tunname, "/dev/tun%d", i);
/* Open device */
if( (fd=open(tunname, O_RDWR)) > 0 ){
sprintf(dev, "tun%d", i);
return fd;
}
}
return -1;
}
#include <linux/if_tun.h>
/* pre 2.4.6 compatibility */
#define OTUNSETNOCSUM (('T'<< 8) | 200)
#define OTUNSETDEBUG (('T'<< 8) | 201)
#define OTUNSETIFF (('T'<< 8) | 202)
#define OTUNSETPERSIST (('T'<< 8) | 203)
#define OTUNSETOWNER (('T'<< 8) | 204)
int tun_open(char *dev)
{
struct ifreq ifr;
int fd;
if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
return tun_open_old(dev);
memset(&ifr, 0, sizeof(ifr));
ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
if (*dev)
strncpy(ifr.ifr_name, dev, IFNAMSIZ);
if (ioctl(fd, TUNSETIFF, (void *) &ifr) < 0) {
if (errno == EBADFD) {
/* Try old ioctl */
if (ioctl(fd, OTUNSETIFF, (void *) &ifr) < 0)
goto failed;
} else
goto failed;
}
strcpy(dev, ifr.ifr_name);
return fd;
failed:
close(fd);
return -1;
}
int tun_close(int fd, char *dev)
{
return close(fd);
}
/* Read/write frames from TUN device */
int tun_write(int fd, char *buf, int len)
{
return write(fd, buf, len);
}
int tun_read(int fd, char *buf, int len)
{
return read(fd, buf, len);
}
any idea, google shows nothing
Try:
#include <net/if.h>
instead.

Page faults on OS X when reading with MMAP

I am trying to benchmark file system I/O on Mac OS X using mmap.
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <math.h>
char c;
int main(int argc, char ** argv)
{
if (argc != 2)
{
printf("no files\n");
exit(1);
}
int fd = open(argv[1], O_RDONLY);
fcntl(fd, F_NOCACHE, 1);
int offset=0;
int size=0x100000;
int pagesize = getpagesize();
struct stat stats;
fstat(fd, &stats);
int filesize = stats.st_size;
printf("%d byte pages\n", pagesize);
printf("file %s # %d bytes\n", argv[1], filesize);
while(offset < filesize)
{
if(offset + size > filesize)
{
int pages = ceil((filesize-offset)/(double)pagesize);
size = pages*pagesize;
}
printf("mapping offset %x with size %x\n", offset, size);
void * mem = mmap(0, size, PROT_READ, 0, fd, offset);
if(mem == -1)
return 0;
offset+=size;
int i=0;
for(; i<size; i+=pagesize)
{
c = *((char *)mem+i);
}
munmap(mem, size);
}
return 0;
}
The idea is that I'll map a file or portion of it and then cause a page fault by dereferencing it. I am slowly losing my sanity since this doesn't at all work and I've done similar things on Linux before.
Change this line
void * mem = mmap(0, size, PROT_READ, 0, fd, offset);
to
void * mem = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
And, don't compare mem with -1. Use this instead:
if(mem == MAP_FAILED) { ... }
It's both more readable and more portable.
General advice: if you're on a different UNIX platform from what you're used to, it's a good idea to open the man page. For mmap on OS X, it can be found here. It says
Conforming applications must specify either MAP_PRIVATE or MAP_SHARED.
So, specifying 0 on the fourth
argument is not OK in OS X. I believe
this is true for BSD in general.

Resources