Why does IsProcessorFeaturePresent(PF_ERMS_AVAILABLE) appear to disagree with CPUID? - winapi

When I run the following code, I get ERMS == false, but erms == true. Why is this?
#include <intrin.h>
#include <Windows.h>
/*
 * Minimal reproducer: query the ERMS feature flag in two ways so the results
 * can be compared. `erms` is derived from CPUID directly, `ERMS` from the
 * Windows API. Neither value is printed or returned.
 */
int main()
{
int regs[4];
/* CPUID leaf 7, sub-leaf 0; the four output registers land in regs[] */
__cpuidex(regs, 7, 0);
// https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features
/* Bit 9 of regs[1] (EBX), collapsed to true/false with !! */
bool erms = !!(regs[1] /* EBX */ & (1 << 9));
/* The same feature as reported by the operating system */
bool ERMS = !!IsProcessorFeaturePresent(PF_ERMS_AVAILABLE);
}

Related

Userfaultfd write protection appears unsupported when checking through the UFFDIO_API ioctl

I am trying to use the write protection feature of Linux's userfaultfd, but it does not appear to be enabled in my kernel even though I am using version 5.13 (write protection should be fully supported in 5.10+).
When I run
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
/*
 * Print `msg` followed by the text for the current errno (via perror) and
 * terminate the process with EXIT_FAILURE. The do { } while (0) wrapper lets
 * the macro be used like a single statement, e.g. as an unbraced if body.
 */
#define errExit(msg) \
do { \
perror(msg); \
exit(EXIT_FAILURE); \
} while (0)
/*
 * Returns 1 when every bit set in `bit` is also set in `val`, otherwise 0.
 * An empty `bit` mask therefore always yields 1.
 */
static int has_bit(uint64_t val, uint64_t bit) {
    /* Bits requested by the mask that `val` does not provide. */
    const uint64_t missing = bit & ~val;

    return missing == 0;
}
/*
 * Opens a userfaultfd, performs the UFFDIO_API handshake while requesting
 * UFFD_FEATURE_PAGEFAULT_FLAG_WP, and prints which bits the kernel set in the
 * returned `ioctls` and `features` masks.
 */
int main() {
    struct uffdio_api handshake;
    long fd;

    fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (fd == -1) {
        errExit("userfaultfd");
    }

    /* Ask for write-protect fault reporting as part of the handshake. */
    handshake.api = UFFD_API;
    handshake.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
    if (ioctl(fd, UFFDIO_API, &handshake) == -1) {
        errExit("ioctl-UFFDIO_API");
    }

    /* Each _UFFDIO_* constant is used as a bit position within `ioctls`. */
    printf("UFFDIO_API: %d\n",
           has_bit(handshake.ioctls, 1UL << _UFFDIO_API));
    printf("UFFDIO_REGISTER: %d\n",
           has_bit(handshake.ioctls, 1UL << _UFFDIO_REGISTER));
    printf("UFFDIO_UNREGISTER: %d\n",
           has_bit(handshake.ioctls, 1UL << _UFFDIO_UNREGISTER));
    printf("UFFDIO_WRITEPROTECT: %d\n",
           has_bit(handshake.ioctls, 1UL << _UFFDIO_WRITEPROTECT));

    /* The feature flag is a ready-made mask, so it is tested without shifting. */
    printf("UFFD_FEATURE_PAGEFAULT_FLAG_WP: %d\n",
           has_bit(handshake.features, UFFD_FEATURE_PAGEFAULT_FLAG_WP));
}
The output is
UFFDIO_API: 1
UFFDIO_REGISTER: 1
UFFDIO_UNREGISTER: 1
UFFDIO_WRITEPROTECT: 0
UFFD_FEATURE_PAGEFAULT_FLAG_WP: 1
The UFFD_FEATURE_PAGEFAULT_FLAG_WP feature is enabled, but the UFFDIO_WRITEPROTECT ioctl is marked as not supported, which is necessary to enable write protection.
What might lead to this feature being disabled, and how can I enable it?
I am using Ubuntu MATE 21.10 with Linux kernel version 5.13.0-30-generic.
EDIT:
It seems like despite the man page section on the UFFD_API ioctl (https://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html), this might be the intended behavior for a system where write protection is enabled. However, when I run a full program that spawns a poller thread and writes to the protected memory, the poller thread does not receive any notification.
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * Print `msg` followed by the text for the current errno (via perror) and
 * terminate the process with EXIT_FAILURE. The do { } while (0) wrapper lets
 * the macro be used like a single statement, e.g. as an unbraced if body.
 */
#define errExit(msg) \
do { \
perror(msg); \
exit(EXIT_FAILURE); \
} while (0)
/* System page size in bytes; assigned in main() from sysconf(_SC_PAGE_SIZE). */
static int page_size;
/*
 * Thread entry point. `arg` carries the userfaultfd descriptor cast to a
 * pointer. Blocks in poll() until the descriptor reports something, prints
 * the poll() result, then terminates the whole process.
 */
static void* fault_handler_thread(void* arg) {
long uffd; /* userfaultfd file descriptor */
uffd = (long) arg;
/* Loop, handling incoming events on the userfaultfd
file descriptor. */
/* Note: the body ends in exit(), so at most one iteration ever runs. */
for (;;) {
/* See what poll() tells us about the userfaultfd. */
struct pollfd pollfd;
int nready;
pollfd.fd = uffd;
pollfd.events = POLLIN;
/* Timeout -1: wait indefinitely */
nready = poll(&pollfd, 1, -1);
if (nready == -1)
errExit("poll");
printf("\nfault_handler_thread():\n");
printf(
" poll() returns: nready = %d; "
"POLLIN = %d; POLLERR = %d\n",
nready, (pollfd.revents & POLLIN) != 0,
(pollfd.revents & POLLERR) != 0);
// received fault, exit the program
/* The event itself is never read from the descriptor; the process just exits. */
exit(EXIT_FAILURE);
}
}
/*
 * Reproducer from the question: registers a one-page anonymous mapping with
 * userfaultfd in write-protect mode, write-protects it, starts a poller
 * thread, and then writes into the mapping from the main thread.
 * Exits with EXIT_SUCCESS if the writes complete without the poller thread
 * terminating the process first.
 */
int main() {
long uffd; /* userfaultfd file descriptor */
char* addr; /* Start of region handled by userfaultfd */
uint64_t len; /* Length of region handled by userfaultfd */
pthread_t thr; /* ID of thread that handles page faults */
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
struct uffdio_writeprotect uffdio_wp;
int s;
page_size = sysconf(_SC_PAGE_SIZE);
/* The region is exactly one page long */
len = page_size;
/* Create and enable userfaultfd object. */
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd == -1)
errExit("userfaultfd");
uffdio_api.api = UFFD_API;
uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
errExit("ioctl-UFFDIO_API");
addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED)
errExit("mmap");
printf("Address returned by mmap() = %p\n", addr);
/* Register the memory range of the mapping we just created for
handling by the userfaultfd object. */
uffdio_register.range.start = (unsigned long) addr;
uffdio_register.range.len = len;
uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
errExit("ioctl-UFFDIO_REGISTER");
printf("uffdio_register.ioctls = 0x%llx\n", uffdio_register.ioctls);
/* NOTE(review): this masks with _UFFDIO_WRITEPROTECT itself, whereas the
first program in this post tests the same constant as a bit position
(1UL << _UFFDIO_WRITEPROTECT) - confirm which form is intended. */
printf("Have _UFFDIO_WRITEPROTECT? %s\n", (uffdio_register.ioctls & _UFFDIO_WRITEPROTECT) ? "YES" : "NO");
/* Write-protect the whole range. Nothing has written to the mapping
between mmap() and this call. */
uffdio_wp.range.start = (unsigned long) addr;
uffdio_wp.range.len = len;
uffdio_wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffdio_wp) == -1)
errExit("ioctl-UFFDIO_WRITEPROTECT");
/* Create a thread that will process the userfaultfd events. */
s = pthread_create(&thr, NULL, fault_handler_thread, (void*) uffd);
if (s != 0) {
/* pthread_create returns the error number instead of setting errno */
errno = s;
errExit("pthread_create");
}
/* Main thread now touches memory in the mapping, touching
locations 1024 bytes apart. This will trigger userfaultfd
events for all pages in the region. */
/* Give the poller thread time to start before the first write */
usleep(100000);
size_t l;
l = 0xf; /* Ensure that faulting address is not on a page
boundary, in order to test that we correctly
handle that case in fault_handling_thread(). */
char i = 0;
while (l < len) {
printf("Write address %p in main(): ", addr + l);
/* This store is the access expected to be reported to the poller thread */
addr[l] = i++;
printf("%d\n", addr[l]);
l += 1024;
usleep(100000); /* Slow things down a little */
}
exit(EXIT_SUCCESS);
}
The UFFDIO_API ioctl never seems to report _UFFDIO_WRITEPROTECT, as can be seen in the kernel source code (1, 2). I assume this is because whether the operation is supported depends on the kind of underlying mapping.
The feature is in fact reported on a per-registered-range basis. You have to set the API with ioctl(uffd, UFFDIO_API, ...) first, then register a range with ioctl(uffd, UFFDIO_REGISTER, ...), and then check the uffdio_register.ioctls field.
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <unistd.h>
/*
 * Print `msg` followed by the text for the current errno (via perror) and
 * terminate the process with EXIT_FAILURE. The do { } while (0) wrapper lets
 * the macro be used like a single statement, e.g. as an unbraced if body.
 */
#define errExit(msg) \
do { \
perror(msg); \
exit(EXIT_FAILURE); \
} while (0)
/*
 * Demonstrates that write-protect support is advertised per registered
 * range: performs the UFFDIO_API handshake, maps and registers a 16 KiB
 * anonymous region in write-protect mode, inspects the `ioctls` mask returned
 * by UFFDIO_REGISTER, and finally write-protects the region.
 *
 * Returns EXIT_SUCCESS when UFFDIO_WRITEPROTECT succeeds; any failing step
 * terminates the process with EXIT_FAILURE.
 */
int main(void) {
    long uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (uffd == -1)
        errExit("userfaultfd");

    struct uffdio_api uffdio_api = { .api = UFFD_API };
    if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
        errExit("ioctl(UFFDIO_API)");

    /*
     * mmap() already returns page-aligned memory, so no further alignment
     * step is needed. char * keeps the end-of-region arithmetic standard C.
     */
    const size_t region_sz = 0x4000;
    char *region = mmap(NULL, region_sz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
    if (region == MAP_FAILED)
        errExit("mmap");

    printf("Region mapped at %p - %p\n", (void *)region, (void *)(region + region_sz));

    struct uffdio_register uffdio_register = {
        .range = { .start = (unsigned long)region, .len = region_sz },
        .mode = UFFDIO_REGISTER_MODE_WP
    };
    if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
        errExit("ioctl(UFFDIO_REGISTER)");

    /*
     * _UFFDIO_WRITEPROTECT is the ioctl number, i.e. a bit position inside
     * the `ioctls` mask, so it must be shifted before masking.
     */
    const unsigned long long wp_bit = 1ULL << _UFFDIO_WRITEPROTECT;
    const int have_wp = (uffdio_register.ioctls & wp_bit) != 0;

    printf("uffdio_register.ioctls = 0x%llx\n", (unsigned long long)uffdio_register.ioctls);
    printf("Have _UFFDIO_WRITEPROTECT? %s\n", have_wp ? "YES" : "NO");

    /*
     * Only the write-protect ioctl is required here. Demanding every bit of
     * UFFD_API_RANGE_IOCTLS would reject ranges that simply do not offer
     * operations unrelated to this demo. errno is not meaningful at this
     * point, so the failure is reported without perror().
     */
    if (!have_wp) {
        fputs("bad ioctl set: UFFDIO_WRITEPROTECT not available for this range\n", stderr);
        exit(EXIT_FAILURE);
    }

    struct uffdio_writeprotect wp = {
        .range = { .start = (unsigned long)region, .len = region_sz },
        .mode = UFFDIO_WRITEPROTECT_MODE_WP
    };
    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp) == -1)
        errExit("ioctl(UFFDIO_WRITEPROTECT)");

    puts("ioctl(UFFDIO_WRITEPROTECT) successful.");
    return EXIT_SUCCESS;
}
Output:
Region mapped at 0x7f45c48fe000 - 0x7f45c4902000
uffdio_register.ioctls = 0x5c
Have _UFFDIO_WRITEPROTECT? YES
ioctl(UFFDIO_WRITEPROTECT) successful.
I found the solution. The write-protected pages must be touched after registering but before marking them as write-protected. This is an undocumented requirement, from what I can tell.
In other words, add
/* Write one byte into every page of the range: one store per page_size
bytes, starting at offset 0. */
for (size_t i = 0; i < len; i += page_size) {
addr[i] = 0;
}
between registering and write-protecting.
It works if I change the full example to
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * Print `msg` followed by the text for the current errno (via perror) and
 * terminate the process with EXIT_FAILURE. The do { } while (0) wrapper lets
 * the macro be used like a single statement, e.g. as an unbraced if body.
 */
#define errExit(msg) \
do { \
perror(msg); \
exit(EXIT_FAILURE); \
} while (0)
/* System page size in bytes; assigned in main() from sysconf(_SC_PAGE_SIZE)
and used there as the stride when touching the mapping. */
static int page_size;
/*
 * Thread entry point. `arg` is the userfaultfd descriptor smuggled through
 * the void* thread argument. Waits (without timeout) for the descriptor to
 * report an event, prints what poll() returned, and then ends the whole
 * process with EXIT_FAILURE. It never returns to the caller.
 */
static void* fault_handler_thread(void* arg) {
    struct pollfd watch;
    int ready;

    watch.fd = (long) arg;
    watch.events = POLLIN;

    /* Block until the userfaultfd has something to report. */
    ready = poll(&watch, 1, -1);
    if (ready == -1) {
        errExit("poll");
    }

    printf("\nfault_handler_thread():\n");
    printf(" poll() returns: nready = %d; POLLIN = %d; POLLERR = %d\n",
           ready,
           (watch.revents & POLLIN) != 0,
           (watch.revents & POLLERR) != 0);

    /* A fault was signalled: stop the program right here. */
    exit(EXIT_FAILURE);
}
/*
 * Working write-protect demo.
 *
 * Steps: create the userfaultfd and negotiate the API with
 * UFFD_FEATURE_PAGEFAULT_FLAG_WP, map one anonymous page, register it in
 * write-protect mode, touch it, write-protect it, start the poller thread,
 * and finally write into the page from the main thread.
 *
 * The first write into the protected page is expected to wake the poller
 * thread, which terminates the process with EXIT_FAILURE. EXIT_SUCCESS is
 * only reached if no event is ever delivered.
 */
int main() {
    long uffd;          /* userfaultfd file descriptor */
    char* addr;         /* Start of region handled by userfaultfd */
    uint64_t len;       /* Length of region handled by userfaultfd */
    pthread_t thr;      /* ID of thread that handles page faults */
    struct uffdio_api uffdio_api;
    struct uffdio_register uffdio_register;
    struct uffdio_writeprotect uffdio_wp;
    int s;

    page_size = sysconf(_SC_PAGE_SIZE);
    len = page_size;

    /* Create and enable userfaultfd object. */
    uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (uffd == -1)
        errExit("userfaultfd");

    uffdio_api.api = UFFD_API;
    uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
    if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
        errExit("ioctl-UFFDIO_API");

    addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (addr == MAP_FAILED)
        errExit("mmap");
    printf("Address returned by mmap() = %p\n", (void*) addr);

    /* Register the memory range of the mapping we just created for
       handling by the userfaultfd object. */
    uffdio_register.range.start = (unsigned long) addr;
    uffdio_register.range.len = len;
    uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
    if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
        errExit("ioctl-UFFDIO_REGISTER");

    /* _UFFDIO_WRITEPROTECT is a bit position in the ioctls mask, not a mask
       itself, so shift before testing. */
    printf("uffdio_register.ioctls = 0x%llx\n",
           (unsigned long long) uffdio_register.ioctls);
    printf("Have _UFFDIO_WRITEPROTECT? %s\n",
           (uffdio_register.ioctls & (1ULL << _UFFDIO_WRITEPROTECT)) ? "YES" : "NO");

    /* Touch every page after registering and before write-protecting it;
       without this step no write-protect notification was delivered. */
    for (size_t i = 0; i < len; i += page_size) {
        addr[i] = 0;
    }

    uffdio_wp.range.start = (unsigned long) addr;
    uffdio_wp.range.len = len;
    uffdio_wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffdio_wp) == -1)
        errExit("ioctl-UFFDIO_WRITEPROTECT");

    /* Create a thread that will process the userfaultfd events. */
    s = pthread_create(&thr, NULL, fault_handler_thread, (void*) uffd);
    if (s != 0) {
        /* pthread_create reports failure through its return value. */
        errno = s;
        errExit("pthread_create");
    }

    /* Give the poller thread time to reach poll() before the first write. */
    usleep(100000);

    /* Main thread now writes into the mapping at locations 1024 bytes apart.
       The starting offset is deliberately not on a page boundary. */
    size_t l = 0xf;
    char i = 0;
    while (l < len) {
        printf("Write address %p in main(): ", (void*) (addr + l));
        addr[l] = i++;
        printf("%d\n", addr[l]);
        l += 1024;
        usleep(100000); /* Slow things down a little */
    }
    exit(EXIT_SUCCESS);
}

what is wrong with the bpf_csum_diff function call?

I want to somehow redirect packets using ebpf. Took an example from the Cilium documentation: Implementation: proxy via bpf
here is an example of my macro in bpf_helpers:
...
/*
 * Declares bpf_csum_diff as a static function pointer whose value is the
 * BPF_FUNC_csum_diff constant cast to a pointer, so that it can be invoked
 * like an ordinary function taking (from, from_size, to, to_size, seed) and
 * returning int.
 */
static int (*bpf_csum_diff)(void *from, __u64 from_size, void *to, __u64 to_size, __u64 seed) = (void*) // NOLINT
BPF_FUNC_csum_diff;
...
here is the code of the proxy script itself:
#include <linux/bpf.h>
#include "../main/bpf_helpers.h"
#include "../main/bpf_endian.h"
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/types.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <stdlib.h>
#include "../main/utils_helpers.h"
#include <stddef.h>
#include <linux/pkt_cls.h>
/*
 * Rewrites the IPv4 destination address of TCP packets addressed to
 * cluster_ip so that they carry pod_ip instead, then patches the IP and TCP
 * checksums with the difference. All other packets are passed through
 * untouched. Always returns TC_ACT_OK.
 *
 * NOTE(review): the verifier log quoted in this post rejects a 4-byte context
 * read at offset 80 as the third instruction - presumably one of the
 * skb->data / skb->data_end loads below; confirm against the struct
 * __sk_buff layout and what the program type selected by SEC() may access.
 */
SEC("socket_filter")
int proxy(struct __sk_buff *skb)
{
const __be32 cluster_ip = 0x846F070A; // 10.7.111.132
const __be32 pod_ip = 0x0529050A; // 10.5.41.5
const int l3_off = ETH_HLEN; // IP header offset
const int l4_off = l3_off + 20; // TCP header offset: l3_off + sizeof(struct iphdr)
__be32 sum; // IP checksum
void *data = (void *)(long)skb->data;
void *data_end = (void *)(long)skb->data_end;
/* Bounds check: the packet must extend at least to the start of the TCP header */
if (data_end < data + l4_off) { // not our packet
return TC_ACT_OK;
}
struct iphdr *ip4 = (struct iphdr *)(data + l3_off);
/* Only TCP packets destined for cluster_ip are rewritten */
if (ip4->daddr != cluster_ip || ip4->protocol != IPPROTO_TCP /* || tcp->dport == 80 */) {
return TC_ACT_OK;
}
// DNAT: cluster_ip -> pod_ip, then update L3 and L4 checksum
/* Checksum delta between the old and the new 4-byte destination address */
sum = bpf_csum_diff((void *)&ip4->daddr, 4, (void *)&pod_ip, 4, 0);
/* NOTE(review): same call as the line above with the result discarded - looks redundant */
bpf_csum_diff((void *)&ip4->daddr, 4, (void *)&pod_ip, 4, 0);
/* Overwrite daddr in the packet, then apply the delta to both checksums */
bpf_skb_store_bytes(skb, l3_off + offsetof(struct iphdr, daddr), (void *)&pod_ip, 4, 0);
bpf_l3_csum_replace(skb, l3_off + offsetof(struct iphdr, check), 0, sum, 0);
bpf_l4_csum_replace(skb, l4_off + offsetof(struct tcphdr, check), 0, sum, BPF_F_PSEUDO_HDR);
return TC_ACT_OK;
}
char __license[] SEC("license") = "GPL";
Here is the error reported when the program is loaded:
...
2020/06/02 21:58:17 sf.Load(): %vebpf_prog_load() failed: 0: (b7) r2 = 86574346
1: (63) *(u32 *)(r10 -4) = r2
2: (61) r2 = *(u32 *)(r1 +80)
invalid bpf_context access off=80 size=4
processed 3 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
...
Tell me how to correctly proxy packets to another port and another ip in docker?

Missing linux/if.h

hello I am getting this error when using cygwin to try to make,
this is my makefile
# Builds the icmptx binary from it.c, icmptx.c and tun_dev.c.
# Each source is compiled to its own object file with $(flags), then linked.
flags=-g
all: icmptx
icmptx: it.o icmptx.o tun_dev.o
gcc $(flags) -o icmptx icmptx.o it.o tun_dev.o
it.o: it.c
gcc $(flags) -c it.c
icmptx.o: icmptx.c
gcc $(flags) -c icmptx.c
tun_dev.o: tun_dev.c
gcc $(flags) -c tun_dev.c
# Remove all build products
clean:
rm -f tun_dev.o it.o icmptx.o icmptx
the error is
$ make
gcc -g -c tun_dev.c
tun_dev.c:35:22: fatal error: linux/if.h: No such file or directory
compilation terminated.
Makefile:15: recipe for target `tun_dev.o' failed
make: *** [tun_dev.o] Error 1
here is my tun_dev.c file
/*
*/
/*
* tun_dev.c,v 1.1.2.4 2001/09/13 05:02:22 maxk Exp
*/
/* #include "config.h" */
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <syslog.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/if.h>
#include "vtun.h"
#include "lib.h"
/*
* Allocate TUN device, returns opened fd.
* Stores dev name in the first arg(must be large enough).
*/
/*
 * Allocate a TUN device through the legacy /dev/tunN nodes.
 *
 * dev: in/out device name. If non-empty, /dev/<dev> is opened and the result
 *      of open() is returned as-is. If empty, /dev/tun0 .. /dev/tun254 are
 *      probed in order and the name of the first node that opens ("tunN") is
 *      stored in dev, which must be large enough to hold it.
 *
 * Returns the opened descriptor, or -1 on failure. A name too long to form a
 * path fails with errno set to ENAMETOOLONG instead of overrunning the local
 * buffer.
 */
int tun_open_old(char *dev)
{
    char tunname[64];
    int i, fd;

    if (*dev) {
        int n = snprintf(tunname, sizeof tunname, "/dev/%s", dev);
        if (n < 0 || (size_t)n >= sizeof tunname) {
            errno = ENAMETOOLONG;
            return -1;
        }
        return open(tunname, O_RDWR);
    }

    for (i = 0; i < 255; i++) {
        snprintf(tunname, sizeof tunname, "/dev/tun%d", i);
        /* Open device; 0 is a valid descriptor, only negative means failure */
        fd = open(tunname, O_RDWR);
        if (fd >= 0) {
            sprintf(dev, "tun%d", i);
            return fd;
        }
    }
    return -1;
}
#include <linux/if_tun.h>
/*
 * pre 2.4.6 compatibility: request codes of the old TUN ioctls, built as
 * ('T' << 8) | number. Of these, only OTUNSETIFF is referenced by the code
 * shown here (the fallback path in tun_open()).
 */
#define OTUNSETNOCSUM (('T'<< 8) | 200)
#define OTUNSETDEBUG (('T'<< 8) | 201)
#define OTUNSETIFF (('T'<< 8) | 202)
#define OTUNSETPERSIST (('T'<< 8) | 203)
#define OTUNSETOWNER (('T'<< 8) | 204)
/*
 * Allocate a TUN device through /dev/net/tun.
 *
 * dev: in/out device name. A non-empty name is requested from the driver; on
 *      success the name stored in the ifreq after the ioctl is copied back
 *      into dev, which must be large enough to hold it.
 *
 * Falls back to tun_open_old() when /dev/net/tun cannot be opened, and to the
 * old OTUNSETIFF request when TUNSETIFF fails with EBADFD.
 * Returns the opened descriptor, or -1 on failure (errno from the failing
 * ioctl is preserved).
 */
int tun_open(char *dev)
{
    struct ifreq ifr;
    int fd = open("/dev/net/tun", O_RDWR);

    if (fd < 0)
        return tun_open_old(dev);

    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
    if (*dev) {
        /* Copy at most IFNAMSIZ - 1 bytes: the struct was zeroed above, so
           the name is always NUL-terminated, even when dev is too long. */
        strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
    }

    if (ioctl(fd, TUNSETIFF, (void *) &ifr) < 0) {
        /* Try old ioctl only when the new one is rejected with EBADFD */
        if (errno != EBADFD || ioctl(fd, OTUNSETIFF, (void *) &ifr) < 0) {
            int saved_errno = errno;   /* close() may overwrite errno */
            close(fd);
            errno = saved_errno;
            return -1;
        }
    }

    /* Guard the copy below against an unterminated name. */
    ifr.ifr_name[IFNAMSIZ - 1] = '\0';
    strcpy(dev, ifr.ifr_name);
    return fd;
}
/*
 * Close a TUN descriptor. `dev` is accepted but not used.
 * Returns whatever close() returns.
 */
int tun_close(int fd, char *dev)
{
    int status;

    (void) dev;
    status = close(fd);
    return status;
}
/* Read/write frames from TUN device */
/*
 * Write `len` bytes from `buf` to the descriptor `fd`.
 * Returns the result of write(), narrowed to int.
 */
int tun_write(int fd, char *buf, int len)
{
    ssize_t written = write(fd, buf, (size_t) len);

    return (int) written;
}
/*
 * Read up to `len` bytes from the descriptor `fd` into `buf`.
 * Returns the result of read(), narrowed to int.
 */
int tun_read(int fd, char *buf, int len)
{
    ssize_t received = read(fd, buf, (size_t) len);

    return (int) received;
}
any idea, google shows nothing
Try:
#include <net/if.h>
instead.

CreateThread() error

#include <windows.h>
#include <stdio.h>
#include <conio.h>
#include <stdlib.h>
#include <iostream.h>
#include <string.h>
/*
 * Thread body: reads an int through `param` and prints it ten times, one
 * value per line.
 * NOTE(review): declared as void (LPVOID); error C2664 quoted in this post
 * says CreateThread expects 'unsigned long (__stdcall *)(void *)' here.
 */
void Thread1( LPVOID param)
{
int a;
/* param is expected to point at an int owned by the caller */
a = *((int *)param);
for (int i= 0; i <10; i++)
printf("%d\n", a);
}
/*
 * Starts Thread1 with a pointer to the local `a`, then sleeps in 1 ms steps
 * (11 iterations) before returning 1.
 */
int main()
{
int a =4;
int ThreadId;
/* Arguments: no security attributes, 0x0100 stack size, Thread1 as entry
point, &a as its parameter, no creation flags, ThreadId receives the id.
The returned thread handle is discarded. */
CreateThread( 0, 0x0100, Thread1, &a, 0, &ThreadId);
/* The thread is never waited on; main only sleeps briefly */
for( int i = 0; i <11; i++)
Sleep( 1);
return( 1);
}
This is a simple code but I am not able to figure it out why visual studio is giving me error:
error C2664: 'CreateThread' : cannot convert parameter 3 from 'void (void *)' to 'unsigned long (__stdcall *)(void *)'
None of the functions with this name in scope match the target type
Error executing cl.exe.
Define the thread function as follows:
DWORD WINAPI MyThreadProc(LPVOID lpParameter)
CreateThread() requires the __stdcall calling convention.

Page faults on OS X when reading with MMAP

I am trying to benchmark file system I/O on Mac OS X using mmap.
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <math.h>
/* Global sink for the bytes read from the mapping in main(). */
char c;
/*
 * Benchmark sketch: maps the file named by argv[1] in chunks of up to
 * 0x100000 bytes and reads one byte per page of every chunk.
 * Exits with status 1 if no file argument is given; otherwise returns 0,
 * including when a mapping attempt fails.
 */
int main(int argc, char ** argv)
{
if (argc != 2)
{
printf("no files\n");
/* NOTE(review): exit() is used but <stdlib.h> is not among the includes shown */
exit(1);
}
/* The results of open(), fcntl() and fstat() are not checked */
int fd = open(argv[1], O_RDONLY);
fcntl(fd, F_NOCACHE, 1);
int offset=0;
int size=0x100000;
int pagesize = getpagesize();
struct stat stats;
fstat(fd, &stats);
int filesize = stats.st_size;
printf("%d byte pages\n", pagesize);
printf("file %s # %d bytes\n", argv[1], filesize);
while(offset < filesize)
{
/* Last chunk: shrink the mapping to the remaining bytes, rounded up to whole pages */
if(offset + size > filesize)
{
int pages = ceil((filesize-offset)/(double)pagesize);
size = pages*pagesize;
}
printf("mapping offset %x with size %x\n", offset, size);
/* The flags argument (fourth parameter) is 0 here */
void * mem = mmap(0, size, PROT_READ, 0, fd, offset);
/* Failure is detected by comparing the pointer with -1; the program then stops silently */
if(mem == -1)
return 0;
offset+=size;
int i=0;
/* Read one byte from each page of the chunk */
for(; i<size; i+=pagesize)
{
c = *((char *)mem+i);
}
munmap(mem, size);
}
return 0;
}
The idea is that I'll map a file or portion of it and then cause a page fault by dereferencing it. I am slowly losing my sanity since this doesn't at all work and I've done similar things on Linux before.
Change this line
void * mem = mmap(0, size, PROT_READ, 0, fd, offset);
to
void * mem = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
And, don't compare mem with -1. Use this instead:
if(mem == MAP_FAILED) { ... }
It's both more readable and more portable.
General advice: if you're on a different UNIX platform from what you're used to, it's a good idea to open the man page. For mmap on OS X, it can be found here. It says
Conforming applications must specify either MAP_PRIVATE or MAP_SHARED.
So, specifying 0 as the fourth argument is not OK on OS X. I believe this is true for BSD in general.

Resources