eBPF Validation error when trying to hash a string (process name) - linux-kernel

Hi I am trying to generate a 32bit hash for the full process name in ebpf. These process names can be long and will not fit on the stack hence the "heap" per cpu array. I am currently using libbpf bootstrap as a prototype from here: https://github.com/libbpf/libbpf-bootstrap.git I am having an issue with the verifier not validating the hash function. What is the problem here? I am stumped.
The meat of the code is:
uint32_t map_id = 0;
char *map_val = bpf_map_lookup_elem(&heap, &map_id);
if (!map_val)
return 0;
int bytes_read = bpf_probe_read_str(map_val, sizeof(e->filename), (void *)ctx + fname_off);
if (bytes_read > 0) {
map_val[ (bytes_read - 1) & (4096 -1) ] = 0;
uint32_t key = hash( (unsigned char*)map_val);
bpf_printk("process_exec count: %u, hash: %lu, full path: %s\n", bytes_read -1, key, map_val);
}
The hash function is:
uint32_t hash(unsigned char *str)
{
int c;
uint32_t hash = 5381;
while ( c = *str++ )
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
return hash;
}
I get a validator error:
; hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
91: (27) r4 *= 33
; hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
92: (0f) r4 += r1
; while ( c = *str++ )
93: (71) r1 = *(u8 *)(r2 +0)
R0=inv(id=6,smin_value=-4096,smax_value=4095) R1_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R2_w=map_value(id=0,off=4096,ks=4,vs=4096,imm=0) R4_w=inv(id=0) R6=ctx(id=0,off=0,umax_value=65535,var_off=(0x0; 0xffff)) R7=map_value(id=0,off=0,ks=4,vs=4096,imm=0) R8=invP0 R10=fp0 fp-8=mmmm???? fp-16=mmmmmmmm fp-24=mmmm???? fp-32=mmmmmmmm
invalid access to map value, value_size=4096 off=4096 size=1
R2 min value is outside of the allowed memory range
processed 32861 insns (limit 1000000) max_states_per_insn 4 total_states 337 peak_states 337 mark_read 4
-- END PROG LOAD LOG --
libbpf: prog 'handle_exec': failed to load: -13
libbpf: failed to load object 'bootstrap_bpf'
libbpf: failed to load BPF skeleton 'bootstrap_bpf': -13
Failed to load and verify BPF skeleton
Here is the complete diff for my use case:
diff --git a/examples/c/bootstrap.bpf.c b/examples/c/bootstrap.bpf.c
index d0860c0..c93ed58 100644
--- a/examples/c/bootstrap.bpf.c
+++ b/examples/c/bootstrap.bpf.c
## -20,6 +20,13 ## struct {
__uint(max_entries, 256 * 1024);
} rb SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(max_entries, 1);
+ __uint(value_size, 4096);
+} heap SEC(".maps");
+
const volatile unsigned long long min_duration_ns = 0;
SEC("tp/sched/sched_process_exec")
## -58,6 +65,22 ## int handle_exec(struct trace_event_raw_sched_process_exec *ctx)
/* successfully submit it to user-space for post-processing */
bpf_ringbuf_submit(e, 0);
+
+
+ uint32_t map_id = 0;
+ char *map_val = bpf_map_lookup_elem(&heap, &map_id);
+ if (!map_val)
+ return 0;
+
+ int bytes_read = bpf_probe_read_str(map_val, sizeof(e->filename), (void *)ctx + fname_off);
+ if (bytes_read > 0) {
+ // tell the validator bytes ready is between 0 and 4095
+ map_val[ (bytes_read - 1) & (4096 -1) ] = 0;
+
+ uint32_t key = hash( (unsigned char*)map_val);
+ bpf_printk("process_exec count: %u, hash: %u, full path: %s\n", bytes_read -1, key, map_val);
+ }
+
return 0;
}
## -109,4 +132,3 ## int handle_exit(struct trace_event_raw_sched_process_template* ctx)
bpf_ringbuf_submit(e, 0);
return 0;
}
-
diff --git a/examples/c/bootstrap.h b/examples/c/bootstrap.h
index b49e022..d268e56 100644
--- a/examples/c/bootstrap.h
+++ b/examples/c/bootstrap.h
## -4,7 +4,7 ##
#define __BOOTSTRAP_H
#define TASK_COMM_LEN 16
-#define MAX_FILENAME_LEN 127
+#define MAX_FILENAME_LEN 4096
struct event {
int pid;
## -16,4 +16,15 ## struct event {
bool exit_event;
};
+static inline
+uint32_t hash(unsigned char *str)
+{
+ int c;
+ uint32_t hash = 5381;
+ while ( c = *str++ )
+ hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
+
+ return hash;
+}
+
#endif /* __BOOTSTRAP_H */

TL;DR. You need to ensure that you are not reading past the end of the map value. So you need to check str never goes past the initial str value + 4095.
Verifier error explanation.
; while ( c = *str++ )
93: (71) r1 = *(u8 *)(r2 +0)
R0=inv(id=6,smin_value=-4096,smax_value=4095) R1_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R2_w=map_value(id=0,off=4096,ks=4,vs=4096,imm=0) R4_w=inv(id=0) R6=ctx(id=0,off=0,umax_value=65535,var_off=(0x0; 0xffff)) R7=map_value(id=0,off=0,ks=4,vs=4096,imm=0) R8=invP0 R10=fp0 fp-8=mmmm???? fp-16=mmmmmmmm fp-24=mmmm???? fp-32=mmmmmmmm
invalid access to map value, value_size=4096 off=4096 size=1
R2 min value is outside of the allowed memory range
The verifier here is telling you that your code may attempt to read one byte (size=1) from the map value, at offset 4096 (off=4096). Since the map value has a size of 4096 (value_size=4096), that would end up reading after the end of the map value, leading to an unbounded memory access. Hence, the verifier rejects it.

Related

W5100 is sending garbage

I try to implement a web interface with a W5100 Ethernet Controller and an XMega, but my browser prints out this weird result:
Please take a look at my code:
SPIM_Config_t Config_SPIM = {
.Device = &SPIC,
.Mode = SPI_MODE_0,
.Prescaler = SPI_PRESCALER_64,
};
W5100_Config_t Config_Ethernet = {
.Submask = {255, 255, 0, 0},
.IP = {169, 254, 133, 121},
.Gateway = {169, 154, 133, 129},
.MAC = {0x00, 0x00, 0x00, 0x00, 0x00, 0xAA}
};
uint8_t Rx_Buffer[2048];
uint8_t Tx_Buffer[2048];
const char HTTP[] = "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nPragma: no-cache\r\n\r\n"
"<html>\r\n"
"<body>\r\n"
"<title>Title</title>\r\n"
"<p>Hello world</p>\r\n"
"</body>\r\n"
"</html>\r\n";
int main(void)
{
W5100_Init(&Config_SPIM, &Config_Ethernet);
while(1)
{
W5100_Status_t Status;
W5100_GetState(0, &Status);
switch(Status)
{
case W5100_SOCK_CLOSED:
{
if(W5100_Open(0, W5100_PROT_TCP, 80, W5100_MEM_2K, W5100_MEM_2K, 65535) == W5100_NO_ERROR)
{
W5100_Listen(0, ETHERNET_TIMEOUT);
}
break;
}
case W5100_SOCK_ESTABLISHED:
{
uint16_t Rx_Bytes;
if(W5100_GetBytes(0, &Rx_Bytes) == W5100_NO_ERROR)
{
if(Rx_Bytes)
{
W5100_Receive(0, Rx_Buffer, Rx_Bytes);
strcpy((char*)Tx_Buffer, HTTP);
W5100_Send(0, Tx_Buffer, strlen((char*)HTTP), ETHERNET_TIMEOUT);
}
else
{
}
}
W5100_Disconnect(0, ETHERNET_TIMEOUT);
break;
}
case W5100_SOCK_FIN_WAIT:
case W5100_SOCK_CLOSING:
case W5100_SOCK_TIME_WAIT:
case W5100_SOCK_CLOSE_WAIT:
case W5100_SOCK_LAST_ACK:
{
W5100_Close(0, ETHERNET_TIMEOUT);
break;
}
}
}
}
I think the error is somewhere in my W5100_Send function and it seems that the Controller is sending the content of different memory locations, but I can´t figure out the error. The code based on the datasheet of the Ethernet Controller:
W5100_ErrorCode_t W5100_Send(uint8_t Socket, uint8_t* Buffer, uint16_t Length, uint32_t Timeout)
{
uint8_t Temp[2];
uint8_t Mask;
uint16_t SocketBase;
uint16_t Offset;
uint16_t Free;
uint16_t SocketMemory;
uint32_t Timeout_Temp = Timeout;
if(!_W5100_IsInitialized)
{
return W5100_NOT_INITIALIZED;
}
else if((Socket > 0x04) || (Buffer == NULL) || (Length == 0x00))
{
return W5100_INVALID_PARAM;
}
// Get the memory mask for address calculation
W5100_ReadRegister(W5100_REGISTER_TMSR, &Mask);
Mask &= (0x03 << (Socket << 0x01));
// Check for invalid memory by comparing the memory mask for the given socket and the socket index
if(((Socket > 0) && (Mask == 3)) || ((Socket > 1) && (Mask == 2)))
{
return W5100_INVALID_PARAM;
}
SocketBase = W5100_SOCKET_ADDR(Socket);
SocketMemory = W5100_SOCKET_MEM_OFFSET << Mask;
// Wait while the buffer is full
do
{
// Get the free bytes
W5100_ReadRegister(SocketBase + W5100_OFFSET_TX_FSR0, &Temp[0]);
W5100_ReadRegister(SocketBase + W5100_OFFSET_TX_FSR1, &Temp[1]);
Free = ((uint16_t)(Temp[0] << 0x08)) | Temp[1];
if(Timeout_Temp-- == 0x00)
{
W5100_Disconnect(Socket, Timeout);
return W5100_TIMEOUT;
}
_delay_ms(1);
}while(Free < Length);
// Get the write pointer address
W5100_ReadRegister(SocketBase + W5100_OFFSET_TX_WR0, &Temp[0]);
W5100_ReadRegister(SocketBase + W5100_OFFSET_TX_WR1, &Temp[1]);
Offset = (((uint16_t)(Temp[0] << 0x08)) | Temp[1]) & W5100_TX_MEM_MASK;
// Check for an overflow
if(Offset + Length > SocketMemory)
{
uint16_t Upper;
uint16_t Left;
Upper = SocketMemory - Offset;
Left = Length - Upper;
W5100_WriteMemory(W5100_TX_BUFFER_BASE + (SocketMemory * Socket) + Offset, Buffer, Upper);
W5100_WriteMemory(W5100_TX_BUFFER_BASE + (SocketMemory * Socket), Buffer, Left);
}
else
{
W5100_WriteMemory(W5100_TX_BUFFER_BASE + (SocketMemory * Socket) + Offset, Buffer, Length);
}
W5100_WriteRegister(SocketBase + W5100_OFFSET_TX_WR0, Offset >> 0x08);
W5100_WriteRegister(SocketBase + W5100_OFFSET_TX_WR1, Offset & 0xFF);
return W5100_ExecuteCommand(Socket, W5100_CMD_SEND, Timeout);
}
You should fully rewrite your W5100_Send, because it is full of issues.
For example, calculation of Mask value has no sense.
The cycle which is waiting for Free value always delays at least 1 ms, even when good value obtained from the beginning. Also, when timed out, it breaks, even if received Free value is good.
Offset value is damaged by & operation:
Offset = (((uint16_t)(Temp[0] << 0x08)) | Temp[1]) & W5100_TX_MEM_MASK;
This value is never increased by the written data size, and the damaged value is written back to W5100_OFFSET_TX_WR1:W5100_OFFSET_TX_WR0
The wrapping data writing has an error:
W5100_WriteMemory(W5100_TX_BUFFER_BASE + (SocketMemory * Socket) + Offset, Buffer, Upper);
W5100_WriteMemory(W5100_TX_BUFFER_BASE + (SocketMemory * Socket), Buffer, Left);
You're copying to both the parts from the start of Buffer. In the second line it should be &Buffer[Upper]
Etc etc...
First you need to determine size of sockets. I encourage you to set up the socket sizes from the beginning, thus avoiding offset and size calculation on the runtime.
But if you want to determine the socket size dynamically, then you can do it as follows:
uint16_t SocketBufAddr = W5100_TX_BUFFER_BASE; // Start of the socket memory block
SocketMemory = 0; // Size of the socket memory block
W5100_ReadRegister(W5100_REGISTER_TMSR, &Mask);
for (uint8_t i = 0 ; i <= Socket ; i++) {
SocketBufAddr += SocketMemory; // Increase the offset by the previous socket size
SocketMemory = 1024 << ((Mask >> (i * 2)) & 3);
}
now, the writing process should be something like this:
// Get the write pointer address
W5100_ReadRegister(SocketBase + W5100_OFFSET_TX_WR0, &Temp[0]);
W5100_ReadRegister(SocketBase + W5100_OFFSET_TX_WR1, &Temp[1]);
uint16_t WrPointer = (((uint16_t)(Temp[0] << 0x08)) | Temp[1]); // no & operation! It is the 16-bit pointer!!!
Offset = WrPointer & (SocketMemory - 1); // Offset inside the socket memory block. SocketMemory is always = 2^n
// Check for an overflow
if(Offset + Length > SocketMemory)
{
uint16_t Upper;
uint16_t Left;
Upper = SocketMemory - Offset ;
Left = Length - Upper;
W5100_WriteMemory(SocketBufAddr + Offset, Buffer, Upper);
W5100_WriteMemory(SocketBufAddr, &Buffer[Upper], Left);
}
else
{
W5100_WriteMemory(SocketBufAddr + Offset, Buffer, Length);
}
WrPointer += Length; // Increase full 16-bit pointer value
// Write the new pointer back
W5100_WriteRegister(SocketBase + W5100_OFFSET_TX_WR0, WrPointer >> 0x08);
W5100_WriteRegister(SocketBase + W5100_OFFSET_TX_WR1, WrPointer & 0xFF);
return W5100_ExecuteCommand(Socket, W5100_CMD_SEND, Timeout);

Looking for source code of __builtin_avr_delay_cycles called by _delay_ms in avr-gcc

I was investigating the delay_ms function of avr-gcc. In delay.h I found its definition:
void _delay_ms(double __ms)
{
double __tmp ;
#if __HAS_DELAY_CYCLES && defined(__OPTIMIZE__) && \
!defined(__DELAY_BACKWARD_COMPATIBLE__) && \
__STDC_HOSTED__
uint32_t __ticks_dc;
extern void __builtin_avr_delay_cycles(unsigned long);
__tmp = ((F_CPU) / 1e3) * __ms;
#if defined(__DELAY_ROUND_DOWN__)
__ticks_dc = (uint32_t)fabs(__tmp);
#elif defined(__DELAY_ROUND_CLOSEST__)
__ticks_dc = (uint32_t)(fabs(__tmp)+0.5);
#else
//round up by default
__ticks_dc = (uint32_t)(ceil(fabs(__tmp)));
#endif
__builtin_avr_delay_cycles(__ticks_dc);
#else
...
}
I am interested in how the __builtin_avr_delay_cycles function looks like internally and where it is defined? Where can I find the source?
As said in my comment to this very question on electronics.SE:
Compiler builtins are kinda funky to find, always, because they are not just C functions, but things that get inserted while parsing/compiling the code (at various levels of abstraction from the textual representation of the code itself. compiler theory stuff). What you're looking for is the function avr_expand_builtin in the GCC source tree. There's a case AVR_BUILTIN_DELAY_CYCLES in there. Look for what happens there.
Which is:
/* Implement `TARGET_EXPAND_BUILTIN'. */
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
SUBTARGET may be used as the target for computing one of EXP's operands.
IGNORE is nonzero if the value is to be ignored. */
static rtx
avr_expand_builtin (tree exp, rtx target,
rtx subtarget ATTRIBUTE_UNUSED,
machine_mode mode ATTRIBUTE_UNUSED,
int ignore)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
const char *bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
unsigned int id = DECL_FUNCTION_CODE (fndecl);
const struct avr_builtin_description *d = &avr_bdesc[id];
tree arg0;
rtx op0;
gcc_assert (id < AVR_BUILTIN_COUNT);
switch (id)
{
case AVR_BUILTIN_NOP:
emit_insn (gen_nopv (GEN_INT (1)));
return 0;
case AVR_BUILTIN_DELAY_CYCLES:
{
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
if (!CONST_INT_P (op0))
error ("%s expects a compile time integer constant", bname);
else
avr_expand_delay_cycles (op0);
return NULL_RTX;
}
…
thus, the function you're looking for is avr_expand_delay_cycles in the same file:
static void
avr_expand_delay_cycles (rtx operands0)
{
unsigned HOST_WIDE_INT cycles = UINTVAL (operands0) & GET_MODE_MASK (SImode);
unsigned HOST_WIDE_INT cycles_used;
unsigned HOST_WIDE_INT loop_count;
if (IN_RANGE (cycles, 83886082, 0xFFFFFFFF))
{
loop_count = ((cycles - 9) / 6) + 1;
cycles_used = ((loop_count - 1) * 6) + 9;
emit_insn (gen_delay_cycles_4 (gen_int_mode (loop_count, SImode),
avr_mem_clobber()));
cycles -= cycles_used;
}
if (IN_RANGE (cycles, 262145, 83886081))
{
loop_count = ((cycles - 7) / 5) + 1;
if (loop_count > 0xFFFFFF)
loop_count = 0xFFFFFF;
cycles_used = ((loop_count - 1) * 5) + 7;
emit_insn (gen_delay_cycles_3 (gen_int_mode (loop_count, SImode),
avr_mem_clobber()));
cycles -= cycles_used;
}
if (IN_RANGE (cycles, 768, 262144))
{
loop_count = ((cycles - 5) / 4) + 1;
if (loop_count > 0xFFFF)
loop_count = 0xFFFF;
cycles_used = ((loop_count - 1) * 4) + 5;
emit_insn (gen_delay_cycles_2 (gen_int_mode (loop_count, HImode),
avr_mem_clobber()));
cycles -= cycles_used;
}
if (IN_RANGE (cycles, 6, 767))
{
loop_count = cycles / 3;
if (loop_count > 255)
loop_count = 255;
cycles_used = loop_count * 3;
emit_insn (gen_delay_cycles_1 (gen_int_mode (loop_count, QImode),
avr_mem_clobber()));
cycles -= cycles_used;
}
while (cycles >= 2)
{
emit_insn (gen_nopv (GEN_INT (2)));
cycles -= 2;
}
if (cycles == 1)
{
emit_insn (gen_nopv (GEN_INT (1)));
cycles--;
}
}
Of biggest interest here is that this modifies a node in the Abstract Syntax Tree, and emits instructions there.

OCR algorithm (GOCR) to 32F429IDISCOVERY board

I'm trying to implement an OCR algorithm (GOCR algorithm specifically) to 32F429IDISCOVERY board and I'm still getting nothing back...
I'm recording a image from OV7670 camera in RGB565 format to SDRAM of the board that is then converted to greyscale and passed to the algorithm itself.
From this and other forums I got the impression that GOCR is very good algorithm and it seemed to be working very well on PC but I just cant get it to work on the board.
Does anyone have some experience with implementing OCR or GOCR? I am not sure where the problem is because it beaves in a very wierd way. The code stops in different part of the algorithm almost every time...
Calling the OCR algorithm:
void ocr_algorithm(char *output_str) {
job_t job1, *job; /* fixme, dont want global variables for lib */
job=OCR_JOB=&job1;
int linecounter;
const char *line;
uint8_t r,g,b;
uint32_t n,i,buffer;
char *p_pic;
uint32_t *image = (uint32_t*) SDRAM_START_ADR;
setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
job_init(job); /* init cfg and db */
job_init_image(job); /* single image */
p_pic = malloc(IMG_ROWS*IMG_COLUMNS);
// Converting RGB565 to grayscale
i=0;
for (n = 0; n < IMG_ROWS*IMG_COLUMNS; n++) {
if (n % 2 == 0){
buffer = image[i] & 0xFFFF;
}
else{
buffer = (image[i] >> 16) & 0xFFFF;
i++;
}
r = (uint8_t) ((buffer >> 11) & 0x1F);
g = (uint8_t) ((buffer >> 5) & 0x3F);
b = (uint8_t) (buffer & 0x1F);
// RGB888
r = ((r * 527) + 23) >> 6;
g = ((g * 259) + 33) >> 6;
b = ((b * 527) + 23) >> 6;
// Greyscale
p_pic[n] = 0.299*r + 0.587*g + 0.114*b;
}
//read_picture;
job->src.p.p = p_pic;
job->src.p.x = IMG_ROWS;
job->src.p.y = IMG_COLUMNS;
job->src.p.bpp = 1;
/* call main loop */
pgm2asc(job);
//print output
strcpy(output_str, "");
linecounter = 0;
line = getTextLine(&(job->res.linelist), linecounter++);
while (line) {
strcat(output_str, line);
strcat(output_str, "\n");
line = getTextLine(&(job->res.linelist), linecounter++);
}
free_textlines(&(job->res.linelist));
job_free_image(job);
free(p_pic);
}

How to do timeout less than 1 second?

To write more robust scripts it is useful to "forget" contents of expect buffer to ensure the matching is done only on recently received input:
# this leaves expect buffer with unmatched history
# + accumulates incoming data over 1 sec
set timeout 1
expect
# match everything in the buffer ~"forget"
expect *
# subsequent expect commands will see only what appeared since now
Is it possible to have the timeout smaller than 1 second without patching the expect sources?
Note: set timeout 0 will not work as the first expect doesn't leave the newly incoming data in buffer.
I'm not sure about the how to flush the buffer in the tcl interpreter.
I'm not sure about your use case, but I have found that the most reliable format for remote-shell scripting over expect, that the easiest thing to do is to include a #randomnumber at the end of each send, expect for the #randomnumber, this makes sure that the buffer is synced to the last line that I send to the spawned process. Your mileage would vary if the spawned process doesn't echo the characters you send.
The pure python implementation from pexpect is great if you are ok with moving to python from the TCL implementation. The buffers work slightly differently so it will take some getting used to. If you are executing commands over remote shells I'd recommend python-remote (Which i wrote)
you could gas the buffer in the method you are using above by
import pexpect
spawn = pexpect.spawn(command)
stuff_inbuffer = spawn.read_nonblocking(size=100000, timeout=0.1)
sending random strings to sync the buffer before repsonse
import random, pexpect
spawn = pexpect.spawn(command)
rand = random.random()
spawn.sendline(command + " #%s" %(rand))
spawn.expect("%s\r\n" %(rand))
you could then either get the buffer with and expect, or read which will wait until the buffer has size, or timeout is exceeded.
results = spwan.read(size=100000, timeout=10)
spawn.expect("something")
results = spawn.buffer
or
results = spawn.before
Patching expect is easy... use negative timeouts for milliseconds (except -1, which is special):
# set timeout to 100 milliseconds
set timeout -100
The following if named milliExpect.patch... cd into expect5.45 directory and do
patch -Np1 -i milliExpect.patch.
Then the usual (may have to stipulate where tcl is in configure)...
./configure; make; sudo make install
--- milliExpect.patch ----
--- expect5.45_orig/exp_event.c 2010-06-30 17:53:49.000000000 -0700
+++ expect5.45/exp_event.c 2014-09-30 12:50:18.733698995 -0700
## -277,6 +277,117 ##
}
}
+/* returns status, one of EOF, TIMEOUT, ERROR or DATA */
+/* can now return RECONFIGURE, too */
+/*ARGSUSED*/
+int exp_get_next_event_d(interp,esPtrs,n,esPtrOut,timeout,key)
+Tcl_Interp *interp;
+ExpState *(esPtrs[]);
+int n; /* # of esPtrs */
+ExpState **esPtrOut; /* 1st ready esPtr, not set if none */
+double timeout; /* milliseconds */
+int key;
+{
+ ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey);
+
+ ExpState *esPtr;
+ int i; /* index into in-array */
+#ifdef HAVE_PTYTRAP
+ struct request_info ioctl_info;
+#endif
+
+ int old_configure_count = exp_configure_count;
+
+ int timerFired = FALSE;
+ Tcl_TimerToken timerToken = 0;/* handle to Tcl timehandler descriptor */
+ /* We must delete any timer before returning. Doing so throughout
+ * the code makes it unreadable; isolate the unreadable nonsense here.
+ */
+#define RETURN(x) { \
+ if (timerToken) Tcl_DeleteTimerHandler(timerToken); \
+ return(x); \
+ }
+
+ for (;;) {
+ /* if anything has been touched by someone else, report that */
+ /* an event has been received */
+
+ for (i=0;i<n;i++) {
+ tsdPtr->rr++;
+ if (tsdPtr->rr >= n) tsdPtr->rr = 0;
+
+ esPtr = esPtrs[tsdPtr->rr];
+
+ if (esPtr->key != key) {
+ esPtr->key = key;
+ esPtr->force_read = FALSE;
+ *esPtrOut = esPtr;
+ RETURN(EXP_DATA_OLD);
+ } else if ((!esPtr->force_read) && (!expSizeZero(esPtr))) {
+ *esPtrOut = esPtr;
+ RETURN(EXP_DATA_OLD);
+ } else if (esPtr->notified) {
+ /* this test of the mask should be redundant but SunOS */
+ /* raises both READABLE and EXCEPTION (for no */
+ /* apparent reason) when selecting on a plain file */
+ if (esPtr->notifiedMask & TCL_READABLE) {
+ *esPtrOut = esPtr;
+ esPtr->notified = FALSE;
+ RETURN(EXP_DATA_NEW);
+ }
+ /*
+ * at this point we know that the event must be TCL_EXCEPTION
+ * indicating either EOF or HP ptytrap.
+ */
+#ifndef HAVE_PTYTRAP
+ RETURN(EXP_EOF);
+#else
+ if (ioctl(esPtr->fdin,TIOCREQCHECK,&ioctl_info) < 0) {
+ expDiagLog("ioctl error on TIOCREQCHECK: %s", Tcl_PosixError(interp));
+ RETURN(EXP_TCLERROR);
+ }
+ if (ioctl_info.request == TIOCCLOSE) {
+ RETURN(EXP_EOF);
+ }
+ if (ioctl(esPtr->fdin, TIOCREQSET, &ioctl_info) < 0) {
+ expDiagLog("ioctl error on TIOCREQSET after ioctl or open on slave: %s", Tcl_ErrnoMsg(errno));
+ }
+ /* presumably, we trapped an open here */
+ /* so simply continue by falling thru */
+#endif /* !HAVE_PTYTRAP */
+ }
+ }
+
+ if (!timerToken) {
+ if (timeout >= 0) {
+ timerToken = Tcl_CreateTimerHandler((int)timeout,
+ exp_timehandler,
+ (ClientData)&timerFired);
+ }
+ }
+
+ /* make sure that all fds that should be armed are */
+ for (i=0;i<n;i++) {
+ esPtr = esPtrs[i];
+ /*printf("CreateChannelHandler: %s\r\n",esPtr->name);*/
+ Tcl_CreateChannelHandler(
+ esPtr->channel,
+ TCL_READABLE | TCL_EXCEPTION,
+ exp_channelhandler,
+ (ClientData)esPtr);
+ esPtr->fg_armed = TRUE;
+ }
+
+ Tcl_DoOneEvent(0); /* do any event */
+
+ if (timerFired) return(EXP_TIMEOUT);
+
+ if (old_configure_count != exp_configure_count) {
+ RETURN(EXP_RECONFIGURE);
+ }
+ }
+}
+
/* Having been told there was an event for a specific ExpState, get it */
/* This returns status, one of EOF, TIMEOUT, ERROR or DATA */
/*ARGSUSED*/
--- expect5.45_orig/expect.c 2010-10-26 15:09:36.000000000 -0700
+++ expect5.45/expect.c 2014-09-30 13:01:42.693800013 -0700
## -41,6 +41,12 ##
#include "tcldbg.h"
#endif
+#define TclUtfToUniChar(str, chPtr) \
+ ((((unsigned char) *(str)) < 0xC0) ? \
+ ((*(chPtr) = (Tcl_UniChar) *(str)), 1) \
+ : Tcl_UtfToUniChar(str, chPtr))
+
+
#include "retoglob.c" /* RE 2 GLOB translator C variant */
/* initial length of strings that we can guarantee patterns can match */
## -123,6 +129,7 ##
int duration; /* permanent or temporary */
int timeout_specified_by_flag; /* if -timeout flag used */
int timeout; /* timeout period if flag used */
+ double timeout_double; /* if timeout < -1 */
struct exp_cases_descriptor ecd;
struct exp_i *i_list;
} exp_cmds[4];
## -559,6 +566,11 ##
goto error;
}
eg->timeout_specified_by_flag = TRUE;
+ if (eg->timeout < -1) {
+ eg->timeout_double = (double)eg->timeout * -1.;
+ } else {
+ eg->timeout_double = (double)eg->timeout * 1000.;
+ }
break;
case EXP_ARG_NOBRACE:
/* nobrace does nothing but take up space */
## -1812,6 +1824,74 ##
return cc;
}
+/* returns # of bytes read or (non-positive) error of form EXP_XXX */
+/* returns 0 for end of file */
+/* If timeout is non-zero, set an alarm before doing the read, else assume */
+/* the read will complete immediately. */
+/*ARGSUSED*/
+static int
+expIRead_d( /* INTL */
+ Tcl_Interp *interp,
+ ExpState *esPtr,
+ double timeout,
+ int save_flags)
+{
+ int cc = EXP_TIMEOUT;
+ int size;
+
+ /* We drop one third when are at least 2/3 full */
+ /* condition is (size >= max*2/3) <=> (size*3 >= max*2) */
+ if (expSizeGet(esPtr)*3 >= esPtr->input.max*2)
+ exp_buffer_shuffle(interp,esPtr,save_flags,EXPECT_OUT,"expect");
+ size = expSizeGet(esPtr);
+
+#ifdef SIMPLE_EVENT
+ restart:
+
+ alarm_fired = FALSE;
+
+ if (timeout > -1) {
+ if (timeout > 0) {
+ usleep((int)timeout * 1000);
+ } else {
+ usleep(1000 * 1); /* ?? is 1 ms enough ??? */
+ }
+ }
+#endif
+
+ cc = Tcl_ReadChars(esPtr->channel, esPtr->input.newchars,
+ esPtr->input.max - esPtr->input.use,
+ 0 /* no append */);
+ i_read_errno = errno;
+
+ if (cc > 0) {
+ memcpy (esPtr->input.buffer + esPtr->input.use,
+ Tcl_GetUnicodeFromObj (esPtr->input.newchars, NULL),
+ cc * sizeof (Tcl_UniChar));
+ esPtr->input.use += cc;
+ }
+
+#ifdef SIMPLE_EVENT
+ alarm(0);
+
+ if (cc == -1) {
+ /* check if alarm went off */
+ if (i_read_errno == EINTR) {
+ if (alarm_fired) {
+ return EXP_TIMEOUT;
+ } else {
+ if (Tcl_AsyncReady()) {
+ int rc = Tcl_AsyncInvoke(interp,TCL_OK);
+ if (rc != TCL_OK) return(exp_tcl2_returnvalue(rc));
+ }
+ goto restart;
+ }
+ }
+ }
+#endif
+ return cc;
+}
+
/*
* expRead() does the logical equivalent of a read() for the expect command.
* This includes figuring out which descriptor should be read from.
## -1932,6 +2012,126 ##
}
return(cc);
}
+/*
+ * expRead_d() does the logical equivalent of a read() for the expect command.
+ * This includes figuring out which descriptor should be read from.
+ *
+ * The result of the read() is left in a spawn_id's buffer rather than
+ * explicitly passing it back. Note that if someone else has modified a buffer
+ * either before or while this expect is running (i.e., if we or some event has
+ * called Tcl_Eval which did another expect/interact), expRead will also call
+ * this a successful read (for the purposes if needing to pattern match against
+ * it).
+ */
+
+/* if it returns a negative number, it corresponds to a EXP_XXX result */
+/* if it returns a non-negative number, it means there is data */
+/* (0 means nothing new was actually read, but it should be looked at again) */
+int
+expRead_d(
+ Tcl_Interp *interp,
+ ExpState *(esPtrs[]), /* If 0, then esPtrOut already known and set */
+ int esPtrsMax, /* number of esPtrs */
+ ExpState **esPtrOut, /* Out variable to leave new ExpState. */
+ double timeout,
+ int key)
+{
+ ExpState *esPtr;
+
+ int size;
+ int cc;
+ int write_count;
+ int tcl_set_flags; /* if we have to discard chars, this tells */
+ /* whether to show user locally or globally */
+
+ if (esPtrs == 0) {
+ /* we already know the ExpState, just find out what happened */
+ cc = exp_get_next_event_info(interp,*esPtrOut);
+ tcl_set_flags = TCL_GLOBAL_ONLY;
+ } else {
+ cc = exp_get_next_event_d(interp,esPtrs,esPtrsMax,esPtrOut,timeout,key);
+ tcl_set_flags = 0;
+ }
+
+ esPtr = *esPtrOut;
+
+ if (cc == EXP_DATA_NEW) {
+ /* try to read it */
+ cc = expIRead_d(interp,esPtr,timeout,tcl_set_flags);
+
+ /* the meaning of 0 from i_read means eof. Muck with it a */
+ /* little, so that from now on it means "no new data arrived */
+ /* but it should be looked at again anyway". */
+ if (cc == 0) {
+ cc = EXP_EOF;
+ } else if (cc > 0) {
+ /* successfully read data */
+ } else {
+ /* failed to read data - some sort of error was encountered such as
+ * an interrupt with that forced an error return
+ */
+ }
+ } else if (cc == EXP_DATA_OLD) {
+ cc = 0;
+ } else if (cc == EXP_RECONFIGURE) {
+ return EXP_RECONFIGURE;
+ }
+
+ if (cc == EXP_ABEOF) { /* abnormal EOF */
+ /* On many systems, ptys produce EIO upon EOF - sigh */
+ if (i_read_errno == EIO) {
+ /* Sun, Cray, BSD, and others */
+ cc = EXP_EOF;
+ } else if (i_read_errno == EINVAL) {
+ /* Solaris 2.4 occasionally returns this */
+ cc = EXP_EOF;
+ } else {
+ if (i_read_errno == EBADF) {
+ exp_error(interp,"bad spawn_id (process died earlier?)");
+ } else {
+ exp_error(interp,"i_read(spawn_id fd=%d): %s",esPtr->fdin,
+ Tcl_PosixError(interp));
+ if (esPtr->close_on_eof) {
+ exp_close(interp,esPtr);
+ }
+ }
+ return(EXP_TCLERROR);
+ /* was goto error; */
+ }
+ }
+
+ /* EOF, TIMEOUT, and ERROR return here */
+ /* In such cases, there is no need to update screen since, if there */
+ /* was prior data read, it would have been sent to the screen when */
+ /* it was read. */
+ if (cc < 0) return (cc);
+
+ /*
+ * update display
+ */
+
+ size = expSizeGet(esPtr);
+ if (size) write_count = size - esPtr->printed;
+ else write_count = 0;
+
+ if (write_count) {
+ /*
+ * Show chars to user if they've requested it, UNLESS they're seeing it
+ * already because they're typing it and tty driver is echoing it.
+ * Also send to Diag and Log if appropriate.
+ */
+ expLogInteractionU(esPtr,esPtr->input.buffer + esPtr->printed, write_count);
+
+ /*
+ * strip nulls from input, since there is no way for Tcl to deal with
+ * such strings. Doing it here lets them be sent to the screen, just
+ * in case they are involved in formatting operations
+ */
+ if (esPtr->rm_nulls) size = expNullStrip(&esPtr->input,esPtr->printed);
+ esPtr->printed = size; /* count'm even if not logging */
+ }
+ return(cc);
+}
/* when buffer fills, copy second half over first and */
/* continue, so we can do matches over multiple buffers */
## -2363,7 +2563,12 ##
/* "!e" means no case matched - transfer by default */
if (!e || e->transfer) {
- int remainder = numchars-match;
+ int remainder;
+ if (match > numchars) {
+ match = numchars;
+ eo->matchlen = match;
+ }
+ remainder = numchars-match;
/* delete matched chars from input buffer */
esPtr->printed -= match;
if (numchars != 0) {
## -2548,6 +2753,11 ##
time_t current_time = 0; /* current time (when we last looked)*/
time_t end_time; /* future time at which to give up */
+ double start_time_total_d; /* time at beginning of this procedure */
+ double start_time_d = 0.; /* time when restart label hit */
+ double current_time_d = 0.; /* current time (when we last looked)*/
+ double end_time_d; /* future time at which to give up */
+
ExpState *last_esPtr; /* for differentiating when multiple f's */
/* to print out better debugging messages */
int last_case; /* as above but for case */
## -2556,8 +2766,9 ##
int key; /* identify this expect command instance */
int configure_count; /* monitor exp_configure_count */
- int timeout; /* seconds */
+ int timeout; /* seconds (or milliseconds if less than -1) */
int remtime; /* remaining time in timeout */
+ double remtime_d; /* remaining time in timeout (milliseconds) */
int reset_timer; /* should timer be reset after continue? */
Tcl_Time temp_time;
Tcl_Obj* new_cmd = NULL;
## -2585,7 +2796,9 ##
Tcl_GetTime (&temp_time);
start_time_total = temp_time.sec;
+ start_time_total_d = temp_time.sec * 1000. + temp_time.usec / 1000.;
start_time = start_time_total;
+ start_time_d = start_time_total_d;
reset_timer = TRUE;
if (&StdinoutPlaceholder == (ExpState *)clientData) {
## -2641,6 +2854,7 ##
else {
Tcl_GetTime (&temp_time);
start_time = temp_time.sec;
+ start_time_d = temp_time.sec * 1000. + temp_time.usec / 1000.;
}
if (eg.timeout_specified_by_flag) {
## -2669,7 +2883,9 ##
if (reset_timer) {
Tcl_GetTime (&temp_time);
current_time = temp_time.sec;
+ current_time_d = temp_time.sec * 1000. + temp_time.usec / 1000.;
end_time = current_time + timeout;
+ end_time_d = current_time_d - timeout;
} else {
reset_timer = TRUE;
}
## -2677,12 +2893,20 ##
/* remtime and current_time updated at bottom of loop */
remtime = timeout;
+ remtime_d = timeout * -1.;
for (;;) {
- if ((timeout != EXP_TIME_INFINITY) && (remtime < 0)) {
+
+ if ((timeout > EXP_TIME_INFINITY) && (remtime < 0)) {
+ cc = EXP_TIMEOUT;
+ } else if ((timeout < EXP_TIME_INFINITY) && (remtime_d < 0.)) {
cc = EXP_TIMEOUT;
} else {
+ if (timeout >= EXP_TIME_INFINITY) {
cc = expRead(interp,esPtrs,mcount,&esPtr,remtime,key);
+ } else {
+ cc = expRead_d(interp,esPtrs,mcount,&esPtr,remtime_d,key);
+ }
}
/*SUPPRESS 530*/
## -2732,7 +2956,9 ##
if (timeout != EXP_TIME_INFINITY) {
Tcl_GetTime (&temp_time);
current_time = temp_time.sec;
+ current_time_d = temp_time.sec * 1000. + temp_time.usec / 1000.;
remtime = end_time - current_time;
+ remtime_d = end_time_d - current_time_d;
}
}

efficiently find the first element matching a bit mask

I have a list of N 64-bit integers whose bits represent small sets. Each integer has at most k bits set to 1. Given a bit mask, I would like to find the first element in the list that matches the mask, i.e. element & mask == element.
Example:
If my list is:
index abcdef
0 001100
1 001010
2 001000
3 000100
4 000010
5 000001
6 010000
7 100000
8 000000
and my mask is 111000, the first element matching the mask is at index 2.
Method 1:
Linear search through the entire list. This takes O(N) time and O(1) space.
Method 2:
Precompute a tree of all possible masks, and at each node keep the answer for that mask. This takes O(1) time for the query, but takes O(2^64) space.
Question:
How can I find the first element matching the mask faster than O(N), while still using a reasonable amount of space? I can afford to spend polynomial time in precomputation, because there will be a lot of queries. The key is that k is small. In my application, k <= 5 and N is in the thousands. The mask has many 1s; you can assume that it is drawn uniformly from the space of 64-bit integers.
Update:
Here is an example data set and a simple benchmark program that runs on Linux: http://up.thirld.com/binmask.tar.gz. For large.in, N=3779 and k=3. The first line is N, followed by N unsigned 64-bit ints representing the elements. Compile with make. Run with ./benchmark.e >large.out to create the true output, which you can then diff against. (Masks are generated randomly, but the random seed is fixed.) Then replace the find_first() function with your implementation.
The simple linear search is much faster than I expected. This is because k is small, and so for a random mask, a match is found very quickly on average.
A suffix tree (on bits) will do the trick, with the original priority at the leaf nodes:
000000 -> 8
1 -> 5
10 -> 4
100 -> 3
1000 -> 2
10 -> 1
100 -> 0
10000 -> 6
100000 -> 7
where if the bit is set in the mask, you search both arms, and if not, you search only the 0 arm; your answer is the minimum number you encounter at a leaf node.
You can improve this (marginally) by traversing the bits not in order but by maximum discriminability; in your example, note that 3 elements have bit 2 set, so you would create
2:0 0:0 1:0 3:0 4:0 5:0 -> 8
5:1 -> 5
4:1 5:0 -> 4
3:1 4:0 5:0 -> 3
1:1 3:0 4:0 5:0 -> 6
0:1 1:0 3:0 4:0 5:0 -> 7
2:1 0:0 1:0 3:0 4:0 5:0 -> 2
4:1 5:0 -> 1
3:1 4:0 5:0 -> 0
In your example mask this doesn't help (since you have to traverse both the bit2==0 and bit2==1 sides since your mask is set in bit 2), but on average it will improve the results (but at a cost of setup and more complex data structure). If some bits are much more likely to be set than others, this could be a huge win. If they're pretty close to random within the element list, then this doesn't help at all.
If you're stuck with essentially random bits set, you should get about (1-5/64)^32 benefit from the suffix tree approach on average (13x speedup), which might be better than the difference in efficiency due to using more complex operations (but don't count on it--bit masks are fast). If you have a nonrandom distribution of bits in your list, then you could do almost arbitrarily well.
This is the bitwise Kd-tree. It typically needs less than 64 visits per lookup operation. Currently, the selection of the bit (dimension) to pivot on is random.
#include <limits.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
typedef unsigned long long Thing;
typedef unsigned long Number;
unsigned thing_ffs(Thing mask);
Thing rand_mask(unsigned bitcnt);
#define WANT_RANDOM 31
#define WANT_BITS 3
#define BITSPERTHING (CHAR_BIT*sizeof(Thing))
#define NONUMBER ((Number)-1)
struct node {
Thing value;
Number num;
Number nul;
Number one;
char pivot;
} *nodes = NULL;
unsigned nodecount=0;
unsigned itercount=0;
struct node * nodes_read( unsigned *sizp, char *filename);
Number *find_ptr_to_insert(Number *ptr, Thing value, Thing mask);
unsigned grab_matches(Number *result, Number num, Thing mask);
void initialise_stuff(void);
int main (int argc, char **argv)
{
Thing mask;
Number num;
unsigned idx;
srand (time(NULL));
nodes = nodes_read( &nodecount, argv[1]);
fprintf( stdout, "Nodecount=%u\n", nodecount );
initialise_stuff();
#if WANT_RANDOM
mask = nodes[nodecount/2].value | nodes[nodecount/3].value ;
#else
mask = 0x38;
#endif
fprintf( stdout, "\n#### Search mask=%llx\n", (unsigned long long) mask );
itercount = 0;
num = NONUMBER;
idx = grab_matches(&num,0, mask);
fprintf( stdout, "Itercount=%u\n", itercount );
fprintf(stdout, "KdTree search %16llx\n", (unsigned long long) mask );
fprintf(stdout, "Count=%u Result:\n", idx);
idx = num;
if (idx >= nodecount) idx = nodecount-1;
fprintf( stdout, "num=%4u Value=%16llx\n"
,(unsigned) nodes[idx].num
,(unsigned long long) nodes[idx].value
);
fprintf( stdout, "\nLinear search %16llx\n", (unsigned long long) mask );
for (idx = 0; idx < nodecount; idx++) {
if ((nodes[idx].value & mask) == nodes[idx].value) break;
}
fprintf(stdout, "Cnt=%u\n", idx);
if (idx >= nodecount) idx = nodecount-1;
fprintf(stdout, "Num=%4u Value=%16llx\n"
, (unsigned) nodes[idx].num
, (unsigned long long) nodes[idx].value );
return 0;
}
void initialise_stuff(void)
{
unsigned num;
Number root, *ptr;
root = 0;
for (num=0; num < nodecount; num++) {
nodes[num].num = num;
nodes[num].one = NONUMBER;
nodes[num].nul = NONUMBER;
nodes[num].pivot = -1;
}
nodes[num-1].value = 0; /* last node is guaranteed to match anything */
root = 0;
for (num=1; num < nodecount; num++) {
ptr = find_ptr_to_insert (&root, nodes[num].value, 0ull );
if (*ptr == NONUMBER) *ptr = num;
else fprintf(stderr, "Found %u for %u\n"
, (unsigned)*ptr, (unsigned) num );
}
}
Thing rand_mask(unsigned bitcnt)
{struct node * nodes_read( unsigned *sizp, char *filename)
{
struct node *ptr;
unsigned size,used;
FILE *fp;
if (!filename) {
size = (WANT_RANDOM+0) ? WANT_RANDOM : 9;
ptr = malloc (size * sizeof *ptr);
#if (!WANT_RANDOM)
ptr[0].value = 0x0c;
ptr[1].value = 0x0a;
ptr[2].value = 0x08;
ptr[3].value = 0x04;
ptr[4].value = 0x02;
ptr[5].value = 0x01;
ptr[6].value = 0x10;
ptr[7].value = 0x20;
ptr[8].value = 0x00;
#else
for (used=0; used < size; used++) {
ptr[used].value = rand_mask(WANT_BITS);
}
#endif /* WANT_RANDOM */
*sizp = size;
return ptr;
}
fp = fopen( filename, "r" );
if (!fp) return NULL;
fscanf(fp,"%u\n", &size );
fprintf(stderr, "Size=%u\n", size);
ptr = malloc (size * sizeof *ptr);
for (used = 0; used < size; used++) {
fscanf(fp,"%llu\n", &ptr[used].value );
}
fclose( fp );
*sizp = used;
return ptr;
}
Thing value = 0;
unsigned bit, cnt;
for (cnt=0; cnt < bitcnt; cnt++) {
bit = 54321*rand();
bit %= BITSPERTHING;
value |= 1ull << bit;
}
return value;
}
Number *find_ptr_to_insert(Number *ptr, Thing value, Thing done)
{
Number num=NONUMBER;
while ( *ptr != NONUMBER) {
Thing wrong;
num = *ptr;
wrong = (nodes[num].value ^ value) & ~done;
if (nodes[num].pivot < 0) { /* This node is terminal */
/* choose one of the wrong bits for a pivot .
** For this bit (nodevalue==1 && searchmask==0 )
*/
if (!wrong) wrong = ~done ;
nodes[num].pivot = thing_ffs( wrong );
}
ptr = (wrong & 1ull << nodes[num].pivot) ? &nodes[num].nul : &nodes[num].one;
/* Once this bit has been tested, it can be masked off. */
done |= 1ull << nodes[num].pivot ;
}
return ptr;
}
unsigned grab_matches(Number *result, Number num, Thing mask)
{
Thing wrong;
unsigned count;
for (count=0; num < *result; ) {
itercount++;
wrong = nodes[num].value & ~mask;
if (!wrong) { /* we have a match */
if (num < *result) { *result = num; count++; }
/* This is cheap pruning: the break will omit both subtrees from the results.
** But because we already have a result, and the subtrees have higher numbers
** than our current num, we can ignore them. */
break;
}
if (nodes[num].pivot < 0) { /* This node is terminal */
break;
}
if (mask & 1ull << nodes[num].pivot) {
/* avoid recursion if there is only one non-empty subtree */
if (nodes[num].nul >= *result) { num = nodes[num].one; continue; }
if (nodes[num].one >= *result) { num = nodes[num].nul; continue; }
count += grab_matches(result, nodes[num].nul, mask);
count += grab_matches(result, nodes[num].one, mask);
break;
}
mask |= 1ull << nodes[num].pivot;
num = (wrong & 1ull << nodes[num].pivot) ? nodes[num].nul : nodes[num].one;
}
return count;
}
unsigned thing_ffs(Thing mask)
{
unsigned bit;
#if 1
if (!mask) return (unsigned)-1;
for ( bit=random() % BITSPERTHING; 1 ; bit += 5, bit %= BITSPERTHING) {
if (mask & 1ull << bit ) return bit;
}
#elif 0
for (bit =0; bit < BITSPERTHING; bit++ ) {
if (mask & 1ull <<bit) return bit;
}
#else
mask &= (mask-1); // Kernighan-trick
for (bit =0; bit < BITSPERTHING; bit++ ) {
mask >>=1;
if (!mask) return bit;
}
#endif
return 0xffffffff;
}
struct node * nodes_read( unsigned *sizp, char *filename)
{
struct node *ptr;
unsigned size,used;
FILE *fp;
if (!filename) {
size = (WANT_RANDOM+0) ? WANT_RANDOM : 9;
ptr = malloc (size * sizeof *ptr);
#if (!WANT_RANDOM)
ptr[0].value = 0x0c;
ptr[1].value = 0x0a;
ptr[2].value = 0x08;
ptr[3].value = 0x04;
ptr[4].value = 0x02;
ptr[5].value = 0x01;
ptr[6].value = 0x10;
ptr[7].value = 0x20;
ptr[8].value = 0x00;
#else
for (used=0; used < size; used++) {
ptr[used].value = rand_mask(WANT_BITS);
}
#endif /* WANT_RANDOM */
*sizp = size;
return ptr;
}
fp = fopen( filename, "r" );
if (!fp) return NULL;
fscanf(fp,"%u\n", &size );
fprintf(stderr, "Size=%u\n", size);
ptr = malloc (size * sizeof *ptr);
for (used = 0; used < size; used++) {
fscanf(fp,"%llu\n", &ptr[used].value );
}
fclose( fp );
*sizp = used;
return ptr;
}
UPDATE:
I experimented a bit with the pivot-selection, favouring bits with the highest discriminatory value ("information content"). This involves:
making a histogram of the usage of bits (can be done while initialising)
while building the tree: choosing the one with frequency closest to 1/2 in the remaining subtrees.
The result: the random pivot selection performed better.
Construct a a binary tree as follows:
Every level corresponds to a bit
It corresponding bit is on go right, otherwise left
This way insert every number in the database.
Now, for searching: if the corresponding bit in the mask is 1, traverse both children. If it is 0, traverse only the left node. Essentially keep traversing the tree until you hit the leaf node (BTW, 0 is a hit for every mask!).
This tree will have O(N) space requirements.
Eg of tree for 1 (001), 2(010) and 5 (101)
root
/ \
0 1
/ \ |
0 1 0
| | |
1 0 1
(1) (2) (5)
With precomputed bitmasks. Formally is is still O(N), since the and-mask operations are O(N). The final pass is also O(N), because it needs to find the lowest bit set, but that could be sped up, too.
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* For demonstration purposes.
** In reality, this should be an unsigned long long */
typedef unsigned char Thing;
#define BITSPERTHING (CHAR_BIT*sizeof (Thing))
#define COUNTOF(a) (sizeof a / sizeof a[0])
Thing data[] =
/****** index abcdef */
{ 0x0c /* 0 001100 */
, 0x0a /* 1 001010 */
, 0x08 /* 2 001000 */
, 0x04 /* 3 000100 */
, 0x02 /* 4 000010 */
, 0x01 /* 5 000001 */
, 0x10 /* 6 010000 */
, 0x20 /* 7 100000 */
, 0x00 /* 8 000000 */
};
/* Note: this is for demonstration purposes.
** Normally, one should choose a machine wide unsigned int
** for bitmask arrays.
*/
struct bitmap {
char data[ 1+COUNTOF (data)/ CHAR_BIT ];
} nulmaps [ BITSPERTHING ];
#define BITSET(a,i) (a)[(i) / CHAR_BIT ] |= (1u << ((i)%CHAR_BIT) )
#define BITTEST(a,i) ((a)[(i) / CHAR_BIT ] & (1u << ((i)%CHAR_BIT) ))
void init_tabs(void);
void map_empty(struct bitmap *dst);
void map_full(struct bitmap *dst);
void map_and2(struct bitmap *dst, struct bitmap *src);
int main (void)
{
Thing mask;
struct bitmap result;
unsigned ibit;
mask = 0x38;
init_tabs();
map_full(&result);
for (ibit = 0; ibit < BITSPERTHING; ibit++) {
/* bit in mask is 1, so bit at this position is in fact a don't care */
if (mask & (1u <<ibit)) continue;
/* bit in mask is 0, so we can only select items with a 0 at this bitpos */
map_and2(&result, &nulmaps[ibit] );
}
/* This is not the fastest way to find the lowest 1 bit */
for (ibit = 0; ibit < COUNTOF (data); ibit++) {
if (!BITTEST(result.data, ibit) ) continue;
fprintf(stdout, " %u", ibit);
}
fprintf( stdout, "\n" );
return 0;
}
void init_tabs(void)
{
unsigned ibit, ithing;
/* 1 bits in data that dont overlap with 1 bits in the searchmask are showstoppers.
** So, for each bitpos, we precompute a bitmask of all *entrynumbers* from data[], that contain 0 in bitpos.
*/
memset(nulmaps, 0 , sizeof nulmaps);
for (ithing=0; ithing < COUNTOF(data); ithing++) {
for (ibit=0; ibit < BITSPERTHING; ibit++) {
if ( data[ithing] & (1u << ibit) ) continue;
BITSET(nulmaps[ibit].data, ithing);
}
}
}
/* Logical And of two bitmask arrays; simular to dst &= src */
void map_and2(struct bitmap *dst, struct bitmap *src)
{
unsigned idx;
for (idx = 0; idx < COUNTOF(dst->data); idx++) {
dst->data[idx] &= src->data[idx] ;
}
}
void map_empty(struct bitmap *dst)
{
memset(dst->data, 0 , sizeof dst->data);
}
void map_full(struct bitmap *dst)
{
unsigned idx;
/* NOTE this loop sets too many bits to the left of COUNTOF(data) */
for (idx = 0; idx < COUNTOF(dst->data); idx++) {
dst->data[idx] = ~0;
}
}

Resources