After I have defined and filled the buffer from binary .exe data --
unsigned char *buffer ; /*buffer*/
buffer = malloc(300) ; /*allocate space on heap*/
fread(buffer, 300, 1, file) ;
Then how do I get bytes at position 121--124 of buffer
as a long value?
I have tried
long Hint = 0;
memcpy(Hint, buffer[121], 4);
printf("Hint=x%x\n", Hint);
but all I get is an abend on memcpy
Here is a simple way to do that (I put numbers in buffer for the example):
unsigned char *buffer ; /*buffer*/
buffer = (unsigned char*) malloc (300) ; /*allocate space on heap*/
for(int i=0;i<300;i++) /*initialize buffer with numbers for the demo*/
buffer[i] = i;
long Hint = 0;
long *h = (long *)&buffer[121];
Hint = *h;
printf("Hint=0x%x\n", Hint);
The output for this will be:
Hint=0x7c7b7a79
Which is the numbers 121-124 in hex.
I am working on C++ code to read and write .bmp image.
Below is my code.
However, I encountered some problems that I couldn't fix (the screenshot of the error is not included here).
I have googled a lot but none of them solve my problem.
Sorry if my coding style doesn't look good to you, I'm new to Xcode and C++.
Please help me.
I would really appreciate it.
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
// File header that ReadBitmapFile reads straight from the start of the file.
// The struct is packed to 2-byte alignment so that bfSize follows bfType
// without padding; assuming a 16-bit short and a 32-bit int the struct is
// then 14 bytes (TODO confirm the type widths on the target compiler).
#pragma pack(2)
typedef struct // BMP file header structure
{
unsigned short bfType ; // Magic number for file
unsigned int bfSize ; // Size of file
unsigned short bfReserved1 ; // Reserved, usually set to 0
unsigned short bfReserved2 ; // Reserved, usually set to 0
unsigned int bfoffBits ; // Offset to bitmap data
}BITMAPFILEHEADER;
// Back to the compiler's default packing for the declarations that follow.
#pragma pack()
// Info header that ReadBitmapFile reads immediately after the file header.
// Declared with default packing; every member sits on its natural boundary
// when short is 16 bits and int is 32 bits (TODO confirm on the target).
typedef struct
{
unsigned int biSize ; // Size of info header
int biWidth ; // Width of image
int biHeight ; // Height of image
unsigned short biPlanes ; // Number of color planes
unsigned short biBitCount ; // Number of bits per pixel
unsigned int biCompression ; // Type of compression to use, 0 if there is no compression
unsigned int biSizeImage ; // Size of image data in bytes; used as the allocation size by ReadBitmapFile
int biXPelsPerMeter ; // X pixels per meter
int biYPelsPerMeter ; // Y pixels per meter
unsigned int biClrUsed ; // Number of colors used
unsigned int biClrImportant ; // Number of important colors
}BITMAPINFOHEADER;
unsigned char *ReadBitmapFile(const char *filename, BITMAPINFOHEADER *bitmapInfoHeader)
{
FILE* file ; //file pointer
BITMAPFILEHEADER bitmapFileHeader ; //bitmap file header
unsigned char *bitmapimage ; //store image data
int imageIdx = 0 ;
unsigned char tempRGB ; //swap
// open file in read binary mode
file = fopen(filename, "rb");
if (file == NULL)
return NULL;
// read the bitmap file header
fread(&bitmapFileHeader, sizeof(BITMAPFILEHEADER), 1, file);
// read the bitmap info header
fread(bitmapInfoHeader, sizeof(BITMAPINFOHEADER),1,file);
//move file point to the begging of bitmap data
fseek(file, bitmapFileHeader.bfoffBits, SEEK_SET);
//allocate enough memory for the bitmap image data
bitmapimage = (unsigned char*)malloc(bitmapInfoHeader->biSizeImage);
//verify memory allocation
if(!bitmapimage)
{
free(bitmapimage);
fclose(file);
return NULL;
}
// read in the bitmap image data
fread(bitmapimage, bitmapInfoHeader->biSizeImage, 1, file);
//swap the r and b value to get RGB (bitmap is BGR)
for (imageIdx = 0; imageIdx < bitmapInfoHeader->biSizeImage; imageIdx+=3){
tempRGB = bitmapimage[imageIdx];
bitmapimage[imageIdx] = bitmapimage[imageIdx + 2];
bitmapimage[imageIdx + 2] = tempRGB;
}
//close file and return bitmap data
fclose(file);
return bitmapimage;
}
BITMAPINFOHEADER bitmapInfoHeader;
unsigned char *bitmapData = ReadBitmapFile("input1.bmp", &bitmapInfoHeader);
By the way, I am using Xcode8.3.3
I am trying to print message on serial terminal from p89v664 using following code,
#include<P89V66x.H>
#include<stdio.h>
/*
 * Send one character on the serial port by polling the TI flag.
 * A '\n' is expanded to carriage return (0x0d) followed by the '\n' itself.
 *
 * The '\n' path waits on TI before touching S0BUF, so TI must already be set
 * when the first character is sent (main() sets TI = 1 during init).
 *
 * Returns the character that was passed in.
 */
char putchar(char c) {
    if (c == '\n') {
        while (!TI);        /* wait until the previous byte is out */
        TI = 0;
        S0BUF = 0x0d;
        while (!TI);        /* let the CR finish; reloading S0BUF now would clobber it */
    }
    TI = 0;
    S0BUF = c;
    while (!TI);            /* block until this byte has been sent */
    return c;
}
/*
 * Minimal printf replacement: writes the NUL-terminated string as-is through
 * putchar(); no format specifiers are interpreted.
 *
 * Returns the number of characters handed to putchar().
 */
int printf(char*str) {
    unsigned int cnt = 0;
    while (*str != '\0')
    {
        putchar(*str);
        cnt++;
        str++;
    }
    /* The function is declared int, so it has to return a value. */
    return (int)cnt;
}
/*
 * Crude busy-wait delay: runs a 100-step countdown i times.
 * The resulting duration is not defined by the code itself; it depends on the
 * clock and on how the compiler translates the empty loops.
 */
void delay(unsigned int i) {
int d = 100;
for(;i!=0;i--) {
for(;d!=0;d--); /* empty body: only burns cycles */
d = 100; /* re-arm the inner counter for the next outer pass */
}
}
/*
 * Sets up the serial port and timer 1, then prints two messages forever with
 * a busy-wait delay after each one.
 */
int main(void) {
/**Serial init*/
S0CON = 0x50; /* SCON: mode 1, 8-bit UART, enable rcvr */
TMOD |= 0x20; /* TMOD: timer 1, mode 2, 8-bit reload */
TH1 = 0xF6; /* TH1: reload value for 9600 baud */
TR1 = 1; /* TR1: timer 1 run */
TI = 1; /* start with TI set so the first "while (!TI)" in putchar() falls through */
while(1) {
printf("Hello\n");
delay(300);
printf("Hello World\n");
delay(10000);
}
}
The above program works fine as long as the printf function defined in this program is not commented out.
If the printf function in the above program is commented out, so that printf from the standard library is used instead, junk characters are printed on the serial console (I used PuTTY).
I used Keil uVision V4.14.4.0 compiler.
Is there anything missing?
I dont understand what is wrong with this program.
After some experiments I found that the problem was with the Keil uVision4 evaluation version.
I compiled this code using sdcc, ran it, and it worked. Maybe a limitation of the Keil evaluation version was causing the problem. Thank you very much, Mellowcandle, for all the replies.
Edit:
#include <P89V66x.H>
#include<stdio.h>
/*
 * Send one character on the serial port by polling the TI flag.
 * For '\n' the line feed goes out first and a carriage return (0x0d) follows.
 * Returns only after the last byte written has been sent.
 */
void putchar(char c) {
TI = 0; /* clear the flag before starting a new transmission */
S0BUF = c;
if (c == '\n') {
while (!TI); /* wait until the '\n' itself has been sent */
TI = 0;
S0BUF = 0x0d;
}
while (!TI); /* wait until the last byte written has been sent */
}
/*
 * Sets up the serial port and timer 1, then prints two formatted messages
 * forever through the library printf, which outputs via the putchar() above.
 * NOTE(review): delay() is not defined in this listing; presumably it is the
 * one from the earlier program — confirm before building.
 */
int main(void) {
/**Serial init*/
unsigned short int c = 65334; /* sample value printed with %u */
S0CON = 0x50; /* SCON: mode 1, 8-bit UART, enable rcvr */
TMOD |= 0x20; /* TMOD: timer 1, mode 2, 8-bit reload */
/**For 11.0592 crystal
value should TH = -3 or
TH1 = FD*/
TH1 = 0xF6; /* TH1: reload value for 9600 baud for
18 Mhz cyrstal */
TR1 = 1; /* TR1: timer 1 run */
while(1) {
printf("Hello %u\n", c);
delay(300);
printf("Hello World %u\n" ,c);
delay(10000);
}
}
command used to compile this code is,
sdcc {filename}
I am trying to convert a rgba buffer into argb, is there any way to improve the next algorithm, or any other faster way to perform such operation?
Taking into account that the alpha value is not important once in the argb buffer, and should always end up as 0xFF.
/*
 * Copies every pixel from row y of rgbaBuffer to row (height - y - 1) of
 * argbBuffer, i.e. the image is flipped vertically. Within each 32-bit pixel
 * bits 8-15 and 24-31 are kept in place, while bits 0-7 and bits 16-23 trade
 * places.
 */
int y, x, pixel;
for (y = 0; y < height; y++)
{
for (x = 0; x < width; x++)
{
pixel = rgbaBuffer[y * width + x];
/* keep bytes 1 and 3 | move byte 0 up to byte 2 | move byte 2 down to byte 0 */
argbBuffer[(height - y - 1) * width + x] = (pixel & 0xff00ff00) | ((pixel << 16) & 0x00ff0000) | ((pixel >> 16) & 0xff);
}
}
I will focus only in the swap function:
typedef unsigned int Color32;

/*
 * Reverse the order of the four bytes of a 32-bit color value:
 * 0xAARRGGBB becomes 0xBBGGRRAA.
 *
 * Declared "static inline": in C99 and later a plain "inline" definition does
 * not emit an external definition, so a call that the compiler chooses not to
 * inline would fail to link.
 */
static inline Color32 Color32Reverse(Color32 x)
{
    return
    // Source is in format: 0xAARRGGBB
        ((x & 0xFF000000u) >> 24) | //______AA
        ((x & 0x00FF0000u) >>  8) | //____RR__
        ((x & 0x0000FF00u) <<  8) | //__GG____
        ((x & 0x000000FFu) << 24);  //BB______
    // Return value is in format: 0xBBGGRRAA
}
Assuming that the code is not buggy (just inefficient), I can guess that all you want to do is swap every second (even-numbered) byte (and of course invert the buffer), isn't it?
So you can achieve some optimizations by:
Avoiding the shift and masking operations
Optimizing the loop, eg economizing in the indices calculations
I would rewrite the code as follows:
/*
 * Byte-wise conversion: source row y is written to destination row
 * (height - y - 1). Per pixel, bytes 0 and 2 trade places, byte 1 is copied
 * and byte 3 is forced to 0xFF. Each row is walked from its last pixel to its
 * first.
 */
int y, x;
for (y = 0; y < height; y++)
{
    /* byte views of the current source row and of its mirrored destination row */
    unsigned char *srcRow = (unsigned char *)(rgbaBuffer + y * width);
    unsigned char *dstRow = (unsigned char *)(argbBuffer + (height - y - 1) * width);

    x = 4 * (width - 1);
    while (x >= 0)
    {
        unsigned char *src = srcRow + x;
        unsigned char *dst = dstRow + x;

        dst[0] = src[2];
        dst[1] = src[1];
        dst[2] = src[0];
        dst[3] = 0xFF;
        x -= 4;
    }
}
Please note that the more complex index calculation is performed in the outer loop only. There are four accesses to both rgbaBuffer and argbBuffer for each pixel, but I think this is more than offset by avoiding the bitwise operations and the index calculations. An alternative would be (as in your code) to fetch/store one pixel (int) at a time and do the processing locally (this economizes on memory accesses), but unless you have some efficient way to swap the two bytes and set the alpha locally (e.g. some inline assembly, so that you can make sure everything is performed at register level), it won't really help.
Code you provided is very strange since it shuffles color components not rgba->argb, but rgba->rabg.
I've made a correct and optimized version of this routine.
int pixel;
int size = width * height;
for (unsigned int * rgba_ptr = rgbaBuffer, * argb_ptr = argbBuffer + size - 1; argb_ptr >= argbBuffer; rgba_ptr++, argb_ptr--)
{
// *argb_ptr = *rgba_ptr >> 8 | 0xff000000; // - this version doesn't change endianess
*argb_ptr = __builtin_bswap32(*rgba_ptr) >> 8 | 0xff000000; // This does
}
The first thing I did was simplify your shuffling expression. It is obvious that XRGB is just RGBA >> 8.
I also removed the calculation of the array index on each iteration and used pointers as loop variables.
This version is about 2 times faster than the original on my machine.
You can also use SSE for shuffling if this code is intended for x86 CPU.
I am very late to this one. But I had the exact same problem when generating video on the fly. By reusing the buffer, I could get away with only setting the R, G, B values for every frame and only setting the A once.
See below code:
// Output buffer that is kept between calls so it is only allocated when the
// bitmap size changes.
byte[] _workingBuffer = null;

// Copies the bitmap's pixel bytes into _workingBuffer shifted by one position
// inside every 4-byte group: source bytes 0..2 land at offsets 1..3. Offset 0
// of each group is filled with byte.MaxValue once, when the buffer is
// (re)allocated, and is left untouched afterwards.
// Returns the shared working buffer.
byte[] GetProcessedPixelData(SKBitmap bitmap)
{
    ReadOnlySpan<byte> pixels = bitmap.GetPixelSpan();
    int byteCount = pixels.Length;

    bool mustAllocate = _workingBuffer == null || _workingBuffer.Length != bitmap.ByteCount;
    if (mustAllocate)
    {
        // Alloc buffer
        _workingBuffer = new byte[byteCount];

        // Set all the alpha
        int alphaIndex = 0;
        while (alphaIndex < byteCount)
        {
            _workingBuffer[alphaIndex] = byte.MaxValue;
            alphaIndex += 4;
        }
    }

    Stopwatch timer = Stopwatch.StartNew();
    for (int offset = 0; offset < byteCount; offset += 4)
    {
        // Offset 0 (alpha) was already written when the buffer was allocated.
        _workingBuffer[offset + 1] = pixels[offset];     // R
        _workingBuffer[offset + 2] = pixels[offset + 1]; // G
        _workingBuffer[offset + 3] = pixels[offset + 2]; // B
    }
    Debug.Print("Copied " + byteCount + " in " + timer.Elapsed.TotalMilliseconds);
    return _workingBuffer;
}
This got me to around 15 milliseconds on an iPhone for a (1920 * 1080 * 4) buffer which is ~8mb.
This was not nearly enough for me. My final solution was instead to do a offset memcopy (Buffer.BlockCopy in C#) since the alpha is not important.
// Output buffer that is kept between calls so it is only allocated when the
// bitmap size changes.
byte[] _workingBuffer = null;
// Produces the output by copying the whole source buffer one byte further
// into _workingBuffer with a single block copy. The last source byte is not
// copied, and byte 0 of the output is set to byte.MaxValue when the buffer is
// allocated. Returns the shared working buffer.
byte[] GetProcessedPixelData(SKBitmap bitmap)
{
ReadOnlySpan<byte> sourceSpan = bitmap.GetPixelSpan();
// Buffer.BlockCopy needs an array, so the span is copied into a new array on every call.
byte[] sourceArray = sourceSpan.ToArray();
if (_workingBuffer == null || _workingBuffer.Length != bitmap.ByteCount)
{
// Alloc buffer
_workingBuffer = new byte[sourceSpan.Length];
// Set first byte. This is the alpha component of the first pixel
_workingBuffer[0] = byte.MaxValue;
}
// Converts RGBA to ARGB in ~2 ms instead of ~15 ms
//
// Copies the whole buffer with a offset of 1
// R G B A R G B A R G B A
// Originally the source buffer has: R1, G1, B1, A1, R2, G2, B2, A2, R3, G3, B3, A3
// A R G B A R G B A R G B A
// After the copy it looks like: 0, R1, G1, B1, A1, R2, G2, B2, A2, R3, G3, B3, A3
// So essentially we get the wrong alpha for every pixel. But all alphas should be 255 anyways.
// The first byte is set in the alloc
Buffer.BlockCopy(sourceArray, 0, _workingBuffer, 1, sourceSpan.Length - 1);
// Below is an inefficient method of converting RGBA to ARGB. Takes ~15 ms on iPhone 12 Pro Max for a 8mb buffer (1920 * 1080 * 4 bytes)
/*
for (int i = 0; i < sourceSpan.Length; i += 4)
{
// A
// Dont set alpha here. The alpha is already set in the buffer
//_workingBuffer[i] = byte.MaxValue;
//_workingBuffer[i] = sourceSpan[i + 3];
byte sR = sourceSpan[i];
byte sG = sourceSpan[i + 1];
byte sB = sourceSpan[i + 2];
if (sR == 0 && sG == byte.MaxValue && sB == 0)
continue;
// R
_workingBuffer[i + 1] = sR;
// G
_workingBuffer[i + 2] = sG;
// B
_workingBuffer[i + 3] = sB;
}
*/
return _workingBuffer;
}
The code is commented on how this works. On my same iPhone it takes ~2 ms which is sufficient for my use case.
Use assembly, the following is for Intel.
This example swaps Red and Blue.
// Swaps byte 0 and byte 2 of every 4-byte pixel in place (red and blue,
// according to the description above), using 32-bit x86 inline assembly.
// NOTE(review): the loop body runs before the counter is tested, so with
// len == 0 the "dec" wraps ecx around and the loop keeps going — make sure
// the texture is never empty.
void* b = pixels;
UINT len = textureWidth*textureHeight; // number of pixels (ebx advances 4 bytes per count)
__asm
{
mov ecx, len // Set loop counter to the number of pixels
mov ebx, b // Set ebx to pixels pointer
label:
mov al,[ebx+0] // Load byte 0 (red) to al
mov ah,[ebx+2] // Load byte 2 (blue) to ah
mov [ebx+0],ah // Store blue where red was
mov [ebx+2],al // Store red where blue was
add ebx,4 // Move by 4 bytes to next pixel
dec ecx // Decrease loop counter
jnz label // If not zero jump to label
}
(pixel << 24) | (pixel >> 8) rotates a 32-bit integer 8 bits to the right, which would convert a 32-bit RGBA value to ARGB. This works because:
pixel << 24 discards the RGB portion of RGBA off the left side, resulting in A000.
pixel >> 8 discards the A portion of RGBA off the right side, resulting in 0RGB.
A000 | 0RGB == ARGB.
I have a list of N 64-bit integers whose bits represent small sets. Each integer has at most k bits set to 1. Given a bit mask, I would like to find the first element in the list that matches the mask, i.e. element & mask == element.
Example:
If my list is:
index abcdef
0 001100
1 001010
2 001000
3 000100
4 000010
5 000001
6 010000
7 100000
8 000000
and my mask is 111000, the first element matching the mask is at index 2.
Method 1:
Linear search through the entire list. This takes O(N) time and O(1) space.
Method 2:
Precompute a tree of all possible masks, and at each node keep the answer for that mask. This takes O(1) time for the query, but takes O(2^64) space.
Question:
How can I find the first element matching the mask faster than O(N), while still using a reasonable amount of space? I can afford to spend polynomial time in precomputation, because there will be a lot of queries. The key is that k is small. In my application, k <= 5 and N is in the thousands. The mask has many 1s; you can assume that it is drawn uniformly from the space of 64-bit integers.
Update:
Here is an example data set and a simple benchmark program that runs on Linux: http://up.thirld.com/binmask.tar.gz. For large.in, N=3779 and k=3. The first line is N, followed by N unsigned 64-bit ints representing the elements. Compile with make. Run with ./benchmark.e >large.out to create the true output, which you can then diff against. (Masks are generated randomly, but the random seed is fixed.) Then replace the find_first() function with your implementation.
The simple linear search is much faster than I expected. This is because k is small, and so for a random mask, a match is found very quickly on average.
A suffix tree (on bits) will do the trick, with the original priority at the leaf nodes:
000000 -> 8
1 -> 5
10 -> 4
100 -> 3
1000 -> 2
10 -> 1
100 -> 0
10000 -> 6
100000 -> 7
where if the bit is set in the mask, you search both arms, and if not, you search only the 0 arm; your answer is the minimum number you encounter at a leaf node.
You can improve this (marginally) by traversing the bits not in order but by maximum discriminability; in your example, note that 3 elements have bit 2 set, so you would create
2:0 0:0 1:0 3:0 4:0 5:0 -> 8
5:1 -> 5
4:1 5:0 -> 4
3:1 4:0 5:0 -> 3
1:1 3:0 4:0 5:0 -> 6
0:1 1:0 3:0 4:0 5:0 -> 7
2:1 0:0 1:0 3:0 4:0 5:0 -> 2
4:1 5:0 -> 1
3:1 4:0 5:0 -> 0
In your example mask this doesn't help (since you have to traverse both the bit2==0 and bit2==1 sides since your mask is set in bit 2), but on average it will improve the results (but at a cost of setup and more complex data structure). If some bits are much more likely to be set than others, this could be a huge win. If they're pretty close to random within the element list, then this doesn't help at all.
If you're stuck with essentially random bits set, you should get about (1-5/64)^32 benefit from the suffix tree approach on average (13x speedup), which might be better than the difference in efficiency due to using more complex operations (but don't count on it--bit masks are fast). If you have a nonrandom distribution of bits in your list, then you could do almost arbitrarily well.
This is the bitwise Kd-tree. It typically needs less than 64 visits per lookup operation. Currently, the selection of the bit (dimension) to pivot on is random.
#include <limits.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
typedef unsigned long long Thing;
typedef unsigned long Number;
unsigned thing_ffs(Thing mask);
Thing rand_mask(unsigned bitcnt);
#define WANT_RANDOM 31
#define WANT_BITS 3
#define BITSPERTHING (CHAR_BIT*sizeof(Thing))
#define NONUMBER ((Number)-1)
/*
 * One element of the list; the same array doubles as the search tree.
 *
 * value  the bit set stored in this element
 * num    the element's own index (assigned in initialise_stuff)
 * nul    index of the child holding values that differ from this node's value
 *        in the pivot bit, or NONUMBER if there is none
 * one    index of the child holding values that agree with this node's value
 *        in the pivot bit, or NONUMBER if there is none
 * pivot  bit number this node branches on, or -1 while the node has no
 *        children yet. It must be a signed type: the code tests "pivot < 0",
 *        which can never be true where plain char is unsigned.
 */
struct node {
    Thing value;
    Number num;
    Number nul;
    Number one;
    signed char pivot;
} *nodes = NULL;
unsigned nodecount=0;
unsigned itercount=0;
struct node * nodes_read( unsigned *sizp, char *filename);
Number *find_ptr_to_insert(Number *ptr, Thing value, Thing mask);
unsigned grab_matches(Number *result, Number num, Thing mask);
void initialise_stuff(void);
/*
 * Demo driver: loads (or generates) the node list, builds the tree, runs one
 * tree lookup and one linear scan for the same mask and prints both results.
 *
 * argv[1], if present, names the input file passed to nodes_read(); without
 * it nodes_read() produces its built-in data.
 * Returns 0 on success, EXIT_FAILURE if no nodes could be loaded.
 */
int main (int argc, char **argv)
{
    Thing mask;
    Number num;
    unsigned idx;

    srand (time(NULL));
    nodes = nodes_read( &nodecount, argc > 1 ? argv[1] : NULL);
    /* Everything below indexes nodes[]; stop here if there is nothing to index. */
    if (!nodes || nodecount == 0) {
        fprintf( stderr, "No nodes could be loaded\n" );
        free(nodes);
        return EXIT_FAILURE;
    }
    fprintf( stdout, "Nodecount=%u\n", nodecount );
    initialise_stuff();

#if WANT_RANDOM
    mask = nodes[nodecount/2].value | nodes[nodecount/3].value ;
#else
    mask = 0x38;
#endif

    fprintf( stdout, "\n#### Search mask=%llx\n", (unsigned long long) mask );

    /* Tree lookup: num receives the lowest matching index, if any. */
    itercount = 0;
    num = NONUMBER;
    idx = grab_matches(&num,0, mask);
    fprintf( stdout, "Itercount=%u\n", itercount );
    fprintf(stdout, "KdTree search %16llx\n", (unsigned long long) mask );
    fprintf(stdout, "Count=%u Result:\n", idx);
    idx = (unsigned) num;
    if (idx >= nodecount) idx = nodecount-1; /* no match: report the last node */
    fprintf( stdout, "num=%4u Value=%16llx\n"
        ,(unsigned) nodes[idx].num
        ,(unsigned long long) nodes[idx].value
        );

    /* Reference result: plain linear scan for the first matching element. */
    fprintf( stdout, "\nLinear search %16llx\n", (unsigned long long) mask );
    for (idx = 0; idx < nodecount; idx++) {
        if ((nodes[idx].value & mask) == nodes[idx].value) break;
    }
    fprintf(stdout, "Cnt=%u\n", idx);
    if (idx >= nodecount) idx = nodecount-1;
    fprintf(stdout, "Num=%4u Value=%16llx\n"
        , (unsigned) nodes[idx].num
        , (unsigned long long) nodes[idx].value );

    free(nodes);
    return 0;
}
/*
 * Prepare the global nodes[] array and link it into a tree.
 *
 * Every node gets its own index as num, no children and no pivot. The value
 * of the last node is overwritten with 0 so that it matches any mask. Node 0
 * serves as the root; nodes 1..nodecount-1 are inserted in index order.
 * Does nothing when the list is missing or empty.
 */
void initialise_stuff(void)
{
    unsigned num;
    Number root, *ptr;

    /* nodes[nodecount-1] below would be out of bounds for an empty list */
    if (!nodes || nodecount == 0) return;

    for (num=0; num < nodecount; num++) {
        nodes[num].num = num;
        nodes[num].one = NONUMBER;
        nodes[num].nul = NONUMBER;
        nodes[num].pivot = -1;
    }
    nodes[nodecount-1].value = 0; /* last node is guaranteed to match anything */

    root = 0;
    for (num=1; num < nodecount; num++) {
        /* find the empty child slot where this node belongs and hook it in */
        ptr = find_ptr_to_insert (&root, nodes[num].value, 0ull );
        if (*ptr == NONUMBER) *ptr = num;
        else fprintf(stderr, "Found %u for %u\n"
            , (unsigned)*ptr, (unsigned) num );
    }
}
/*
 * Build a random value by setting bitcnt randomly chosen bits.
 * The same bit may be picked more than once, so the result has at most
 * bitcnt bits set.
 */
Thing rand_mask(unsigned bitcnt)
{
    Thing value = 0;
    unsigned bit, cnt;

    for (cnt=0; cnt < bitcnt; cnt++) {
        /* multiply as unsigned: the signed product could overflow */
        bit = 54321u * (unsigned) rand();
        bit %= BITSPERTHING;
        value |= 1ull << bit;
    }
    return value;
}
/*
 * Walk down the tree from the link *ptr and return the address of the first
 * empty link (one that holds NONUMBER) on the path selected by value.
 *
 * ptr    address of the link to start from
 * value  the value that is about to be inserted
 * done   bits that were already used as pivots further up the path
 *
 * A node without a pivot is given one while passing through it.
 */
Number *find_ptr_to_insert(Number *ptr, Thing value, Thing done)
{
Number num=NONUMBER;
while ( *ptr != NONUMBER) {
Thing wrong;
num = *ptr;
/* bits, not yet used as pivot, in which this node's value and the new value differ */
wrong = (nodes[num].value ^ value) & ~done;
if (nodes[num].pivot < 0) { /* This node is terminal */
/* choose one of the wrong bits for a pivot .
** For this bit (nodevalue==1 && searchmask==0 )
*/
/* the two values agree on every unused bit: fall back to any unused bit */
if (!wrong) wrong = ~done ;
nodes[num].pivot = thing_ffs( wrong );
}
/* values differ in the pivot bit: follow nul, otherwise follow one */
ptr = (wrong & 1ull << nodes[num].pivot) ? &nodes[num].nul : &nodes[num].one;
/* Once this bit has been tested, it can be masked off. */
done |= 1ull << nodes[num].pivot ;
}
return ptr;
}
/*
 * Search the subtree that starts at node num for nodes whose value has no bit
 * set outside mask, keeping the lowest such index in *result.
 *
 * result  in: only indexes below *result are of interest; out: lowered to the
 *         index of a matching node when one is found
 * num     index of the node to start from
 * mask    the search mask; it is a local copy and is modified while descending
 *
 * Returns the number of times *result was lowered. Each node visited bumps
 * the global itercount.
 */
unsigned grab_matches(Number *result, Number num, Thing mask)
{
Thing wrong;
unsigned count;
/* NONUMBER is the largest Number, so an empty link also ends the loop */
for (count=0; num < *result; ) {
itercount++;
/* bits set in this node's value that the mask does not allow */
wrong = nodes[num].value & ~mask;
if (!wrong) { /* we have a match */
if (num < *result) { *result = num; count++; }
/* This is cheap pruning: the break will omit both subtrees from the results.
** But because we already have a result, and the subtrees have higher numbers
** than our current num, we can ignore them. */
break;
}
if (nodes[num].pivot < 0) { /* This node is terminal */
break;
}
/* the pivot bit is set in the mask: both children have to be considered */
if (mask & 1ull << nodes[num].pivot) {
/* avoid recursion if there is only one non-empty subtree */
if (nodes[num].nul >= *result) { num = nodes[num].one; continue; }
if (nodes[num].one >= *result) { num = nodes[num].nul; continue; }
count += grab_matches(result, nodes[num].nul, mask);
count += grab_matches(result, nodes[num].one, mask);
break;
}
/* the pivot bit is clear in the mask: set it in the local copy and follow one child only */
mask |= 1ull << nodes[num].pivot;
num = (wrong & 1ull << nodes[num].pivot) ? nodes[num].nul : nodes[num].one;
}
return count;
}
/*
 * Return the number of one bit that is set in mask, or (unsigned)-1 when
 * mask is 0.
 *
 * The active variant starts at a random bit position and probes in steps of
 * 5 (modulo the width of Thing) until it hits a set bit. It draws from
 * rand(), the generator that main() seeds with srand() and that rand_mask()
 * uses as well; random() is a separate POSIX generator that srand() does not
 * seed and that ISO C does not declare.
 */
unsigned thing_ffs(Thing mask)
{
    unsigned bit;

#if 1
    if (!mask) return (unsigned)-1;
    for ( bit=rand() % BITSPERTHING; 1 ; bit += 5, bit %= BITSPERTHING) {
        if (mask & 1ull << bit ) return bit;
    }
#elif 0
    /* alternative: lowest set bit */
    for (bit =0; bit < BITSPERTHING; bit++ ) {
        if (mask & 1ull <<bit) return bit;
    }
#else
    mask &= (mask-1); // Kernighan-trick
    for (bit =0; bit < BITSPERTHING; bit++ ) {
        mask >>=1;
        if (!mask) return bit;
    }
#endif

    return 0xffffffff;
}
/*
 * Create the node array.
 *
 * sizp      receives the number of nodes in the returned array (0 on failure)
 * filename  input file: a count on the first line followed by that many
 *           unsigned 64-bit values; NULL selects the built-in data
 *           (WANT_RANDOM random values, or the fixed 9-element example when
 *           WANT_RANDOM is 0)
 *
 * Returns a malloc'ed array that the caller must free(), or NULL if the file
 * cannot be opened, memory runs out, or no value could be read. If the file
 * ends early, only the values actually read are returned. Only the value
 * member of each node is filled in here.
 */
struct node * nodes_read( unsigned *sizp, char *filename)
{
    struct node *ptr;
    unsigned size,used;
    FILE *fp;

    *sizp = 0;

    if (!filename) {
        size = (WANT_RANDOM+0) ? WANT_RANDOM : 9;
        ptr = malloc (size * sizeof *ptr);
        if (!ptr) return NULL;
#if (!WANT_RANDOM)
        ptr[0].value = 0x0c;
        ptr[1].value = 0x0a;
        ptr[2].value = 0x08;
        ptr[3].value = 0x04;
        ptr[4].value = 0x02;
        ptr[5].value = 0x01;
        ptr[6].value = 0x10;
        ptr[7].value = 0x20;
        ptr[8].value = 0x00;
#else
        for (used=0; used < size; used++) {
            ptr[used].value = rand_mask(WANT_BITS);
        }
#endif /* WANT_RANDOM */
        *sizp = size;
        return ptr;
    }

    fp = fopen( filename, "r" );
    if (!fp) return NULL;

    /* without a valid count, size would be used uninitialised */
    if (fscanf(fp,"%u\n", &size ) != 1 || size == 0) {
        fclose( fp );
        return NULL;
    }
    fprintf(stderr, "Size=%u\n", size);

    ptr = malloc (size * sizeof *ptr);
    if (!ptr) {
        fclose( fp );
        return NULL;
    }

    /* stop at the first value that cannot be parsed instead of keeping garbage */
    for (used = 0; used < size; used++) {
        if (fscanf(fp,"%llu\n", &ptr[used].value ) != 1) break;
    }
    fclose( fp );

    if (used == 0) {
        free(ptr);
        return NULL;
    }
    *sizp = used;
    return ptr;
}
UPDATE:
I experimented a bit with the pivot-selection, favouring bits with the highest discriminatory value ("information content"). This involves:
making a histogram of the usage of bits (can be done while initialising)
while building the tree: choosing the one with frequency closest to 1/2 in the remaining subtrees.
The result: the random pivot selection performed better.
Construct a binary tree as follows:
Every level corresponds to a bit.
If the corresponding bit is on, go right; otherwise go left.
This way insert every number in the database.
Now, for searching: if the corresponding bit in the mask is 1, traverse both children. If it is 0, traverse only the left node. Essentially keep traversing the tree until you hit the leaf node (BTW, 0 is a hit for every mask!).
This tree will have O(N) space requirements.
Eg of tree for 1 (001), 2(010) and 5 (101)
root
/ \
0 1
/ \ |
0 1 0
| | |
1 0 1
(1) (2) (5)
With precomputed bitmasks. Formally it is still O(N), since the and-mask operations are O(N). The final pass is also O(N), because it needs to find the lowest bit set, but that could be sped up, too.
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* For demonstration purposes.
** In reality, this should be an unsigned long long */
typedef unsigned char Thing;
#define BITSPERTHING (CHAR_BIT*sizeof (Thing))
#define COUNTOF(a) (sizeof a / sizeof a[0])
Thing data[] =
/****** index abcdef */
{ 0x0c /* 0 001100 */
, 0x0a /* 1 001010 */
, 0x08 /* 2 001000 */
, 0x04 /* 3 000100 */
, 0x02 /* 4 000010 */
, 0x01 /* 5 000001 */
, 0x10 /* 6 010000 */
, 0x20 /* 7 100000 */
, 0x00 /* 8 000000 */
};
/* Note: this is for demonstration purposes.
** Normally, one should choose a machine wide unsigned int
** for bitmask arrays.
*/
/* A set of entry numbers of data[], one bit per entry (the array holds at
** least COUNTOF(data) bits). nulmaps[b] is filled by init_tabs() with the
** entries whose bit b is 0.
*/
struct bitmap {
char data[ 1+COUNTOF (data)/ CHAR_BIT ];
} nulmaps [ BITSPERTHING ];
/* BITSET sets bit i in the char array a; it expands to an assignment
** expression, so use it as a statement on its own.
** BITTEST yields non-zero if bit i of the char array a is set.
*/
#define BITSET(a,i) (a)[(i) / CHAR_BIT ] |= (1u << ((i)%CHAR_BIT) )
#define BITTEST(a,i) ((a)[(i) / CHAR_BIT ] & (1u << ((i)%CHAR_BIT) ))
void init_tabs(void);
void map_empty(struct bitmap *dst);
void map_full(struct bitmap *dst);
void map_and2(struct bitmap *dst, struct bitmap *src);
int main (void)
{
Thing mask;
struct bitmap result;
unsigned ibit;
mask = 0x38;
init_tabs();
map_full(&result);
for (ibit = 0; ibit < BITSPERTHING; ibit++) {
/* bit in mask is 1, so bit at this position is in fact a don't care */
if (mask & (1u <<ibit)) continue;
/* bit in mask is 0, so we can only select items with a 0 at this bitpos */
map_and2(&result, &nulmaps[ibit] );
}
/* This is not the fastest way to find the lowest 1 bit */
for (ibit = 0; ibit < COUNTOF (data); ibit++) {
if (!BITTEST(result.data, ibit) ) continue;
fprintf(stdout, " %u", ibit);
}
fprintf( stdout, "\n" );
return 0;
}
void init_tabs(void)
{
unsigned ibit, ithing;
/* 1 bits in data that dont overlap with 1 bits in the searchmask are showstoppers.
** So, for each bitpos, we precompute a bitmask of all *entrynumbers* from data[], that contain 0 in bitpos.
*/
memset(nulmaps, 0 , sizeof nulmaps);
for (ithing=0; ithing < COUNTOF(data); ithing++) {
for (ibit=0; ibit < BITSPERTHING; ibit++) {
if ( data[ithing] & (1u << ibit) ) continue;
BITSET(nulmaps[ibit].data, ithing);
}
}
}
/* Bitwise AND of two bitmaps, stored into the first one; similar to dst &= src */
void map_and2(struct bitmap *dst, struct bitmap *src)
{
    char *out = dst->data;
    const char *in = src->data;
    unsigned left = COUNTOF(dst->data);

    while (left--) {
        *out++ &= *in++;
    }
}
/* Clear every bit of the bitmap. */
void map_empty(struct bitmap *dst)
{
    unsigned idx;

    for (idx = 0; idx < sizeof dst->data; idx++) {
        dst->data[idx] = 0;
    }
}
/* Set every bit of the bitmap. */
void map_full(struct bitmap *dst)
{
    /* NOTE this sets too many bits to the left of COUNTOF(data) */
    memset(dst->data, ~0, sizeof dst->data);
}