Incorrect hexadecimal matrix multiplication results - c++11

matrix 1:
0x02 0x03 0x01 0x01
0x01 0x02 0x03 0x01
0x01 0x01 0x02 0x03
0x03 0x01 0x01 0x02
matrix 2:
0x63 0x53 0xe0 0x8c
0x09 0x60 0xe1 0x04
0xcd 0x70 0xb7 0x51
0xba 0xca 0xd0 0xe7
These two matrices are multipled in this function:
void mul(uint8_t state[4][4])
{
for(unsigned short i = 0; i < 4; i++)
{
state [0][i] = byteProduct(0x02 ,state[0][i]) ^ byteProduct(0x03, state[1][i]) ^ state[2][i] ^ state[3][i];
state [1][i] = state[0][i] ^ byteProduct(0x02, state[1][i]) ^ byteProduct(0x03, state[2][i]) ^ state[3][i];
state [2][i] = state[0][i] ^ state[1][i] ^ byteProduct(0x02, state[2][i]) ^ byteProduct(0x03, state[3][i]);
state [3][i] = byteProduct(0x03, state[0][i]) ^ state[1][i] ^ state[2][i] ^ byteProduct(0x02, state[3][i]);
}
}
In this function I've taken matrix 2 column by column and individually multiplied with the values of the rows of matrix 1. And the value should be replaced in the state matrix.
ByteProduct is defined as:
uint8_t byteProduct(uint8_t x, uint8_t y)
{
uint8_t result = 0, temp;
while(x != 0)
{
if((x & 1) != 0)
result ^= y;
temp = y & 0x80;
y <<= 1;
if(temp != 0)
y ^= 0x1b;
x >>= 1;
}
return result;
}
The result should be :
0x5f 0x72 0x64 0x15
0x57 0xf5 0xbc 0x92
0xf7 0xbe 0x3b 0x29
0x1d 0xb9 0xf9 0x1a
But the mamtrix resulting from the function is different from this.
Any solutions to this?
Note that these calculations are performed in GF(2^8) field, hence don't try to use + and * operators, rather ^ is used for + operator and byteProduct() functions returns the multiplication of uint8_ts.
I've performed row x column multiplication.

Do not fill values into one of the matrixes you are still using for calculation.
Make a copy of it first or fill a new matrix.
Otherwise you will compromise the values you still use for calculation.
E.g.
state [0][i] = byteProduct(0x02 ,state[0][i]) ^ byteProduct(0x03, state[1][i]) ^ state[2][i] ^ state[3][i];
state [1][i] = state[0][i] ^ byteProduct(0x02, state[1][i]) ^ byteProduct(0x03, state[2][i]) ^ state[3][i];
The first line already overwrites the value of state [0][i] and then the second line uses it again to calculate the state [1][i].

Related

Zlib crc32 combine endian format

I just go through the code of Zlib CRC32 combine function, but I am confused about the endian of the CRC32 input. Does it only work for big endian ? If I have small endian format, should I do a byte swap first before I use the function ? Thanks in advance.
/* ========================================================================= */
local uLong crc32_combine_(crc1, crc2, len2)
uLong crc1;
uLong crc2;
z_off64_t len2;
{
int n;
unsigned long row;
unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */
unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */
/* degenerate case (also disallow negative lengths) */
if (len2 <= 0)
return crc1;
/* put operator for one zero bit in odd */
odd[0] = 0xedb88320UL; /* CRC-32 polynomial */
row = 1;
for (n = 1; n < GF2_DIM; n++) {
odd[n] = row;
row <<= 1;
}
/* put operator for two zero bits in even */
gf2_matrix_square(even, odd);
/* put operator for four zero bits in odd */
gf2_matrix_square(odd, even);
/* apply len2 zeros to crc1 (first square will put the operator for one
zero byte, eight zero bits, in even) */
do {
/* apply zeros operator for this bit of len2 */
gf2_matrix_square(even, odd);
if (len2 & 1)
crc1 = gf2_matrix_times(even, crc1);
len2 >>= 1;
/* if no more bits set, then done */
if (len2 == 0)
break;
/* another iteration of the loop with odd and even swapped */
gf2_matrix_square(odd, even);
if (len2 & 1)
crc1 = gf2_matrix_times(odd, crc1);
len2 >>= 1;
/* if no more bits set, then done */
} while (len2 != 0);
/* return combined crc */
crc1 ^= crc2;
return crc1;
}
All of zlib works in little or big endian architectures.
There is no "endianess" of the arguments of crc32_combine(). The crc1 and crc2 arguments are passed as 32-bit integers, not sequences of bytes, and so have no endianess.
By the way, there is more recent code for crc32_combine() that is little bit more efficient.

C getting a raw keypress with no stdlib

I am working an a very basic operating system for a learning experience, and I am trying to start with key presses. I am making a freestanding executable, so no standard library. How would I go about taking input from a keyboard? I have figured out how to print to the screen through video memory.
/*
* kernel.c
* */
void cls(int line) { // clear the screen
char *vidptr = (char*) 0xb8000;
/* 25 lines each of 80 columns; each element takes 2 bytes */
unsigned int x = 0;
while (x < 80 * 25 * 2) {
// blank character
vidptr[x] = ' ';
// attribute-byte - light grey on black screen
x += 1;
vidptr[x] = 0x07;
x += 1;
}
line = 0;
}
void printf(const char *str, int line, char attr) { // write a string to video memory
char *vidptr = (char*) 0xb8000;
unsigned int y =0, x = 0;
while (str[y] != '\0') {
// the character's ascii
vidptr[x] = str[y];
x++;
// attribute byte - give character black bg and light gray fg
vidptr[x+1] = attr;
x++;
y++;
}
}
void kmain(void) {
unsigned int line = 0;
cls(line);
printf("Testing the Kernel", line, 0x0a);
}
and my assembly:
;; entry point
bits 32 ; nasm directive - 32 bit
global entry
extern _kmain ; kmain is defined in the c file
section .text
entry: jmp start
;multiboot spec
align 4
dd 0x1BADB002 ; black magic
dd 0x00 ; flags
dd -(0x1BADB002 + 0x00) ; checksum. m+f+c should be zero
start:
cli ; block interrupts
mov esp, stack_space ; set stack pointer
call _kmain
hlt ; halt the CPU
section .bss
resb 8192 ; 8KB for stack
stack_space:

Efficient sse shuffle mask generation for left-packing byte elements

What would be an efficient way to optimize the following code with sse ?
uint16_t change1= ... ;
uint8_t* pSrc = ... ;
uint8_t* pDest = ... ;
if(change1 & 0x0001) *pDest++ = pSrc[0];
if(change1 & 0x0002) *pDest++ = pSrc[1];
if(change1 & 0x0004) *pDest++ = pSrc[2];
if(change1 & 0x0008) *pDest++ = pSrc[3];
if(change1 & 0x0010) *pDest++ = pSrc[4];
if(change1 & 0x0020) *pDest++ = pSrc[5];
if(change1 & 0x0040) *pDest++ = pSrc[6];
if(change1 & 0x0080) *pDest++ = pSrc[7];
if(change1 & 0x0100) *pDest++ = pSrc[8];
if(change1 & 0x0200) *pDest++ = pSrc[9];
if(change1 & 0x0400) *pDest++ = pSrc[10];
if(change1 & 0x0800) *pDest++ = pSrc[11];
if(change1 & 0x1000) *pDest++ = pSrc[12];
if(change1 & 0x2000) *pDest++ = pSrc[13];
if(change1 & 0x4000) *pDest++ = pSrc[14];
if(change1 & 0x8000) *pDest++ = pSrc[15];
So far I am using a quite big lookup table for it, but I really want to get rid of it:
SSE3Shuffle::Entry& e0 = SSE3Shuffle::g_Shuffle.m_Entries[change1];
_mm_storeu_si128((__m128i*)pDest, _mm_shuffle_epi8(*(__m128i*)pSrc, e0.mask));
pDest += e0.offset;
Assuming:
change1 = _mm_movemask_epi8(bytemask);
offset = popcnt(change1);
On large buffers, using two shuffles and a 1 KiB table is only ~10% slower than using 1 shuffle and a 1MiB table. My attempts at generating the shuffle mask via prefix sums and bit twiddling are about about half the speed of the table based methods
(solutions using pext/pdep were not explored).
Reducing table size: Use two lookups into a 2 KiB table instead of 1 lookup into a 1 MiB table. Always keep the top-most byte - if that byte is to be discarded then it doesn't matter what byte is at that position (down to 7-bit indices, or 1 KiB table). Further reduce possible combinations by manually packing the two bytes in each 16-bit lane (down to a 216 byte table).
The following example strips whitespace from text using SSE4.1. If only SSSE3 is available then blendv can be emulated. The 64-bit halves are re-combined by overlapping writes to memory, but they could be re-combined in the xmm register (as seen in the AVX2 example).
#include <stdint.h>
#include <smmintrin.h> // SSE4.1
size_t despacer (void* dst_void, void* src_void, size_t length)
{
uint8_t* src = (uint8_t*)src_void;
uint8_t* dst = (uint8_t*)dst_void;
if (length >= 16) {
// table of control characters (space, tab, newline, carriage return)
const __m128i lut_cntrl = _mm_setr_epi8(' ', 0, 0, 0, 0, 0, 0, 0, 0, '\t', '\n', 0, 0, '\r', 0, 0);
// bits[4:0] = index -> ((trit_d * 0) + (trit_c * 9) + (trit_b * 3) + (trit_a * 1))
// bits[15:7] = popcnt
const __m128i sadmask = _mm_set1_epi64x(0x8080898983838181);
// adding 8 to each shuffle index is cheaper than extracting the high qword
const __m128i offset = _mm_cvtsi64_si128(0x0808080808080808);
// shuffle control indices
static const uint64_t table[27] = {
0x0000000000000706, 0x0000000000070600, 0x0000000007060100, 0x0000000000070602,
0x0000000007060200, 0x0000000706020100, 0x0000000007060302, 0x0000000706030200,
0x0000070603020100, 0x0000000000070604, 0x0000000007060400, 0x0000000706040100,
0x0000000007060402, 0x0000000706040200, 0x0000070604020100, 0x0000000706040302,
0x0000070604030200, 0x0007060403020100, 0x0000000007060504, 0x0000000706050400,
0x0000070605040100, 0x0000000706050402, 0x0000070605040200, 0x0007060504020100,
0x0000070605040302, 0x0007060504030200, 0x0706050403020100
};
const uint8_t* end = &src[length & ~15];
do {
__m128i v = _mm_loadu_si128((__m128i*)src);
src += 16;
// detect spaces
__m128i mask = _mm_cmpeq_epi8(_mm_shuffle_epi8(lut_cntrl, v), v);
// shift w/blend: each word now only has 3 states instead of 4
// which reduces the possiblities per qword from 128 to 27
v = _mm_blendv_epi8(v, _mm_srli_epi16(v, 8), mask);
// extract bitfields describing each qword: index, popcnt
__m128i desc = _mm_sad_epu8(_mm_and_si128(mask, sadmask), sadmask);
size_t lo_desc = (size_t)_mm_cvtsi128_si32(desc);
size_t hi_desc = (size_t)_mm_extract_epi16(desc, 4);
// load shuffle control indices from pre-computed table
__m128i lo_shuf = _mm_loadl_epi64((__m128i*)&table[lo_desc & 0x1F]);
__m128i hi_shuf = _mm_or_si128(_mm_loadl_epi64((__m128i*)&table[hi_desc & 0x1F]), offset);
// store an entire qword then advance the pointer by how ever
// many of those bytes are actually wanted. Any trailing
// garbage will be overwritten by the next store.
// note: little endian byte memory order
_mm_storel_epi64((__m128i*)dst, _mm_shuffle_epi8(v, lo_shuf));
dst += (lo_desc >> 7);
_mm_storel_epi64((__m128i*)dst, _mm_shuffle_epi8(v, hi_shuf));
dst += (hi_desc >> 7);
} while (src != end);
}
// tail loop
length &= 15;
if (length != 0) {
const uint64_t bitmap = 0xFFFFFFFEFFFFC1FF;
do {
uint64_t c = *src++;
*dst = (uint8_t)c;
dst += ((bitmap >> c) & 1) | ((c + 0xC0) >> 8);
} while (--length);
}
// return pointer to the location after the last element in dst
return (size_t)(dst - ((uint8_t*)dst_void));
}
Whether the tail loop should be vectorized or use cmov is left as an exercise for the reader. Writing each byte unconditionally/branchlessly is fast when the input is unpredictable.
Using AVX2 to generate the shuffle control mask using an in-register table is only slightly slower than using large precomputed tables.
#include <stdint.h>
#include <immintrin.h>
// probably needs improvment...
size_t despace_avx2_vpermd(const char* src_void, char* dst_void, size_t length)
{
uint8_t* src = (uint8_t*)src_void;
uint8_t* dst = (uint8_t*)dst_void;
const __m256i lut_cntrl2 = _mm256_broadcastsi128_si256(_mm_setr_epi8(' ', 0, 0, 0, 0, 0, 0, 0, 0, '\t', '\n', 0, 0, '\r', 0, 0));
const __m256i permutation_mask = _mm256_set1_epi64x( 0x0020100884828180 );
const __m256i invert_mask = _mm256_set1_epi64x( 0x0020100880808080 );
const __m256i zero = _mm256_setzero_si256();
const __m256i fixup = _mm256_set_epi32(
0x08080808, 0x0F0F0F0F, 0x00000000, 0x07070707,
0x08080808, 0x0F0F0F0F, 0x00000000, 0x07070707
);
const __m256i lut = _mm256_set_epi32(
0x04050607, // 0x03020100', 0x000000'07
0x04050704, // 0x030200'00, 0x0000'0704
0x04060705, // 0x030100'00, 0x0000'0705
0x04070504, // 0x0300'0000, 0x00'070504
0x05060706, // 0x020100'00, 0x0000'0706
0x05070604, // 0x0200'0000, 0x00'070604
0x06070605, // 0x0100'0000, 0x00'070605
0x07060504 // 0x00'000000, 0x'07060504
);
// hi bits are ignored by pshufb, used to reject movement of low qword bytes
const __m256i shuffle_a = _mm256_set_epi8(
0x7F, 0x7E, 0x7D, 0x7C, 0x7B, 0x7A, 0x79, 0x78, 0x07, 0x16, 0x25, 0x34, 0x43, 0x52, 0x61, 0x70,
0x7F, 0x7E, 0x7D, 0x7C, 0x7B, 0x7A, 0x79, 0x78, 0x07, 0x16, 0x25, 0x34, 0x43, 0x52, 0x61, 0x70
);
// broadcast 0x08 then blendd...
const __m256i shuffle_b = _mm256_set_epi32(
0x08080808, 0x08080808, 0x00000000, 0x00000000,
0x08080808, 0x08080808, 0x00000000, 0x00000000
);
for( uint8_t* end = &src[(length & ~31)]; src != end; src += 32){
__m256i r0,r1,r2,r3,r4;
unsigned int s0,s1;
r0 = _mm256_loadu_si256((__m256i *)src); // asrc
// detect spaces
r1 = _mm256_cmpeq_epi8(_mm256_shuffle_epi8(lut_cntrl2, r0), r0);
r2 = _mm256_sad_epu8(zero, r1);
s0 = (unsigned)_mm256_movemask_epi8(r1);
r1 = _mm256_andnot_si256(r1, permutation_mask);
r1 = _mm256_sad_epu8(r1, invert_mask); // index_bitmap[0:5], low32_spaces_count[7:15]
r2 = _mm256_shuffle_epi8(r2, zero);
r2 = _mm256_sub_epi8(shuffle_a, r2); // add space cnt of low qword
s0 = ~s0;
r3 = _mm256_slli_epi64(r1, 29); // move top part of index_bitmap to high dword
r4 = _mm256_srli_epi64(r1, 7); // number of spaces in low dword
r4 = _mm256_shuffle_epi8(r4, shuffle_b);
r1 = _mm256_or_si256(r1, r3);
r1 = _mm256_permutevar8x32_epi32(lut, r1);
s1 = _mm_popcnt_u32(s0);
r4 = _mm256_add_epi8(r4, shuffle_a);
s0 = s0 & 0xFFFF; // isolate low oword
r2 = _mm256_shuffle_epi8(r4, r2);
s0 = _mm_popcnt_u32(s0);
r2 = _mm256_max_epu8(r2, r4); // pin low qword bytes
r1 = _mm256_xor_si256(r1, fixup);
r1 = _mm256_shuffle_epi8(r1, r2); // complete shuffle mask
r0 = _mm256_shuffle_epi8(r0, r1); // despace!
_mm_storeu_si128((__m128i*)dst, _mm256_castsi256_si128(r0));
_mm_storeu_si128((__m128i*)&dst[s0], _mm256_extracti128_si256(r0,1));
dst += s1;
}
// tail loop
length &= 31;
if (length != 0) {
const uint64_t bitmap = 0xFFFFFFFEFFFFC1FF;
do {
uint64_t c = *src++;
*dst = (uint8_t)c;
dst += ((bitmap >> c) & 1) | ((c + 0xC0) >> 8);
} while (--length);
}
return (size_t)(dst - ((uint8_t*)dst_void));
}
For posterity, the 1 KiB version (generating the table is left as an exercise for the reader).
static const uint64_t table[128] __attribute__((aligned(64))) = {
0x0706050403020100, 0x0007060504030201, ..., 0x0605040302010700, 0x0605040302010007
};
const __m128i mask_01 = _mm_set1_epi8( 0x01 );
__m128i vector0 = _mm_loadu_si128((__m128i*)src);
__m128i vector1 = _mm_shuffle_epi32( vector0, 0x0E );
__m128i bytemask0 = _mm_cmpeq_epi8( ???, vector0); // detect bytes to omit
uint32_t bitmask0 = _mm_movemask_epi8(bytemask0) & 0x7F7F;
__m128i hsum = _mm_sad_epu8(_mm_add_epi8(bytemask0, mask_01), _mm_setzero_si128());
vector0 = _mm_shuffle_epi8(vector0, _mm_loadl_epi64((__m128i*) &table[(uint8_t)bitmask0]));
_mm_storel_epi64((__m128i*)dst, vector0);
dst += (uint32_t)_mm_cvtsi128_si32(hsum);
vector1 = _mm_shuffle_epi8(vector1, _mm_loadl_epi64((__m128i*) &table[bitmask0 >> 8]));
_mm_storel_epi64((__m128i*)dst, vector1);
dst += (uint32_t)_mm_cvtsi128_si32(_mm_unpackhi_epi64(hsum, hsum));
https://github.com/InstLatx64/AVX512_VPCOMPRESSB_Emu has some benchmarks.
If one is willing to use BMI2 available on haswell and later, one can use pdep to first compress unwanted nibbles out from uint64_t, and then use pext to scatter the result to shuffle mask.
// Step 1 -- replicate mask to nibbles
uint64_t change4 = pdep(change1, 0x1111111111111111ULL) * 0x0F;
// Step 2 -- extract index from array of nibbles
uint64_t indices = pext(0xfedcba09876543210, change4);
// Step 3 -- interleave nibbles to octects
uint64_t high = pdep(indices >> 32ULL,0x0F0F0F0F0F0F0F0F);
uint64_t low = pdep(indices, 0x0F0F0F0F0F0F0F0FULL);
// Step 4 -- use these two masks to compress pSrc
__m128i compressed = _mm_shuffle_epi8(pSrc, _mm_set_epi64(high, low));
// Step 5 -- store 16 bytes unaligned
_mm_storeu_si128(pDst, compressed);
// Step 6 -- increment target pointer
pDst += __mm_popcnt(change1);
Also other variants (based on cumulative sum or sorting the 'X's (or zero bits) out from XX23456789abXXef will first require some technique to spread the bits from uint16_t evenly to __m128i (i.e. reverse of movemask_epi8).
The 64k entry LUT can however be split to top and bottom parts:
int c = change1 & 0xff;
int p = __popcount(c);
uint64_t a = LUT256[c]; // low part of index
uint64_t b = LUT256[change1 >> 8]; // top part of index
b += addlut9[p]; // 0x0101010101010101 * p
// Then must concatenate b|a at pth position of 'a'
if (p < 8)
{
a |= b << (8*(8-p));
b >>= 8*p;
}
__m128i d = _mm_shuffle_epi8(_mm_loadu_si128(pSrc),_mm_set1_epi64(b,a));
// and continue with steps 5 and 6 as before

Satellite data processing error

A new satellite data processing center has just been completed and ready for the initial testing using live data being sent down from an orbiting satellite. As the very first messages are displayed on the screen and you notice many of the data values are wildly out of range.
For example, on the terminal screen is something defined as “delta time” and it seems to be out of the expected range [0.01 to 10,000.00 seconds], but the value displayed (as a double) is [-4.12318024e-028 seconds]. After further investigation into the raw byte-based data stream, you find the original data being sent down from the satellite for this double word as [0xC0 0x83 0xA1 0xCA 0x66 0x55 0x40 0xBA]. On one of the old terminals, this data is displayed correctly and is within the expected range.
a. [5] What caused this problem?
b. [5] If this is the real problem, what should the actual value be?
Ah, Failure Mode Analysis. Very important indeed!
Well, other terminal shows data correctly --> there is incompatibility between terminal and data.
Big Endian, little Endian perhaps? I am expecting the "old" terminal to be little Endian because it may have been coded in C. Now you can interpret the data.
Here is some code
#include <stdio.h>
union myW {
double x;
// Recieved as:[0xC0 0x83 0xA1 0xCA 0x66 0x55 0x40 0xBA]
unsigned char d[8] = {0x83, 0xC0,0xCA, 0xA1, 0x55, 0x66, 0xBA, 0x40};
};
union myBad {
double x;
// Recieved as:[0xC0 0x83 0xA1 0xCA 0x66 0x55 0x40 0xBA]
unsigned char d[8] = {0xC0, 0x83,0xA1, 0xCA, 0x66, 0x55, 0x40, 0xBA};
};
int main(void)
{
myW value;
value.x = 1.0; // check how reasonable number looks like
printf("Something reasonable: \n");
for(int i = 0; i < 8; i++)
{
printf("%u ", value.d[i]);
}
myW received;
printf("\nWhat shouldve been displayed:\n");
for(int i = 0; i < 8; i++)
{
printf("%u ", received.d[i]);
}
printf("\n%f\n", received.x);
myBad bad;
printf("\nBad output as:\n");
for(int i = 0; i < 8; i++)
{
printf("%u ", bad.d[i]);
}
printf("\n%0.30f\n", bad.x);
}
Output:
Something reasonable:
0 0 0 0 0 0 240 63
What shouldve been displayed::
131 192 202 161 85 102 186 64
6758.334500
Bad output as:
192 131 161 202 102 85 64 186
-0.000000000000000000000000000412
Compiled with g++

How can I check if there is only value changed in a (bitwise?) value?

How can I check if there is only 1 bit change between a value and another (next) value?
the output is for example
001
101
110
in the second output there is a 0 changed into a 1
in the third output there is a 0 changed into a 1 AND also the last 1 changed into a 0
the program may only continue if there is only 1 change.
First, XOR the two numbers. XOR will return a 1 for every bit that changed.
Example:
0101110110100100
XOR
0100110110100100
would give you
0001000000000000
Now what you need is a quick way to check if there is only a single bit in your resulting number, or in other words, if the resulting number is a power of two.
A quick test for that is: (x & (x - 1)) == 0.
No for loops needed.
You can compute the bitwise XOR and then just count the bits that are 1's. This is known as the Hamming distance. For example:
unsigned int a = 0b001;
unsigned int b = 0b100;
unsigned int res;
/* Stores the number of different bits */
unsigned int acc;
res = a ^ b;
/* from https://graphics.stanford.edu/~seander/bithacks.html */
for (acc = 0; res; res >>= 1)
{
acc += res & 1;
}
In Java
void main(String[] args){
boolean value = moreThanOneChanged("101", "001");
}
static boolean moreThanOneChanged(String input, String current){
if(input.length() != current.length()) return false;
char[] first = input.toCharArray();
char[] second = current.toCharArray();
for(int i = 0, j = 0; i < input.length(); i++){
if(first[i] == second[i])
j++;
if(j > 1)
return true;
}
return false;
}
You can prove it to yourself fairly easily by using an and comparison between an exclusive or of each value and the exclusive or minus 1. It is easier to visualize what takes place by looking at the binary representation of the values and results. Below the function onebitoff performs the test. The other functions just provide a way to output the results:
#include <stdio.h>
#include <limits.h> /* for CHAR_BIT */
#define WDSZ 64
/** returns pointer to binary representation of 'n' zero padded to 'sz'.
* returns pointer to string contianing binary representation of
* unsigned 64-bit (or less ) value zero padded to 'sz' digits.
*/
char *cpbin (unsigned long n, int sz)
{
static char s[WDSZ + 1] = {0};
char *p = s + WDSZ;
int i;
for (i=0; i<sz; i++) {
p--;
*p = (n>>i & 1) ? '1' : '0';
}
return p;
}
/* return true if one-bit bitwise variance */
int onebitoff (unsigned int a, unsigned int b)
{
return ((a ^ b) & ((a ^ b) - 1)) ? 0 : 1;
}
/* quick output of binary difference for 2 values */
void showdiff (unsigned int a, unsigned int b)
{
if (onebitoff (a, b))
printf ( " values %u, %u - vary by one-bit (bitwise)\n\n", a, b);
else
printf ( " values %u, %u - vary by other than one-bit (bitwise)\n\n", a, b);
printf (" %3u : %s\n", a, cpbin (a, sizeof (char) * CHAR_BIT));
printf (" %3u : %s\n", b, cpbin (b, sizeof (char) * CHAR_BIT));
printf (" xor : %s\n\n", cpbin ((a ^ b), sizeof (char) * CHAR_BIT));
}
int main () {
printf ("\nTest whether the following numbers vary by a single bit (bitwise)\n\n");
showdiff (1, 5);
showdiff (5, 6);
showdiff (6, 1);
showdiff (97, 105); /* just as a further test */
return 0;
}
output:
$ ./bin/bitsvary
Test whether the following numbers vary by a single bit (bitwise)
values 1, 5 - vary by one-bit (bitwise)
1 : 00000001
5 : 00000101
xor : 00000100
values 5, 6 - vary by other than one-bit (bitwise)
5 : 00000101
6 : 00000110
xor : 00000011
values 6, 1 - vary by other than one-bit (bitwise)
6 : 00000110
1 : 00000001
xor : 00000111
values 97, 105 - vary by one-bit (bitwise)
97 : 01100001
105 : 01101001
xor : 00001000

Resources