How do I fold 20 bytes into 8 bytes - byte

This is how I currently fold 16 bytes into 8 bytes.
How do I fold 20 bytes into 8 bytes?
// Fold four words into eight bytes: XOR word 0 with word 2 and word 1 with
// word 3, then write each XOR result out one byte at a time.
// NOTE(review): presumably regs holds four 32-bit values (16 bytes) and fold
// has at least 8 elements; the declarations are not shown here, so confirm.
ac = regs[0] ^ regs[2];
bd = regs[1] ^ regs[3];
// fold[0..3] receive the bytes of ac, lowest-order byte first.
for (i = 0; i < 4; i++)
{
fold[i] = (sbyte)(ac & 0xff);
ac >>= 8; // bring the next byte down into the low 8 bits
}
// fold[4..7] receive the bytes of bd, lowest-order byte first.
for (i = 4; i < 8; i++)
{
fold[i] = (sbyte)(bd & 0xff);
bd >>= 8; // bring the next byte down into the low 8 bits
}
return fold;

Related

When the gray image width is 2048 or 4096, rotating the image by 90 degrees takes much longer than for nearby widths

I am rotating an image by 90 degrees using the OpenCV Mat class; the code is below.
I found that when the image width is 4096, the running time is about twice the running time for an image width of 4097. The running-time output follows the code. Does anybody know why?
#include<string>
#include<fstream>
#include<iostream>
#include "opencv.hpp"
using namespace cv;
// Benchmark: copies a 4096-wide and a 4097-wide 8-bit image into a destination
// with rows and columns exchanged, ten times each, and prints the elapsed time
// of every run in milliseconds. Finally shows the 4096 result in a window.
// Returns 0 on success, 1 if either input image could not be loaded.
int main()
{
    const std::string ImageFileName0 = "F:\\temp\\4096-3000.bmp";
    const std::string ImageFileName1 = "F:\\temp\\4097-3000.bmp";

    // Load as single-channel 8-bit data: every pixel access below uses
    // at<uchar>, which only addresses pixels correctly for CV_8UC1 images.
    // imread's default flag would return a 3-channel image instead.
    Mat image4096 = imread(ImageFileName0, IMREAD_GRAYSCALE);
    Mat image4097 = imread(ImageFileName1, IMREAD_GRAYSCALE);
    if (image4096.empty() || image4097.empty())
    {
        std::cerr << "could not load input images" << std::endl;
        return 1;
    }

    // Destination has as many rows as the source has columns, and vice versa.
    const int lWidth4096 = image4096.size().width;
    const int lHeight4096 = image4096.size().height;
    Mat image4096Res(lWidth4096, lHeight4096, CV_8UC1);
    // Zero the destination before the timed runs.
    for (int row = 0; row < lHeight4096; row++)
    {
        for (int col = 0; col < lWidth4096; col++)
        {
            image4096Res.at<uchar>(col, row) = 0;
        }
    }

    const int lWidth4097 = image4097.size().width;
    const int lHeight4097 = image4097.size().height;
    Mat image4097Res(lWidth4097, lHeight4097, CV_8UC1);
    for (int row = 0; row < lHeight4097; row++)
    {
        for (int col = 0; col < lWidth4097; col++)
        {
            image4097Res.at<uchar>(col, row) = 0;
        }
    }

    for (int run = 0; run < 10; run++)
    {
        // Timed section 1: the 4096-wide image. The destination is written
        // row by row while the source is read down one column at a time.
        const double time0 = static_cast<double>(getTickCount());
        for (int col = 0; col < lWidth4096; ++col)
        {
            for (int row = 0; row < lHeight4096; ++row)
            {
                image4096Res.at<uchar>(col, row) = image4096.at<uchar>(row, col);
            }
        }
        const double time4096 = (static_cast<double>(getTickCount()) - time0) / getTickFrequency();

        // Timed section 2: the same copy for the 4097-wide image.
        const double time1 = static_cast<double>(getTickCount());
        for (int col = 0; col < lWidth4097; ++col)
        {
            for (int row = 0; row < lHeight4097; ++row)
            {
                image4097Res.at<uchar>(col, row) = image4097.at<uchar>(row, col);
            }
        }
        const double time4097 = (static_cast<double>(getTickCount()) - time1) / getTickFrequency();

        // getTickCount()/getTickFrequency() yields seconds; print milliseconds.
        std::cout << "4096 time:" << time4096*1000 << std::endl;
        std::cout << "4097 time:" << time4097*1000 << std::endl;
        std::cout << std::endl;
    }

    namedWindow("aa",CV_WINDOW_NORMAL);
    imshow("aa", image4096Res);
    waitKey();
    return 0;
}
Running time result:
4096 time:149.337
4097 time:56.8092
4096 time:143.556
4097 time:67.4758
4096 time:142.07
4097 time:58.2825
4096 time:153.973
4097 time:57.1894
4096 time:145.086
4097 time:58.7944
4096 time:156.33
4097 time:87.9404
4096 time:140.224
4097 time:56.9525
4096 time:144.413
4097 time:57.133
4096 time:141.672
4097 time:54.916
4096 time:148.443
4097 time:55.8449
How the running time varies with the image width is shown here.
It is clear that the execution time is abnormal only when the image width is 1024, 2048, or 4096, while the overall trend increases linearly.
Could you try this version to see whether the same difference exists?
I also added a check that the loaded image is of type CV_8UC1.
Be aware that the result of both your solution and mine is a mirrored image.
// Timing fragment: copies image4096 / image4097 into image4096Res /
// image4097Res with rows and columns exchanged, ten times each, and prints the
// duration of every run in milliseconds. image4096 and image4097 are declared
// outside this fragment.
int lWidth4096 = image4096.size().width;
int lHeight4096 = image4096.size().height;
// Zero-filled destination with lWidth4096 rows and lHeight4096 columns.
Mat image4096Res = cv::Mat::zeros(lWidth4096, lHeight4096, CV_8UC1);
int lWidth4097 = image4097.size().width;
int lHeight4097 = image4097.size().height;
Mat image4097Res = cv::Mat::zeros(lWidth4097, lHeight4097, CV_8UC1);
// The loops below index the pixel data one byte per pixel, so anything other
// than single-channel 8-bit input is rejected up front.
if (image4096.type() != CV_8UC1)
throw "need 8bit image as input";
if (image4097.type() != CV_8UC1)
throw "need 8bit image as input";
for (int i = 0; i < 10; i++)
{
double time0 = static_cast<double>(getTickCount());
for (int j = 0; j < lWidth4096; ++j)
{
// Start of destination row j.
uint8_t *buf = image4096Res.ptr(j);
// Note: this inner i shadows the run counter i of the enclosing loop.
for (int i = 0; i<lHeight4096; ++i)
{
//image4096Res.at<uchar>(j, i) = image4096.at<uchar>(i, j);
//buf[i] = image4096.at<uchar>(i, j);
// Source row i starts step[0] * i bytes after data; take byte j of that row.
buf[i] = (image4096.data + image4096.step[0] * i)[j];
}
}
// Tick difference divided by the tick frequency gives seconds.
double time4096 = (static_cast<double>(getTickCount()) - time0) / getTickFrequency();
double time1 = static_cast<double>(getTickCount());
for (int j = 0; j < lWidth4097; ++j)
{
// Start of destination row j.
uint8_t *buf = image4097Res.ptr(j);
// Note: this inner i shadows the run counter i of the enclosing loop.
for (int i = 0; i<lHeight4097; ++i)
{
//image4097Res.at<uchar>(j, i) = image4097.at<uchar>(i, j);
//buf[i] = image4097.at<uchar>(i, j);
// Source row i starts step[0] * i bytes after data; take byte j of that row.
buf[i] = (image4097.data + image4097.step[0] * i)[j];
}
}
double time4097 = (static_cast<double>(getTickCount()) - time1) / getTickFrequency();
// Seconds are scaled by 1000 so the printed values are milliseconds.
std::cout << "4096 time:" << time4096 * 1000 << std::endl;
std::cout << "4097 time:" << time4097 * 1000 << std::endl;
std::cout << std::endl;
}

Can separate, forked, processes jointly access shared dynamically allocated, pointer, memory?

I am trying to parallelize the addition of two simple 4x4 matrices. The child process adds only the odd rows and the parent adds the even ones. However, I can't seem to get the processes to work on shared pointer memory, and the output is always given in halves, as shown beneath the code:
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
/*
 * Adds two 4x4 matrices A and B into a heap-allocated matrix C, splitting the
 * work across fork(): the child computes rows 1 and 3, the parent rows 0 and 2.
 * Each process then prints its own view of C.
 *
 * After fork() the two processes no longer share the heap, so each printout
 * contains only the rows that process computed; the other rows stay 0.
 *
 * Returns 0 on success, 1 if an allocation or the fork fails.
 */
int main() {
    int A[4][4] = {{1,2,3,4},{6,7,8,9},{11,12,13,14},{16,17,18,19}};
    int B[4][4] = {{1,2,3,4},{6,7,8,9},{11,12,13,14},{16,17,18,19}};

    /* Row table holds four int* slots, so it must be sized by the pointer
     * type; sizing it by int under-allocates where pointers are wider. */
    int** C = (int**)malloc(sizeof(int*)*4);
    if (C == NULL) {
        perror("malloc");
        return 1;
    }
    for (int z = 0; z<4; z++) {
        /* calloc zero-fills, so rows this process never writes print as 0
         * rather than as indeterminate values. */
        C[z] = (int*)calloc(4, sizeof(int));
        if (C[z] == NULL) {
            perror("calloc");
            return 1;
        }
    }

    pid_t cp;
    printf("Before fork\n");
    cp = fork();
    if (cp < 0) {
        perror("fork");
        return 1;
    }
    if (cp == 0) {
        /* Child: odd rows. */
        for(int i = 1; i < 4; i+=2) {
            for(int j = 0; j < 4; j++) {
                printf("child process\n");
                printf("We are adding cell %d,%d\n",i,j);
                C[i][j] = A[i][j] + B[i][j];
                sleep(1);
            }
        }
    } else {
        /* Parent: even rows. */
        for (int k = 0; k < 4; k+=2) {
            for(int l = 0; l < 4; l++) {
                printf("parent process\n");
                printf("We are adding cell %d,%d\n",k,l);
                C[k][l] = A[k][l] + B[k][l];
                sleep(1);
            }
        }
    }

    /* Both processes reach this point and print their own copy of C. */
    sleep(10);
    printf("We are printing C here\n");
    for (int m = 0; m < 4; m++) {
        for(int n = 0; n < 4; n++) {
            printf("%d ",C[m][n]);
        }
        printf("\n");
    }

    for (int z = 0; z < 4; z++) {
        free(C[z]);
    }
    free(C);
    return 0;
}
This is the output of the final for loop in the above code:
We are printing C here
0 0 0 0
12 14 16 18
0 0 0 0
32 34 36 38
We are printing C here
2 4 6 8
0 0 0 0
22 24 26 28
0 0 0 0

Radix Sort Using Bitwise Shift ">>" and bitwise and "&" operators using base 8 and 4

How does one go about sorting an array with the int contents {782, 40, 21}, using the Radix sort method, and utilizing bitwise shift ">>" and bitwise and "&" operators? How is this done using base 8 and 4?
Example base 256 radix sort for 32 bit integers. std::swap() could be replaced by any swap method.
typedef unsigned int uint32_t;
// Least-significant-digit radix sort of 32-bit unsigned keys, one byte
// (base 256) per pass, four passes in total.
//   a     - array holding the keys to sort
//   b     - scratch array with room for `count` keys
//   count - number of keys
// Returns the buffer that holds the sorted keys. The buffers are exchanged
// after each of the four passes, so this is the `a` that was passed in.
uint32_t * RadixSort(uint32_t * a, uint32_t *b, size_t count)
{
    size_t offsets[4][256] = {};

    // One sweep over the keys fills the histograms of all four byte positions.
    for (size_t k = 0; k < count; ++k) {
        const uint32_t key = a[k];
        offsets[0][key & 0xff]++;
        offsets[1][(key >> 8) & 0xff]++;
        offsets[2][(key >> 16) & 0xff]++;
        offsets[3][(key >> 24) & 0xff]++;
    }

    // Turn each histogram into exclusive prefix sums: the slot for a byte
    // value becomes the index where the first key with that byte is written.
    for (size_t digit = 0; digit < 4; ++digit) {
        size_t running = 0;
        for (size_t value = 0; value < 256; ++value) {
            const size_t occurrences = offsets[digit][value];
            offsets[digit][value] = running;
            running += occurrences;
        }
    }

    // Stable scatter on each byte, lowest byte first.
    for (size_t digit = 0; digit < 4; ++digit) {
        const size_t shift = digit * 8;
        size_t *next = offsets[digit];
        for (size_t k = 0; k < count; ++k) {
            const uint32_t key = a[k];
            const size_t bucket = (size_t)(key >> shift) & 0xff;
            b[next[bucket]++] = key;
        }
        std::swap(a, b); // the freshly written buffer is the next pass's input
    }
    return a;
}

Addressing for-loops in big O notation

I have the Following Loops:
n = 2^M
// Outer loop: i counts down from n to 1, so its body runs n times.
for(int i = n; i > 0; i--){
// Middle loop: j takes the values 1, 2, 4, ... for as long as j < n.
for(int j = 1; j < n ; j*=2){
// Inner loop: runs j times for the current j; its body is empty.
for(int k = 0 ; k < j ; k++){
}
}
}
I'm trying to understand how to approach this. I have tried to break it down into single steps, but with no luck. Could someone explain how to look at this and what to look for in this type of question?
why don't you just instrument it, to see what happens?
for example
/**
 * Instruments the three nested loops: for each exponent it sets n = 2^exponent,
 * counts how often each loop body runs, and prints one line of the form
 * "exponent outerCount middleCount innerCount".
 */
public static void main(String[] args) {
    // Exponents to run; try other values here like 1,10,100
    int[] exponents = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    for (int idx = 0; idx < exponents.length; idx++) {
        int exponent = exponents[idx];
        int n = (int) Math.pow(2, exponent);
        int outerCount = 0;
        int middleCount = 0;
        int innerCount = 0;
        for (int i = n; i > 0; i--) {
            outerCount++;
            for (int j = 1; j < n; j *= 2) {
                middleCount++;
                for (int k = 0; k < j; k++) {
                    innerCount++;
                }
            }
        }
        System.out.println(exponent + " " + outerCount + " " + middleCount + " " + innerCount);
    }
}
you'd have something like
1 2 2 2
2 4 8 12
3 8 24 56
4 16 64 240
5 32 160 992
6 64 384 4032
7 128 896 16256
8 256 2048 65280
9 512 4608 261632
10 1024 10240 1047552
What pattern do you see in these numbers?

Horizontally Flip a One Bit Bitmap Line

I'm looking for an algorithm to flip a 1 Bit Bitmap line horizontally. Remember these lines are DWORD aligned!
I'm currently decoding an RLE stream to an 8 bit-per-pixel buffer, then re-encoding it to a 1-bit line. However, I would like to keep it all in the 1-bit space in an effort to increase its speed. Profiling indicates this portion of the program is relatively slow compared to the rest.
Example line (Before Flip):
FF FF FF FF 77 AE F0 00
Example line (After Flip):
F7 5E EF FF FF FF F0 00
Create a conversion table to swap the bits in a byte:
// Lookup table: convert[b] is the byte b with its eight bits in reverse order.
byte[] convert = new byte[256];
for (int source = 0; source < convert.Length; source++) {
    int mirrored = 0;
    // Read the source bits from least to most significant and push each one
    // onto the low end of the result, so the first bit read ends up highest.
    for (int position = 0; position < 8; position++) {
        mirrored = (mirrored << 1) | ((source >> position) & 1);
    }
    convert[source] = (byte)mirrored;
}
Now you can use the table to swap a byte, then you just have to store the byte in the right place in the result:
// Mirrors a 1-bit-per-pixel line horizontally and prints the result as hex.
// `data` is the padded line, `width` the number of pixels that are in use.
// Each source byte is bit-reversed through the `convert` table and then stored
// at the mirrored position, shifted so the padding bits stay at the end.
byte[] data = { 0xFF, 0xFF, 0xFF, 0xFF, 0x77, 0xAE, 0xF0, 0x00 };
int width = 52;
// Number of padding bits that follow the last pixel.
int shift = data.Length * 8 - width;
// Index of the last byte that contains pixel data.
int shiftBytes = data.Length - 1 - shift / 8;
// Padding bits inside that last byte.
int shiftBits = shift % 8;
byte[] result = new byte[data.Length];
for (int i = 0; i < data.Length; i++) {
    byte swap = convert[data[i]];
    // Source byte i lands at this index; it is negative for source bytes that
    // consist purely of padding, and those are skipped.
    int target = shiftBytes - i;
    if (shiftBits == 0) {
        // Byte-aligned width: the reversed byte is stored whole.
        if (target >= 0) {
            result[target] = swap;
        }
    } else {
        // Unaligned width: the reversed byte straddles two result bytes.
        if (target >= 0) {
            result[target] |= (byte)(swap << shiftBits);
        }
        if (target - 1 >= 0) {
            result[target - 1] |= (byte)(swap >> (8 - shiftBits));
        }
    }
}
Console.WriteLine(BitConverter.ToString(result));
Output:
F7-5E-EF-FF-FF-FF-F0-00
The following code reads and reverses the data in blocks of 32 bits as integers. The code to reverse the bits is split into two parts because, on a little-endian machine, reading four bytes as a 32-bit integer reverses the byte order.
// Demo: mirrors the 1-bit-per-pixel line in `input` (lineLength pixels, padded
// to a multiple of 32 bits) by processing it in 32-bit blocks, then prints the
// input and the mirrored output as hex bytes.
// The words are read with BitConverter, so the byte handling follows the
// little-endian layout described in the accompanying text.
private static void Main()
{
    var lineLength = 52;
    var input = new Byte[] { 0xFF, 0xFF, 0xFF, 0xFF, 0x77, 0xAE, 0xF0, 0x00 };
    var output = new Byte[input.Length];

    // Per-byte bit-reversed form of the previous block; its upper bits are
    // carried into the current output block.
    UInt32 lastValue = 0x00000000;

    // Number of 32-bit blocks needed for lineLength bits (rounded up).
    var numberBlocks = lineLength / 32 + ((lineLength % 32 == 0) ? 0 : 1);
    // Pixels used in the final, partially filled block (0 when the line is
    // an exact multiple of 32 bits).
    var numberBitsInLastBlock = lineLength % 32;

    for (Int32 block = 0; block < numberBlocks; block++)
    {
        var rawValue = BitConverter.ToUInt32(input, 4 * block);
        var bitReversed = ReverseBitsA(rawValue);

        // With a block-aligned line there is nothing to shift or carry. The
        // special case is required because a shift count of 32 is reduced to
        // 0, which would OR the previous block into this one.
        var reversedValue = (numberBitsInLastBlock == 0)
            ? bitReversed
            : (bitReversed << (32 - numberBitsInLastBlock)) | (lastValue >> numberBitsInLastBlock);

        // Carry the bit-reversed word, not the raw one: the bits spilling into
        // the next output block must be in the same representation as the
        // bits they are combined with.
        lastValue = bitReversed;

        // Input block k becomes output block (numberBlocks - k - 1).
        BitConverter.GetBytes(ReverseBitsB(reversedValue)).CopyTo(output, 4 * (numberBlocks - block - 1));
    }

    Console.WriteLine(BitConverter.ToString(input).Replace('-', ' '));
    Console.WriteLine(BitConverter.ToString(output).Replace('-', ' '));
}
// Exchanges neighbouring bit groups of `value`: the bits selected by `mask`
// move up by `shift` positions and the remaining bits move down by `shift`.
private static UInt32 SwapBitGroups(UInt32 value, UInt32 mask, Int32 shift)
{
    UInt32 selected = value & mask;
    UInt32 remainder = value & ~mask;
    return (selected << shift) | (remainder >> shift);
}
// First half of a 32-bit reversal: swaps adjacent bits, then adjacent bit
// pairs, then adjacent nibbles, which reverses the bit order inside each byte
// while leaving the bytes where they are.
private static UInt32 ReverseBitsA(UInt32 value)
{
    UInt32 bitsSwapped = SwapBitGroups(value, 0x55555555, 1);
    UInt32 pairsSwapped = SwapBitGroups(bitsSwapped, 0x33333333, 2);
    return SwapBitGroups(pairsSwapped, 0x0F0F0F0F, 4);
}
// Second half of a 32-bit reversal: swaps adjacent bytes, then the two 16-bit
// halves, which reverses the order of the four bytes.
private static UInt32 ReverseBitsB(UInt32 value)
{
    UInt32 bytesSwapped = SwapBitGroups(value, 0x00FF00FF, 8);
    return SwapBitGroups(bytesSwapped, 0x0000FFFF, 16);
}
It is a bit ugly and not robust against errors ... but it is just sample code. And it outputs the following.
FF FF FF FF 77 AE F0 00
F7 5E EF FF FF FF F0 00

Resources