Canoe :- How to use masking of bits in CAPL scripting - capl

I wanted to mask my 6bits signal to pass value 20 degree Celsius

try this
int firstvalue = 0xFF;
int secondValue = 0x55;
int result = 0;
result = firstvalue & 0xAA;
write("0xFF & 0xAA = %X", result);
result = secondValue & 0xAA;
write("0x55 & 0xAA %X", result);
result = secondValue & 0x01;
write("0x55 & 0x01 %X", result);
result = secondValue & 0;
write("0x55 & 0 %X", result);
result = firstvalue & 0x01;
write("0xFF & 0x01 %X", result);

Related

Program will exit after running a specific function in C++. Unable to figure out why

I have this function that will split the string. After running it in a while loop of read file, the program will exit instead of continue the next step of functions.
vector <string> splitString (string str, string delim){
vector <string> result;
size_t pos = 0;
string token;
while ((pos = str.find(delim)) != std::string::npos){
token = str.substr(0, pos);
result.push_back(token);
str.erase(0, pos+delim.length());
}
if (!str.empty())
result.push_back(str);
return (result);
}
void readCityLocation(int ** cityMap){
ifstream inFile;
inFile.open(filename[0]);
string line;
while (getline(inFile, line)){
int xCoordinate;
int yCoordinate;
string xValue;
string yValue;
//Get xCoordinate...
vector <string> splitThis = splitString(line,",");
xValue = splitThis[0];
cout << line;
}
An function called option2(), will create a table and call void readCityLocation( )
void option2(){
cout << endl << endl;
int ** table = new int * [gridXEnd + 1];
for (int i = 0; i < gridXEnd + 1; i++) {
table[i] = new int[gridYEnd + 1];
}
//Initialize all the array value in table to be 0
for (int i = 0; i < gridXEnd + 1; i++) {
for (int j = 0; j < gridYEnd + 1; j++) {
table[i][j] = 0;
//To be remove: Error Handling
//cout << table[i][j] << " Grid X: " << i << " Grid Y: " << j << endl;
}
}
readCityLocation(table);
}
I am fairly new to all this and can't figure out whats the problem. Any assistance is appreciated. Thank you.

Generate random number in MIPS

How can I generate random number in MIPs that similar to C Standard Library
this is the C code.
Please help me
uint32_t random_in_range(uint32_t low, uint32_t high)
{
uint32_t range = high-low+1;
uint32_t rand_num = get_random();
return (rand_num % range) + low;
}
// Generate random 32-bit unsigned number
// based on multiply-with-carry method shown
// at http://en.wikipedia.org/wiki/Random_number_generation
uint32_t get_random()
{
uint32_t result;
m_z = 36969 * (m_z & 65535) + (m_z >> 16);
m_w = 18000 * (m_w & 65535) + (m_w >> 16);
result = (m_z << 16) + m_w; /* 32-bit result */
return result;
}

Processing Image data changes during save

I'm trying to create a program to hide data in a image file. Data bits are hidden into last bit of every pixels blue value. First four pixels contain the length of following data bytes.
Everything works fine when I encrypt the data to image and then decrypt it without saving the image in between. However if I encrypt the data to an image and then save it and then open the file again and try to decrypt it, decryption fails since the values seem to have changed.
I wonder if there is something similar happening as with txt files where there is BOM containing byte order data prepended into the file?
The code works if I change the color c = crypted.pixels[pos + i];
to color c = original.pixels[pos + i]; in readByteAt function
and run the encrypting function first and then the decryption function.
This causes the code to run the decryption function on the just encrypted image still in program memory instead reading it from the file.
Any ideas on what causes this or how to prevent it are welcome!
here is the full (messy) code:
PImage original;
PImage crypted;
int imagesize;
boolean ready = false;
void setup() {
size(100, 100);
imagesize = width * height;
}
void draw() {
}
void encrypt()
{
original = loadImage("image.jpg");
original.loadPixels();
println("begin encrypt");
int pos = 0;
byte b[] = loadBytes("DATA.txt");
println("encrypting in image...");
int len = b.length;
println("len " + len);
writeByteAt((len >> (3*8)) & 0xFF, 0);
writeByteAt((len >> (2*8)) & 0xFF, 8);
writeByteAt((len >> (1*8)) & 0xFF, 16);
writeByteAt(len & 0xFF, 24);
pos = 32;
for (int i = 3; i < b.length; i++) {
int a = b[i] & 0xff;
print(char(a));
writeByteAt(a, pos);
pos += 8;
}
original.updatePixels();
println();
println("done");
original.save("encrypted.jpg");
}
void writeByteAt(int b, int pos)
{
println("writing " + b + " at " + pos);
for (int i = 0; i < 8; i++)
{
color c = original.pixels[pos + i];
int v = int(blue(c));
if ((b & (1 << i)) > 0)
{
v = v | 1;
} else
{
v = v & 0xFE;
}
original.pixels[pos+i] = color(red(c), green(c), v);
//original.pixels[pos+i] = color(255,255,255);
}
}
int readByteAt(int pos)
{
int b = 0;
for (int i = 0; i < 8; i++)
{
color c = crypted.pixels[pos + i];
int v = int(blue(c));
if ((v & 1) > 0)
{
b += (1 << i);
}
}
return b;
}
void decrypt()
{
crypted = loadImage("encrypted.jpg");
crypted.loadPixels();
println("begin decrypt");
int pos = 0;
PrintWriter output = createWriter("out.txt");
println("decrypting...");
int len = 0;
len += readByteAt(0) << 3*8;
len += readByteAt(8) << 2*8;
len += readByteAt(16) << 1*8;
len += readByteAt(24);
pos = 32;
if(len >= imagesize)
{
println("ERROR: DATA LENGTH OVER IMAGE SIZE");
return;
}
println(len);
while (pos < ((len+1)*8)) {
output.print(char(readByteAt(pos)));
print(char(readByteAt(pos)));
pos += 8;
}
output.flush(); // Writes the remaining data to the file
output.close();
println("\nDone");
}
void keyPressed()
{
if(key == 'e')
{
encrypt();
}
if(key == 'd')
{
decrypt();
}
}

load vector from large vector with simd based on mask

I hope someone can help here.
I have a large byte vector from which i create a small byte vector ( based on a mask ) which I then process with simd.
Currently the mask is an array of baseOffset + submask (byte[256]) , optimized for storage as there are > 10^8 . I create a maxsize subvector , then loop through the mask array multiply the baseOffssetby 256 and for each bit offset in the mask load from the large vector and put the values in a smaller vector sequentially . The smaller vector is then processed via a number of VPMADDUBSW and accumulated . I can change this structure. eg walk the bits once to use a 8K bit array buffer and then create the small vector.
Is there a faster way i can create the subarray ?
I pulled the code out of the app into a test program but the original is in a state of flux ( moving to AVX2 and pulling more out of C# )
#include "stdafx.h"
#include<stdio.h>
#include <mmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>
#include <smmintrin.h>
#include <immintrin.h>
//from
char N[4096] = { 9, 5, 5, 5, 9, 5, 5, 5, 5, 5 };
//W
char W[4096] = { 1, 2, -3, 5, 5, 5, 5, 5, 5, 5 };
char buffer[4096] ;
__declspec(align(2))
struct packed_destination{
char blockOffset;
__int8 bitMask[32];
};
__m128i sum = _mm_setzero_si128();
packed_destination packed_destinations[10];
void process128(__m128i u, __m128i s)
{
__m128i calc = _mm_maddubs_epi16(u, s); // pmaddubsw
__m128i loints = _mm_cvtepi16_epi32(calc);
__m128i hiints = _mm_cvtepi16_epi32(_mm_shuffle_epi32(calc, 0x4e));
sum = _mm_add_epi32(_mm_add_epi32(loints, hiints), sum);
}
void process_array(char n[], char w[], int length)
{
sum = _mm_setzero_si128();
int length128th = length >> 7;
for (int i = 0; i < length128th; i++)
{
__m128i u = _mm_load_si128((__m128i*)&n[i * 128]);
__m128i s = _mm_load_si128((__m128i*)&w[i * 128]);
process128(u, s);
}
}
void populate_buffer_from_vector(packed_destination packed_destinations[], char n[] , int dest_length)
{
int buffer_dest_index = 0;
for (int i = 0; i < dest_length; i++)
{
int blockOffset = packed_destinations[i].blockOffset <<8 ;
// go through mask and copy to buffer
for (int j = 0; j < 32; j++)
{
int joffset = blockOffset + j << 3;
int mask = packed_destinations[i].bitMask[j];
if (mask & 1 << 0)
buffer[buffer_dest_index++] = n[joffset + 1<<0 ];
if (mask & 1 << 1)
buffer[buffer_dest_index++] = n[joffset + 1<<1];
if (mask & 1 << 2)
buffer[buffer_dest_index++] = n[joffset + 1<<2];
if (mask & 1 << 3)
buffer[buffer_dest_index++] = n[joffset + 1<<3];
if (mask & 1 << 4)
buffer[buffer_dest_index++] = n[joffset + 1<<4];
if (mask & 1 << 5)
buffer[buffer_dest_index++] = n[joffset + 1<<5];
if (mask & 1 << 6)
buffer[buffer_dest_index++] = n[joffset + 1<<6];
if (mask & 1 << 7)
buffer[buffer_dest_index++] = n[joffset + 1<<7];
};
}
}
int _tmain(int argc, _TCHAR* argv[])
{
for (int i = 0; i < 32; ++i)
{
packed_destinations[0].bitMask[i] = 0x0f;
packed_destinations[1].bitMask[i] = 0x04;
}
packed_destinations[1].blockOffset = 1;
populate_buffer_from_vector(packed_destinations, N, 1);
process_array(buffer, W, 256);
int val = sum.m128i_i32[0] +
sum.m128i_i32[1] +
sum.m128i_i32[2] +
sum.m128i_i32[3];
printf("sum is %d" , val);
printf("Press Any Key to Continue\n");
getchar();
return 0;
}
Normally mask usage would be 5-15% for some work loads it would be 25-100% .
MASKMOVDQU is close but then we would have to re pack /swl according to the mask before saving..
A couple of optimisations for your existing code:
If your data is sparse then it would probably be a good idea to add an additional test of each 8 bit mask value prior to testing the additional bits, i.e.
int mask = packed_destinations[i].bitMask[j];
if (mask != 0)
{
if (mask & 1 << 0)
buffer[buffer_dest_index++] = n[joffset + 1<<0 ];
if (mask & 1 << 1)
buffer[buffer_dest_index++] = n[joffset + 1<<1];
...
Secondly your process128 function can be optimised considerably:
inline __m128i process128(const __m128i u, const __m128i s, const __m128i sum)
{
const __m128i vk1 = _mm_set1_epi16(1);
__m128i calc = _mm_maddubs_epi16(u, s);
calc = _mm_madd_epi16(v, vk1);
return _mm_add_epi32(sum, calc);
}
Note that as well as reducing the SSE instruction count from 6 to 3, I've also made sum a parameter, to get away from any dependency on global variables (it's always a good idea to avoid globals, not only for good software engineering but also because they can inhibit certain compiler optimisations).
It would be interesting to see a profile of your code (using a decent sampling profiler, not via instrumentation), since this would help to prioritise any further optimisation efforts.

Horizontally Flip a One Bit Bitmap Line

I'm looking for an algorithm to flip a 1 Bit Bitmap line horizontally. Remember these lines are DWORD aligned!
I'm currently unencoding an RLE stream to an 8 bit-per-pixel buffer, then re-encoding to a 1 bit line, however, I would like to try and keep it all in the 1 bit space in an effort to increase its speed. Profiling indicates this portion of the program to be relatively slow compared to the rest.
Example line (Before Flip):
FF FF FF FF 77 AE F0 00
Example line (After Flip):
F7 5E EF FF FF FF F0 00
Create a conversion table to swap the bits in a byte:
byte[] convert = new byte[256];
for (int i = 0; i < 256; i++) {
int value = 0;
for (int bit = 1; bit <= 128; bit<<=1) {
value <<= 1;
if ((i & bit) != 0) value++;
}
convert[i] = (byte)value;
}
Now you can use the table to swap a byte, then you just have to store the byte in the right place in the result:
byte[] data = { 0xFF, 0xFF, 0xFF, 0xFF, 0x77, 0xAE, 0xF0, 0x00 };
int width = 52;
int shift = data.Length * 8 - width;
int shiftBytes = data.Length - 1 - shift / 8;
int shiftBits = shift % 8;
byte[] result = new byte[data.Length];
for (int i = 0; i < data.Length; i++) {
byte swap = convert[data[i]];
if (shiftBits == 0) {
result[shiftBYtes - i] = swap;
} else {
if (shiftBytes - i >= 0) {
result[shiftBytes - i] |= (byte)(swap << shiftBits);
}
if (shiftBytes - i - 1 >= 0) {
result[shiftBytes - i - 1] |= (byte)(swap >> (8 - shiftBits));
}
}
}
Console.WriteLine(BitConverter.ToString(result));
Output:
F7-5E-EF-FF-FF-FF-F0-00
The following code reads and reverses the data in blocks of 32 bits as integers. The code to reverse the bits is split into two parts because on a little endian machine reading four bytes as an 32 bit integer reverses the byte order.
private static void Main()
{
var lineLength = 52;
var input = new Byte[] { 0xFF, 0xFF, 0xFF, 0xFF, 0x77, 0xAE, 0xF0, 0x00 };
var output = new Byte[input.Length];
UInt32 lastValue = 0x00000000;
var numberBlocks = lineLength / 32 + ((lineLength % 32 == 0) ? 0 : 1);
var numberBitsInLastBlock = lineLength % 32;
for (Int32 block = 0; block < numberBlocks; block++)
{
var rawValue = BitConverter.ToUInt32(input, 4 * block);
var reversedValue = (ReverseBitsA(rawValue) << (32 - numberBitsInLastBlock)) | (lastValue >> numberBitsInLastBlock);
lastValue = rawValue;
BitConverter.GetBytes(ReverseBitsB(reversedValue)).CopyTo(output, 4 * (numberBlocks - block - 1));
}
Console.WriteLine(BitConverter.ToString(input).Replace('-', ' '));
Console.WriteLine(BitConverter.ToString(output).Replace('-', ' '));
}
private static UInt32 SwapBitGroups(UInt32 value, UInt32 mask, Int32 shift)
{
return ((value & mask) << shift) | ((value & ~mask) >> shift);
}
private static UInt32 ReverseBitsA(UInt32 value)
{
value = SwapBitGroups(value, 0x55555555, 1);
value = SwapBitGroups(value, 0x33333333, 2);
value = SwapBitGroups(value, 0x0F0F0F0F, 4);
return value;
}
private static UInt32 ReverseBitsB(UInt32 value)
{
value = SwapBitGroups(value, 0x00FF00FF, 8);
value = SwapBitGroups(value, 0x0000FFFF, 16);
return value;
}
It is a bit ugly and not robust against errors ... but it is just sample code. And it outputs the following.
FF FF FF FF 77 AE F0 00
F7 5E EF FF FF FF F0 00

Resources