How do I port code calling QueryPerformanceFrequency to Rust?

I need to port this C code into Rust:
QueryPerformanceFrequency((unsigned long long int *) &frequency);
I didn't find a function that does that.
The Linux variant looks like:
struct timespec now;
if (clock_gettime(CLOCK_MONOTONIC, &now) == 0)
    frequency = 1000000000;
Should I call std::time::Instant::now() and set the frequency to 1000000000?
This is the complete function:
// Initializes hi-resolution MONOTONIC timer
static void InitTimer(void)
{
    srand(time(NULL));    // Initialize random seed

#if defined(_WIN32)
    QueryPerformanceFrequency((unsigned long long int *) &frequency);
#endif

#if defined(__linux__)
    struct timespec now;
    if (clock_gettime(CLOCK_MONOTONIC, &now) == 0)
        frequency = 1000000000;
#endif

#if defined(__APPLE__)
    mach_timebase_info_data_t timebase;
    mach_timebase_info(&timebase);
    frequency = (timebase.denom*1e9)/timebase.numer;
#endif

    baseTime = GetTimeCount();      // Get MONOTONIC clock time offset
    startTime = GetCurrentTime();   // Get current time
}

The direct solution for accessing Windows APIs is to use the winapi crate. In this case, call QueryPerformanceFrequency:
use std::mem;
use winapi::um::profileapi::QueryPerformanceFrequency;

fn freq() -> u64 {
    unsafe {
        // LARGE_INTEGER is a union; zero-initialize it and let the API fill it in
        let mut freq = mem::zeroed();
        QueryPerformanceFrequency(&mut freq);
        *freq.QuadPart() as u64
    }
}

fn main() {
    println!("frequency: {}", freq());
}
And the corresponding Cargo.toml dependency (winapi 0.3 gates each module behind a feature, so profileapi must be enabled):

[dependencies]
winapi = { version = "0.3.8", features = ["profileapi"] }
As for the "hi-resolution MONOTONIC timer" requirement: I would use Instant as a monotonic timer and assume it's high-enough precision until proven otherwise.
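For example, a minimal sketch of the timing part of InitTimer ported to Rust (the Timer type and names here are mine, not from any crate): Instant is monotonic on every platform, so the per-OS frequency bookkeeping disappears entirely.

use std::time::{Duration, Instant};

// Monotonic base time, the analogue of baseTime = GetTimeCount()
struct Timer {
    base_time: Instant,
}

impl Timer {
    fn init() -> Timer {
        Timer { base_time: Instant::now() }
    }

    // Nanoseconds since init(); no frequency conversion is needed because
    // Duration is already normalized, regardless of the OS clock source.
    fn elapsed_ns(&self) -> u128 {
        self.base_time.elapsed().as_nanos()
    }
}

fn main() {
    let timer = Timer::init();
    std::thread::sleep(Duration::from_millis(10));
    println!("elapsed: {} ns", timer.elapsed_ns());
}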

Related

Problem in synchronization of timer0 and CPU clock cycles

I am a beginner in AVR. I need to sample input at odd intervals of 8 ms. I have used CTC mode to generate an 8 ms timer, with a compare interrupt so that a flag (timer_count) is incremented at every comparison, i.e. every 8 ms. The 8 ms timer starts on an external interrupt at pin D0.
When I check the input conditions in the main loop, due to the large difference between the frequency of the main controller (18.432 MHz) and the 8 ms timer, I am unable to sample the inputs correctly. Can anyone suggest another method to do this? The code is pasted here for reference.
#include <mega128.h>
#include <stdio.h>
#include <stdlib.h>

#define CHECK_BIT(ADDRESS,BIT) (ADDRESS & (1<<BIT))
#define SET_BIT(ADDRESS,BIT)   (ADDRESS |= (1<<BIT))
#define CLEAR_BIT(ADDRESS,BIT) (ADDRESS &= (~(1<<BIT)))
#define TGL_BIT(ADDRESS, BIT)  (ADDRESS ^= (1<<BIT))

volatile unsigned int flag;
volatile unsigned int timer_count=0;
volatile unsigned int frequency_979_sense;
volatile unsigned int frequency_885_sense;
volatile unsigned int frequency_933_sense;
volatile unsigned int flag_979_received;
volatile unsigned int flag_885_received;
volatile unsigned int data;
volatile unsigned int i;
volatile unsigned char SOP_valid;
volatile unsigned int previous_state=0;
volatile unsigned int current_state=0;

// Timer 0 output compare interrupt service routine
interrupt [TIM0_COMP] void timer0_comp_isr(void)
{
    timer_count++;
}

void init_timer0()
{
    // Timer/Counter 0 initialization
    // Clock source: System Clock
    // Clock value: 18.000 kHz
    // Mode: CTC top=OCR0
    // OC0 output: toggle output on compare match
    ASSR=0x00;
    TCCR0=0x1F;
    TCNT0=0x00;
    OCR0=0x90;
    // Timer(s)/Counter(s) Interrupt(s) initialization
    TIMSK=0x02;
}

// External Interrupt 0 service routine
interrupt [EXT_INT0] void ext_int0_isr(void)
{
    if ((CHECK_BIT(PIND,4)==0) & (CHECK_BIT(PIND,5)==0))
    {
        init_timer0();
        flag=1;
        CLEAR_BIT(EIMSK,0);
        CLEAR_BIT(EIFR,0);
    }
}

void main(void)
{
    // Port D initialization
    PORTD=0xFF;
    DDRD=0x00;
    // External Interrupt(s) initialization
    // INT0: On
    // INT0 Mode: Rising Edge
    EICRA=0x03;
    EICRB=0x00;
    EIMSK=0x01;
    EIFR=0x01;
    // Global enable interrupts
    #asm("sei")
    while (1)
    {
        while (timer_count > 0 & timer_count < 32)
        {
            if (timer_count%2==1)
            {
                frequency_979_sense = CHECK_BIT(PIND,0);
                frequency_885_sense = CHECK_BIT(PIND,4);
                frequency_933_sense = CHECK_BIT(PIND,5);
                if ((frequency_979_sense != 0) && (frequency_885_sense == 0) && (frequency_933_sense == 0) && (flag_885_received==0 || flag_885_received==8))
                {
                    flag_979_received++;
                    SET_BIT(data,i);
                }
                if ((frequency_979_sense == 0) && (frequency_885_sense != 0) && (frequency_933_sense == 0) && (flag_979_received==6))
                {
                    flag_885_received++;
                    SET_BIT(data,i);
                }
                else
                {
                    flag_979_received=0;
                    flag_885_received=0;
                    frequency_979_sense=0;
                    frequency_885_sense=0;
                    frequency_933_sense=0;
                    data=0;
                    TCCR0=0x00;
                    TIMSK=0x00;
                    timer_count=0;
                    i=0;
                    SET_BIT(EIMSK,0);
                    SET_BIT(EIFR,0);
                }
            }
            i++;
        }
        if (data==65535)
        {
            SOP_valid=1;
        }
    }
}

std::chrono::high_resolution_clock results are affected by changes to code beyond the scope of the clock

I am trying to compare the run time of two libraries, Boost and ROOT. While doing so, I am using std::chrono::high_resolution_clock.
The results change if I change parts of the code that are beyond the scope of the function where the clock is used. I would expect the clock's output to remain invariant.
Here is the piece of code:
#include <boost/histogram.hpp>
#include <random>
#include <chrono>
#include <fmt/format.h>
#include <fmt/chrono.h>
#include <iostream>
#include "TH1F.h"
#include "TH2F.h"

/*
g++ TestSpeed.C -O2 -g -ggdb $(root-config --cflags) $(root-config --libs) -lfmt -o compare -I /home/sbhawani/alice/sw/ubuntu2004_x86-64/boost/latest/include/ -I /home/sbhawani/alice/sw/ubuntu2004_x86-64/fmt/latest/include/ -L /home/sbhawani/alice/sw/ubuntu2004_x86-64/fmt/latest/lib/
*/

void compare_boost_rootOriginal()
{
    using timer = std::chrono::high_resolution_clock;
    namespace bh = boost::histogram;

    // ===| binning |=============================================================
    const int nBins = 100;
    const float xMin = 0.;
    const float xMax = 1.;
    const size_t inputValues = 1000000;

    // ===| histograms |==========================================================
    auto h1Boost = bh::make_histogram(bh::axis::regular<>(nBins, xMin, xMax, "random"));
    TH1F h1Root = TH1F("h", ";random", nBins, xMin, xMax);

    // ===| input values |========================================================
    std::random_device rd;  // Will be used to obtain a seed for the random number engine
    std::mt19937 gen(rd()); // Standard mersenne_twister_engine seeded with rd()
    std::uniform_real_distribution<> dis(.0, 1.0);
    std::vector<float> randomValues(inputValues);
    for (auto &val : randomValues)
    {
        val = dis(gen);
    }

    // ===| fill root |===========================================================
    auto startRoot = timer::now();
    for (const auto &val : randomValues)
    {
        h1Root.Fill(val);
    }
    auto stopRoot = timer::now();
    std::chrono::duration<float> timeRoot = stopRoot - startRoot;

    // ===| fill boost |==========================================================
    auto startBoost = timer::now();
    for (const auto &val : randomValues)
    {
        h1Boost(val);
    }
    auto stopBoost = timer::now();
    std::chrono::duration<float> timeBoost = stopBoost - startBoost;

    // ===| output |==============================================================
    fmt::print("time for boost: {}, time for root: {}, ratio: {}\n", timeBoost, timeRoot, timeBoost / timeRoot);
}

void TestCheck()
{
    const int nEvtDim = 1000000;
    const int nBins = 100;
    const float r_low = 0.0;  // general lower range of the axis
    const float r_high = 1.0; // general upper range of the axis
    using timer = std::chrono::high_resolution_clock;
    namespace bh = boost::histogram;

    std::vector<float> Xvalues(nEvtDim);

    // ===| input values |========================================================
    std::random_device rd;  // Will be used to obtain a seed for the random number engine
    std::mt19937 gen(rd()); // Standard mersenne_twister_engine seeded with rd()
    std::uniform_real_distribution<> dis(0.0, r_high);
    for (auto &val : Xvalues)
    {
        val = dis(gen);
    }

    auto nHisto = bh::make_histogram(bh::axis::regular<>(nBins, r_low, r_high, "random"), bh::axis::regular<>(nBins, r_low, r_high, "random"));
    auto startBHn = timer::now();
    for (const auto &xval : Xvalues)
    {
        nHisto(xval, xval);
    }
    auto stopBHn = timer::now();
    std::chrono::duration<float> timeBoost = stopBHn - startBHn;

    TH2F h2D = TH2F("h2D", ";h title", nBins, r_low, r_high, nBins, r_low, r_high);
    auto startTHn = timer::now();
    for (const auto &xval : Xvalues)
    {
        h2D.Fill(xval, xval);
    }
    auto stopTHn = timer::now();
    std::chrono::duration<float> timeRoot = stopTHn - startTHn;

    fmt::print("time for boost: {}, time for root: {}, ratio: {}\n", timeBoost, timeRoot, timeBoost / timeRoot);
}

int main()
{
    compare_boost_rootOriginal();
}
I get different numbers (the ratio of time taken by ROOT 1D to time taken by Boost 1D) in two scenarios.
Run the code with the function void TestCheck() (L65 - L102) commented out:
time for boost: 0.00459109s, time for root: 0.00795637s, ratio: 0.5770334
Run the macro without commenting out the function void TestCheck():
time for boost: 0.0063973s, time for root: 0.0080162s, ratio: 0.7980463
However, I don't call the function void TestCheck() in int main(). It's a bit surprising that the mere definition of another function body affects the clock inside compare_boost_rootOriginal(), which has nothing to do with TestCheck().
I don't have much experience with compilers, but I would expect this number to remain invariant irrespective of how I change the code outside the scope of void compare_boost_rootOriginal().
Moreover, this number also changes a lot when running the same executable several times; I would not expect the ratio to change across runs of the same executable either.

Is there a way to help an auto-vectorizing compiler emit saturation arithmetic intrinsics in LLVM?

I have a few for loops that do saturated arithmetic operations. For instance, the implementation of saturated add in my case is as follows:
static void addsat(Vector &R, Vector &A, Vector &B)
{
    int32_t a, b, r;
    int32_t max_add;
    int32_t min_add;
    const int32_t SAT_VALUE  = (1<<(16-1))-1;
    const int32_t SAT_VALUE2 = (-SAT_VALUE - 1);
    const int32_t sat_cond   = (SAT_VALUE <= 0x7fffffff);
    const uint32_t SAT       = 0xffffffff >> 16;
    for (int i=0; i<R.length; i++)
    {
        a = static_cast<uint32_t>(A.data[i]);
        b = static_cast<uint32_t>(B.data[i]);
        max_add = (int32_t)0x7fffffff - a;
        min_add = (int32_t)0x80000000 - a;
        r = (a>0 && b>max_add) ? 0x7fffffff : a + b;
        r = (a<0 && b<min_add) ? 0x80000000 : r; // keep the previous value; recomputing a + b here would discard the high clamp
        if (sat_cond == 1)
        {
            std_max(r,r,SAT_VALUE2);
            std_min(r,r,SAT_VALUE);
        }
        else
        {
            r = static_cast<uint16_t>(static_cast<int32_t>(r));
        }
        R.data[i] = static_cast<uint16_t>(r);
    }
}
I see that there is a packed saturating add instruction (paddsw) on x86 that could have been the perfect solution for this loop. I do get the code auto-vectorized, but as a combination of multiple operations corresponding to my code. I would like to know the best way to write this loop so that the auto-vectorizer recognizes it as a saturating add.
The Vector structure is:

struct Vector {
    static constexpr int length = 32;
    unsigned short data[32];
};
The compiler used is Clang 3.8, and the code was compiled for the AVX2 Haswell x86-64 architecture.
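One way to see the pattern LLVM is able to match: when the loop body reduces to a plain saturating add on the element type, the vectorizer can emit vpaddsw directly. A minimal sketch in Rust, which shares the LLVM backend with Clang (the i16 element type and fixed length are assumptions on my part; the original code stores unsigned short values with 32-bit intermediates):

// Hypothetical reformulation, not the original addsat: i16::saturating_add
// lowers to LLVM's @llvm.sadd.sat, which the loop vectorizer can turn into
// vpaddsw when building with -C target-feature=+avx2.
pub fn addsat(r: &mut [i16; 32], a: &[i16; 32], b: &[i16; 32]) {
    for i in 0..32 {
        r[i] = a[i].saturating_add(b[i]);
    }
}

fn main() {
    let a = [i16::MAX; 32];
    let b = [1i16; 32];
    let mut r = [0i16; 32];
    addsat(&mut r, &a, &b);
    // every lane saturates at i16::MAX instead of wrapping
    assert!(r.iter().all(|&x| x == i16::MAX));
    println!("{:?}", &r[..4]);
}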

PWM value not changing

I have written PWM code for the ATmega128. I am using fast PWM mode with a non-inverting pulse on compare match, and I need to change the OCR0 value at certain times, yet it doesn't change. Does anyone know what the problem is here?
#include <avr/interrupt.h>
#include <avr/io.h>

uint8_t tick_1sec;

void timer1_init(void) // 1 second timer
{
    OCR1A = 15624;
    TIMSK |= (1<<OCIE1A);
    TCCR1B = (1<<WGM12); // CTC mode
    TCCR1B |= (1<<CS12)|(0<<CS11)|(1<<CS10);
}

ISR(TIMER1_COMPA_vect) // 1 second interrupt
{
    cli();
    tick_1sec = 1;
    sei();
}

void timer0_init(void) // fast pwm with OC0 non-inverting mode
{
    TCCR0 = (1<<FOC0)|(1<<WGM01)|(1<<WGM00);
    TCCR0 |= (1<<COM01)|(0<<COM00);
    TCCR0 |= (1<<CS02)|(1<<CS01)|(1<<CS00);
    OCR0 = 63;
    TIMSK |= (1<<OCIE0);
}

int main(void)
{
    uint8_t t = 0;
    DDRB = 0xFF;
    timer0_init();
    timer1_init();
    sei();
    while(1){
        if (tick_1sec)
        {
            tick_1sec = 0;
            t++;
            if (t == 10){
                OCR0 = 127;
            }
            else if (t == 20){
                OCR0 = 191;
            }
            else if (t == 30){
                OCR0 = 63;
                t = 0;
            }
        }
    }
    return 0;
}
Things to check:
I recommend declaring tick_1sec as volatile (volatile uint8_t tick_1sec;) to prevent the compiler from optimizing accesses to that variable away.
What is your clock frequency? Your ISR will fire once per second only if your CPU frequency is 16 MHz: 16,000,000 / 1024 / (15624 + 1) = 1 Hz, since CTC counts from 0 through OCR1A.
You might have an LED in your hardware which you can toggle a) from the ISR, or b) within the first if () in main, to see whether that code is ever reached.
Update: "volatile"
The link provided by @skyrift in his comment is well worth reading.
When you use Atmel Studio, compile your code once with and once without the volatile keyword and compare what the compiler does (Solution Explorer / Output Files / *.lss): you will see each C statement and how the compiler converts it to machine code. That is an exercise worth doing once in a while when working with micros.

Boost.Variant vs. Virtual Interface Performance

I'm trying to measure the performance difference between using Boost.Variant and using virtual interfaces. For example, suppose I want to increment different types of numbers uniformly. Using Boost.Variant, I would use a boost::variant over int and float and a static visitor which increments each of them. Using class interfaces, I would use a pure virtual class number and number_int and number_float classes which derive from it and implement an "increment" method.
From my testing, using interfaces is far faster than using Boost.Variant.
I ran the code at the bottom and received these results:
Virtual: 00:00:00.001028
Variant: 00:00:00.012081
Why do you suppose this difference exists? I thought Boost.Variant would be a lot faster.
Note: Usually Boost.Variant uses heap allocation to guarantee that the variant is always non-empty. But I read in the Boost.Variant documentation that if boost::has_nothrow_copy is true, then it doesn't use heap allocation, which should make things significantly faster. For int and float, boost::has_nothrow_copy is true.
Here is my code for measuring the two approaches against each other.
#include <iostream>
#include <boost/variant/variant.hpp>
#include <boost/variant/static_visitor.hpp>
#include <boost/variant/apply_visitor.hpp>
#include <boost/date_time/posix_time/ptime.hpp>
#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <boost/date_time/posix_time/posix_time_io.hpp>
#include <boost/format.hpp>

const int iterations_count = 100000;

// a visitor that increments a variant by N
template <int N>
struct add : boost::static_visitor<> {
    template <typename T>
    void operator() (T& t) const {
        t += N;
    }
};

// a number interface
struct number {
    virtual void increment() = 0;
};

// number interface implementation for all types
template <typename T>
struct number_ : number {
    number_(T t = 0) : t(t) {}
    virtual void increment() {
        t += 1;
    }
    T t;
};

void use_virtual() {
    number_<int> num_int;
    number* num = &num_int;
    for (int i = 0; i < iterations_count; i++) {
        num->increment();
    }
}

void use_variant() {
    typedef boost::variant<int, float, double> number;
    number num = 0;
    for (int i = 0; i < iterations_count; i++) {
        boost::apply_visitor(add<1>(), num);
    }
}

int main() {
    using namespace boost::posix_time;
    ptime start, end;
    time_duration d1, d2;

    // virtual
    start = microsec_clock::universal_time();
    use_virtual();
    end = microsec_clock::universal_time();
    // store result
    d1 = end - start;

    // variant
    start = microsec_clock::universal_time();
    use_variant();
    end = microsec_clock::universal_time();
    // store result
    d2 = end - start;

    // output
    std::cout <<
        boost::format(
            "Virtual: %1%\n"
            "Variant: %2%\n"
        ) % d1 % d2;
}
For those interested: after I got a bit frustrated, I passed the option -O2 to the compiler, and boost::variant was way faster than a virtual call. Thanks.
It's obvious that -O2 reduces the variant time: the whole loop is optimized away. Change the implementation to return the accumulated result to the caller, so that the optimizer can't remove the loop, and you'll see the real difference:
Output:
Virtual: 00:00:00.000120 = 10000000
Variant: 00:00:00.013483 = 10000000
#include <iostream>
#include <boost/variant/variant.hpp>
#include <boost/variant/static_visitor.hpp>
#include <boost/variant/apply_visitor.hpp>
#include <boost/date_time/posix_time/ptime.hpp>
#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <boost/date_time/posix_time/posix_time_io.hpp>
#include <boost/format.hpp>

const int iterations_count = 100000000;

// a visitor that increments a variant by N
template <int N>
struct add : boost::static_visitor<> {
    template <typename T>
    void operator() (T& t) const {
        t += N;
    }
};

// a visitor that extracts the stored value as T
template <typename T, typename V>
T get(const V& v) {
    struct getter : boost::static_visitor<T> {
        T operator() (T t) const { return t; }
    };
    return boost::apply_visitor(getter(), v);
}

// a number interface
struct number {
    virtual void increment() = 0;
};

// number interface implementation for all types
template <typename T>
struct number_ : number {
    number_(T t = 0) : t(t) {}
    virtual void increment() { t += 1; }
    T t;
};

int use_virtual() {
    number_<int> num_int;
    number* num = &num_int;
    for (int i = 0; i < iterations_count; i++) {
        num->increment();
    }
    return num_int.t;
}

int use_variant() {
    typedef boost::variant<int, float, double> number;
    number num = 0;
    for (int i = 0; i < iterations_count; i++) {
        boost::apply_visitor(add<1>(), num);
    }
    return get<int>(num);
}

int main() {
    using namespace boost::posix_time;
    ptime start, end;
    time_duration d1, d2;

    // virtual
    start = microsec_clock::universal_time();
    int i1 = use_virtual();
    end = microsec_clock::universal_time();
    // store result
    d1 = end - start;

    // variant
    start = microsec_clock::universal_time();
    int i2 = use_variant();
    end = microsec_clock::universal_time();
    // store result
    d2 = end - start;

    // output
    std::cout <<
        boost::format(
            "Virtual: %1% = %2%\n"
            "Variant: %3% = %4%\n"
        ) % d1 % i1 % d2 % i2;
}
