How to compile the exp bechmark in riscv-test - gcc

I have built simple program to calculate the exp of value. I got error;
#include <stdint.h>
#include "util.h"
#include <math.h>
#include <stdio.h>
int main() {
double value = -150;
Start_Timer();
for(int i=0; i<500 ;i++){
result = exp(value);
value++;
}
Stop_Timer();
User_Time=End_Time-Begin_Time;
printf("User_Time: %ld - %ld = %ld - \n", End_Time,Begin_Time,User_Time);
printf("The Exponential of %ld is %ld\n", value, result);
return 0;
}
Any idea how to use exp in Benchmark for testing.
i have figured out that exp function need -x and -lm for compiling. How can i use them in the test
C Failing to compile: Can't find math.h functions
I tried to edit the makefile in riscv-test/benchmark but i think , it is little bit tricky for me.
Error Message:https://github.com/riscv/riscv-tests/issues/142

Related

Why is C++ so much faster than C in this code?

My C code is:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(void){
char* a = (char*)malloc(200000);
for (int i = 0;i< 100000;i++){
strcat(a,"b");
}
printf("%s",a);
}
My C++ code is
#include <iostream>
int main(void){
std::string a = "";
for (int i = 0;i< 100000;i++){
¦ a+="b";
}
std::cout<<a;
}
On my machine, the C code runs in about 5 seconds, while on my machine, the C++ code runs in 0.025! seconds.
Now, the C code doesn't check for overflows, has no C++ overhead, classes and yet is quite a few magnitudes slower than my C++ code.
Using gcc/g++ 6.2.0 compiled with -O3 on Raspberry Pi.
#erwin was correct.
When I change my code to
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void mystrcat(char* src,char* dest,int lenSrc){
src[lenSrc]=dest[0];
}
int main(void){
char* a = (char*)malloc(200000);
for (int i = 0;i< 100000;i++){
mystrcat(a,"b",i);
}
a[100000] = 0;
printf("%s\n",a);
}
It takes about .012s to run (mostly printing the large screen).
Shlemiel's the painter's algorithm at work!

Multiplication - Matrix by imaginary unit

I would like to ask if anybody knows why this is not working:
For example, let
SparseMatrix<int> A
and
SparseMatrix<std::complex<float> > B
I would like to do the following math:
B=i*A
As code:
std::complex<float> c;
c=1.0i;
B=A.cast<std::complex<float> >()*c;
or equivalent:
B=A.cast<std::complex<float> >()*1.0i;
I expect all real values of A to be imaginary in B but
there are only zeros as (0,0).
Example:
#include <Eigen/Sparse>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
using namespace Eigen;
using std::cout;
using std::endl;
int main(int argc, char *argv[]){
int rows=5, cols=5;
SparseMatrix<int> A(rows,cols);
A.setIdentity();
SparseMatrix<std::complex<float> > B;
std::complex<float> c;
c=1i;
B=A.cast<std::complex<float> >()*1.0i;
//B=A.cast<std::complex<float> >()*c;
cout << B << endl;
return 0;
}
compile with:
g++ [name].cpp -o [name]
What am I doing wrong?
Thanks a lot for any help!
You need to enable c++14 to get 1.0i working as expected. With GCC or clang, you need to add the -std=c++14 compiler option.
Then, you can simply do:
MatrixXd A = MatrixXd::Random(3,3);
MatrixXcd B;
B = A * 1.0i;
Same with a SparseMatrix.

How to use copy_to_user

I'm trying to add a custom system call into the linux kernel. Here is a simple code:
#include <linux/mysyscall.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <asm/system.h>
asmlinkage int sys_mysyscall(int *data){
int a = 3;
cli();
copy_to_user(data, &a, 1);
sti();
printk(KERN_EMERG "Called with %d\n", a);
return a;
}
I can compile a kernel with mysyscall added and when I try to access it with a user program like:
#include <linux/mysyscall.h>
int main(void){
int *data;
int r;
int a = 0;
data = &a;
r = mysyscall(data);
printf("r is %d and data is %d", r, *data);
}
*data does not equal to 3 it equals to 0.
How should I use copy_to_user to fix it?
The copy to user line of code copies only one byte from 'a'. In case of little endian systems it is going to be 0. Copy all the 4 bytes to get the correct result.

Cuda error on compiling: identifier "cudamalloc" is undefined

I have a CUDA C code, when I try to compile it, nvcc gives me an error with an undefined identifier error: identifier "cudamalloc" is undefined, identifier "cudamemcpy" is undefined.
I'm running Windows 7 with Visual Studio 10 and CUDA Toolkit 4.0
I have installed Cuda on drive "C" and Visual Studio on drive "E" but im not sure that it is the problem.
I use this command to compile:
nvcc -o ej1b ej1b.cu
and this is my program:
#include <cuda.h>
#include <cstdio>
#include <cuda_runtime_api.h>
#include <device_functions.h>
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
const int N = 512;
const int C = 5;
void init_CPU_array(int vec[],const int N){
unsigned int i;
for(i = 0; i < N; i++) {
vec[i] = i;
}
}
__global__ void kernel(int vec[],const int N, const int C){
int id = blockIdx.x * blockDim.x + threadIdx.x;
if(id<N)
vec[id] = vec[id] * C;
}
int main(){
int vec[N];
int vecRES[N];
int *vecGPU;
unsigned int cantaloc=N*sizeof(int);
init_CPU_array(vec,N);
cudamalloc((void**)&vecGPU,cantaloc);
cudamemcpy(vecGPU,vec,cantaloc,cudaMemcpyHostToDevice);
dim3 dimBlock(64);
dim3 dimGrid((N + dimBlock.x - 1) / dimBlock.x);
printf("-> Variable dimBlock.x = %d\n",dimBlock.x);
kernel<<<dimGrid, dimBlock>>>(vecGPU, N, C);
cudaThreadSynchronize();
cudamemcpy(vecRES,vecGPU,cantaloc,cudaMemcpyDeviceToHost);
cudaFree(vecGPU);
printf("%s \n","-> Resultados");
int i;
for(i=0;i<10;i++){
printf("%d ",vecRES[i]);
printf("%d \n",vec[i]);
}
return 0;
I used all those #include because I don't know where the problem is.
If you read the documentation, you will find the API calls are cudaMalloc and cudaMemcpy. C and C++ are case sensitive languages and you have the names incorrect.

Implement a random-number generator using only getpid() and gettimeofday()?

I am using gcc compiler to Implement a random-number generator using only getpid() and gettimeofday(). Here is my code
#include <stdio.h>
#include <sys/time.h>
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
int main(int argc, char **argv)
{
struct timeval tv;
int count;
int i;
int INPUT_MAX =10;
int NO_OF_SAMPLES =10;
gettimeofday(&tv, NULL);
printf("Enter Max: \n");
scanf("%d", &INPUT_MAX);
printf("Enter No. of samples needed: \n");
scanf("%d", &NO_OF_SAMPLES);
/*printf("%ld\n",tv.tv_usec);
printf("PID :%d\n", getpid());*/
for (count = 0; count< NO_OF_SAMPLES; count++) {
printf("%ld\n", (getpid() * tv.tv_usec) % INPUT_MAX + 1);
for (i = 0; i < 1000000; ++i)
{
/* code */
}
}
return 0;
}
I gave a inner for loop for delay purpose but the result what i am getting is always same no. like this
./a.out
Enter Max:
10
Enter No. of samples needed:
10
1
1
1
1
1
1
1
1
1
1
Plz correct me what am i doing wrong?
getpid() is constant during the programs execution, so you get constant values, too.
But even if you use gettimeofday() inside the loop, this likely won't help:
gcc will likely optimize away your delay loop.
even it it's not optimized away, the delays will be very similar and your values won't be very random.
I'd suggest you look up "linear congruential generator", for a simple way to generate more random numbers.
Put gettimeofday in the loop. Look if getpid() is divisible by INPUT_MAX + 1 you will get the same answer always. Instead you can add getpid() (not make any sense though()) to tv.tv_usec.

Resources