CUDA 3.0 and cmake and emulation mode

CUDA 3.0 and cmake and emulation mode - macos

I'm trying to use CUDA with cmake (v 2.8) on my Mac (OSX 10.6). So far it works fine, I created a small sample just to try it out (see below). However when I switch on emulation mode, it cannot invoke the CUDA kernel anymore and I get the following error message:
Cuda error: kernel invocation: invalid device function .
I also tried to compile it by invoking nvcc by hand and didn't get the error message, so I think it could be a problem with cmake.
I also noticed that emulation mode is deprecated in CUDA 3.0. Why is this? Nvidia points out in their release notes, that they provide Nexus for VS and cuda-gdb on Linux. But what about OSX? I could not find cuda-gdb in the OSX version I installed here..?!
Below the files
CMakeLists.txt
cmake_minimum_required(VERSION 2.8)
project (test)
find_package(CUDA)
add_definitions(-Wall)
# Use CUDA emulator?
set(CUDA_BUILD_EMULATION ON)
set(CUDA_64_BIT_DEVICE_CODE OFF) # Does not work on a Mac currently
set(CMAKE_C_FLAGS -m32)
set(CMAKE_CXX_FLAGS -m32)
set(CUDA_VERBOSE_BUILD ON)
include_directories("${PROJECT_BINARY_DIR}")
cuda_add_executable(test
test.cu
)
test.cu
#include <cuda.h>
#include <stdlib.h>
#include <stdio.h>
#include "test_kernel.cu"
void checkCUDAError(const char *msg);
int main( int argc, const char** argv )
{
int n = 3;
float* a_h;
a_h = (float *)malloc(sizeof(float)*n);
float* a_d;
cudaMalloc((void**) &a_d, sizeof(float)*n);
hello<<<1,128>>>(a_d, n);
checkCUDAError("kernel invocation");
checkCUDAError("memcpy");
free(a_h);
cudaFree(a_d);
return 0;
}
void checkCUDAError(const char *msg)
{
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
fprintf(stderr, "Cuda error: %s: %s.\n", msg,
cudaGetErrorString( err) );
exit(EXIT_FAILURE);
}
}
test_kernel.cu
#include <stdio.h>
__global__ void hello(float*a, int i)
{
int j = i+1;
#ifdef _DEVICEEMU
printf("Hello.\n");
#endif
}

See
http://forums.nvidia.com/index.php?showtopic=166570&st=0&p=1043250&#entry1043250

Related

How to use SYSVIPC in MSYS2?

I'm using package msys2/gcc in a 64-bit MSYS2 installation, under Windows 10 64-bit.
Sample C program:
#include <stdio.h>
#include <errno.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/stat.h>
int main(void)
{
uint64_t key = ftok("shm.exe", 8);
printf("%llx\n", key);
int r = shmget(key, 1024, S_IRUSR|S_IWUSR|IPC_CREAT);
if ( r == -1 )
printf("FAIL %d\n", errno);
else
printf("OK\n");
}
When I run this from the MSYS2 shell I get FAIL 88 which is ENOSYS indicating that shmget is not implemented.
Is it possible to make the call work? The full program also uses semget and semop.
From googling it seems there is a lot of source code in MSYS2 related to SYSV IPC but perhaps something needs to be enabled somewhere.

Can´t make Qt, Visual C++2015 and openCV work together in 64bit

I´m having trouble with a very simple console project. I am using latest version of MSVC 2015, openCV 3.2 an Qt 5.71 all in 64bit and windows 10 64bit.
The Qt .pro file is:
TEMPLATE = app
CONFIG += console c++11
CONFIG -= app_bundle
CONFIG -= qt
SOURCES += main.cpp
win32 {
INCLUDEPATH += "C:\OpenCV3.2\opencv\build\include" \
CONFIG(debug,debug|release) {
LIBS += -L"C:\OpenCV3.2\opencv\build\x64\vc14\lib" \
-lopencv_world320d
}
CONFIG(release,debug|release) {
LIBS += -L"C:\OpenCV3.2\opencv\build\x64\vc14\lib" \
-lopencv_world320
}
}
and the source code is:
#include <iostream>
#include <opencv2\\core\\core.hpp>
#include <opencv2\\highgui\\highgui.hpp>
#include <opencv2\\imgproc\\imgproc.hpp>
using namespace std;
using namespace cv;
Mat imLena; //<-- INCLUDING THIS LINE MAKE THE PROGRAM CRASH
int main(int argc, char *argv[])
{
cout << "Hello World!" << endl;
return 0;
}
This simple program compiles an run perfectly but when I add de line "Mat imLena;" it still compiles perfectly but crashes when I run it. I am very confused. Any tip, idea or suggestion?

how to compile Cuda source with Go language's cgo?

I wrote a simple program in cuda-c and it works on eclipse nsight. This is source code:
#include <iostream>
#include <stdio.h>
__global__ void add( int a,int b, int *c){
*c = a + b;
}
int main(void){
int c;
int *dev_c;
cudaMalloc((void**)&dev_c, sizeof(int));
add <<<1,1>>>(2,7,dev_c);
cudaMemcpy(&c, dev_c, sizeof(int),cudaMemcpyDeviceToHost);
printf("\n2+7= %d\n",c);
cudaFree(dev_c);
return 0;
}
Now I'm trying to use this code with Go language with cgo!!!
So I wrote this new code:
package main
//#include "/usr/local/cuda-7.0/include/cuda.h"
//#include "/usr/local/cuda-7.0/include/cuda_runtime.h"
//#cgo LDFLAGS: -lcuda
//#cgo LDFLAGS: -lcurand
////default location:
//#cgo LDFLAGS: -L/usr/local/cuda-7.0/lib64 -L/usr/local/cuda-7.0/lib
//#cgo CFLAGS: -I/usr/local/cuda-7.0/include/
//
//
//
//
//
//
//
//
//
//
/*
#include <stdio.h>
__global__ void add( int a,int b, int *c){
*c = a + b;
}
int esegui_somma(void){
int c;
int *dev_c;
cudaMalloc((void**)&dev_c, sizeof(int));
add <<<1,1>>> (2,7,dev_c);
cudaMemcpy(&c, dev_c, sizeof(int),cudaMemcpyDeviceToHost);
cudaFree(dev_c);
return c;
}
*/
import "C"
import "fmt"
func main(){
fmt.Printf("il risultato è %d",C.esegui_somma)
}
But it doesn't work!!
I read this error message:
cgo_cudabyexample_1/main.go:34:8: error: expected expression before '<' token
add <<<1,1>>> (2,7,dev_c);
^
I think that I must to set nvcc cuda compiler for cgo instead of gcc.
How can I do it? Can I change CC environment variable?
best regards

I finally figured out how to do this. Thing biggest problem is that nvccdoes not follow gcc standard flags and unlike clang it won't silently ignore them. cgo triggers the problem by adding a bunch of flags not explicitly specified by the user.
To make it all work, you'll need to separate out your device code and the functions that directly call it into separate files and compile/package them directly using nvcc into a shared library (.so). Then you'll use cgo to link this shared library using whatever default linker you have on your system. The only thing you'll have to add is -lcudart to your LDFLAGS (linker flags) to link the CUDA runtime.

CUDA 5.0 "Generate Relocatable Device Code" leads to invalid device symbol error

I am trying to do separate compilation using CUDA 5. For this reason I set the "Generate Relocatable Device Code" to "Yes (-rdc=true)" in Visual Studio 2010. The program compiles without errors, however,
I get an invalid device symbol error when I try to initialize device constants using cudaMemcpyToSymbol.
i.e. I have the following constant
__constant__ float gdDomainOrigin[2];
and try to initialize it with
cudaMemcpyToSymbol(gdDomainOrigin, mDomainOrigin, 2*sizeof(float));
which leads to the error. The error does not occur, when I compile everything as a whole, without the aforementioned option set. Could anybody please help me with that?

I can't reproduce this. If build an application from two .cu files, one containing a __constant__ symbol and a simple kernel, and the other containing the runtime API incantations to populate that constant memory and call the kernel, it works only when relocatable device code is enabled, viz:
__constant__ float gdDomainOrigin[2];
__global__
void kernel(float *inout)
{
inout[0] = gdDomainOrigin[0];
inout[1] = gdDomainOrigin[1];
}
and
#include <cstdio>
extern __constant__ float gdDomainOrigin;
extern __global__ void kernel(float *);
inline
void gpuAssert(cudaError_t code, char * file, int line, bool Abort=true)
{
if (code != 0) {
fprintf(stderr, "GPUassert: %s %s %d\n",
cudaGetErrorString(code),file,line);
if (Abort) exit(code);
}
}
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
int main(void)
{
const float mDomainOrigin[2] = { 1.234f, 5.6789f };
const size_t sz = sizeof(float) * size_t(2);
float * dbuf, * hbuf;
gpuErrchk( cudaFree(0) );
gpuErrchk( cudaMemcpyToSymbol(gdDomainOrigin, mDomainOrigin, sz) );
gpuErrchk( cudaMalloc((void **)&dbuf, sz) );
kernel<<<1,1>>>(dbuf);
gpuErrchk( cudaPeekAtLastError() );
hbuf = new float[2];
gpuErrchk( cudaMemcpy(hbuf, dbuf, sz, cudaMemcpyDeviceToHost) );
fprintf(stdout, "%f %f\n", hbuf[0], hbuf[1]);
return 0;
}
Compiling and running these in CUDA 5 on a 64 bit linux system with a Kepler GPU produces the following:
$ nvcc -arch=sm_30 -o shared shared.cu shared_dev.cu
$ ./shared
GPUassert: invalid device symbol shared.cu 23
$ nvcc -arch=sm_30 -rdc=true -o shared shared.cu shared_dev.cu
$ ./shared
1.234000 5.678900
You can see that in the first compilation, without relocatable GPU code generation, the symbol isn't found. In the second case, with relocatable GPU code generation, it is found, and the elf header in the object file looks just as you would expect:
$ nvcc -arch=sm_30 -rdc=true -c shared_dev.cu
$ cuobjdump -symbols shared_dev.o
Fatbin elf code:
================
arch = sm_30
code version = [1,6]
producer = cuda
host = linux
compile_size = 64bit
identifier = shared_dev.cu
symbols:
STT_SECTION STB_LOCAL .text._Z6kernelPf
STT_SECTION STB_LOCAL .nv.constant3
STT_SECTION STB_LOCAL .nv.constant0._Z6kernelPf
STT_CUDA_OBJECT STB_LOCAL _param
STT_SECTION STB_LOCAL .nv.callgraph
STT_FUNC STB_GLOBAL _Z6kernelPf
STT_CUDA_OBJECT STB_GLOBAL gdDomainOrigin
Fatbin ptx code:
================
arch = sm_30
code version = [3,1]
producer = cuda
host = linux
compile_size = 64bit
compressed
identifier = shared_dev.cu
ptxasOptions = --compile-only
Perhaps you could try my code and compilation/diagnostic steps and see what happens with your Windows toolchain.

Can't Build a simple Cuda Program using Xcode !

I'm using Xcode 3.2 on Mac OS 10.6 to build a very simple HelloWorld program for CUDA
but it fails to build .. any ideas !!!
this is the code :
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <CUDA/CUDA.h>
__device__ char napis_device[14];
__global__ void helloWorldOnDevice(void){
napis_device[0]='H';
napis_device[1]='e';
napis_device[2]='l';
napis_device[3]='l';
napis_device[4]='o';
napis_device[5]=' ';
napis_device[6]='W';
napis_device[7]='o';
napis_device[8]='r';
napis_device[9]='l';
napis_device[10]='d';
napis_device[11]='\n';
}
int main (int argc, char * const argv[]) {
helloWorldOnDevice<<<1,1>>> ();
cudaThreadSynchronize();
char napis_host[14];
const char *symbol="napis device";
cudaMemcpyFromSymbol (napis_host, symbol, sizeof(char)*13, 0, cudaMemcpyDeviceToHost);
return 0;
}
The error appears at this line
helloWorldOnDevice<<<1,1>>> ();
Expected primary-expression before '<' token !!!!!!

You're compiling your program with gcc coming with Xcode. Should use nvcc compiler instead to compile CUDA code. Normally I would use a Makefile to tell that *.cu to be compiled by nvcc and *.cpp by gcc, then link produced objects to an executable.

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

CUDA 3.0 and cmake and emulation mode - macos

See http://forums.nvidia.com/index.php?showtopic=166570&st=0&p=1043250&#entry1043250

Related

How to use SYSVIPC in MSYS2?

Can´t make Qt, Visual C++2015 and openCV work together in 64bit

how to compile Cuda source with Go language's cgo?

CUDA 5.0 "Generate Relocatable Device Code" leads to invalid device symbol error

Can't Build a simple Cuda Program using Xcode !

Categories

Resources