I'm using Xcode 3.2 on Mac OS X 10.6 to build a very simple HelloWorld program for CUDA,
but it fails to build. Any ideas?
this is the code :
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <CUDA/CUDA.h>
// Device-side buffer (14 chars) that helloWorldOnDevice writes the greeting text into.
__device__ char napis_device[14];
// Kernel: stores the 12 characters of "Hello World\n" into napis_device[0..11].
// No terminating '\0' is written by this kernel.
__global__ void helloWorldOnDevice(void){
    const char greeting[] = "Hello World\n";
    for (int idx = 0; idx < 12; ++idx) {
        napis_device[idx] = greeting[idx];
    }
}
// Host entry point: runs helloWorldOnDevice on a single thread, copies the
// greeting back from the device symbol and prints it.
// Returns 0 on success, 1 if any CUDA call reports an error.
int main (int argc, char * const argv[]) {
    helloWorldOnDevice<<<1,1>>> ();
    // A kernel launch returns nothing; launch failures are reported here.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        // Wait for the kernel to finish (on CUDA 4.0+ this call is deprecated
        // in favour of cudaDeviceSynchronize()).
        status = cudaThreadSynchronize();
    }
    char napis_host[14] = {0};
    if (status == cudaSuccess) {
        // Pass the device symbol itself. The original passed the string
        // "napis device", which does not name any symbol (note the space).
        status = cudaMemcpyFromSymbol (napis_host, napis_device, sizeof(char)*13, 0, cudaMemcpyDeviceToHost);
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        return 1;
    }
    // The kernel writes indices 0..11 only, so terminate explicitly before printing.
    napis_host[12] = '\0';
    printf("%s", napis_host);
    return 0;
}
The error appears at this line
helloWorldOnDevice<<<1,1>>> ();
Expected primary-expression before '<' token !!!!!!
You're compiling your program with the gcc that comes with Xcode. You should use the nvcc compiler instead to compile CUDA code. Normally I would use a Makefile to specify that *.cu files are compiled by nvcc and *.cpp files by gcc, and then link the resulting objects into an executable.
Related
I've cloned the LuaJIT git repo and built it with:
make STATIC_CC="musl-gcc" BUILDMODE="static"
Then, I compiled a simple Lua "hello world" script into a C header file:
luajit -b test.lua test.h
test.h:
/* Bytecode for test.lua, as emitted by `luajit -b test.lua test.h`. */
/* Length in bytes of the luaJIT_BC_test array below. */
#define luaJIT_BC_test_SIZE 52
/* Raw bytecode; passed to luaL_loadbuffer() together with luaJIT_BC_test_SIZE. */
static const unsigned char luaJIT_BC_test[] = {
27,76,74,2,10,45,2,0,3,0,2,0,4,54,0,0,0,39,2,1,0,66,0,2,1,75,0,1,0,20,72,101,
108,108,111,32,102,114,111,109,32,76,117,97,33,10,112,114,105,110,116,0
};
After that, I wrote a simple C wrapper by following the official example, test.c:
#include <stdio.h>
#include <lua.h>
#include <lualib.h>
#include <lauxlib.h>
#include "test.h"
/*
 * Loads the embedded bytecode chunk luaJIT_BC_test into a fresh Lua state and
 * runs it.
 *
 * Returns 0 on success, 1 if the Lua state cannot be created or the chunk
 * fails to load or run (the Lua error message is written to stderr).
 */
int main(void) {
    int error;
    lua_State *L = lua_open();
    if (L == NULL) {
        /* State creation fails only when memory cannot be allocated. */
        fprintf(stderr, "%s", "cannot create Lua state: not enough memory\n");
        return 1;
    }
    luaL_openlibs(L);
    /* Short-circuit: lua_pcall runs only if the buffer loaded successfully. */
    error = luaL_loadbuffer(L, (const char *) luaJIT_BC_test, luaJIT_BC_test_SIZE, "test") || lua_pcall(L, 0, 0, 0);
    if (error) {
        /* On failure the error message is on top of the stack. */
        fprintf(stderr, "%s", lua_tostring(L, -1));
        lua_pop(L, 1);
    }
    lua_close(L);
    /* Propagate failure to the caller instead of always reporting success. */
    return error ? 1 : 0;
}
But when I try to build it, it crashes with an error:
$ musl-gcc -static -ILuaJIT/src -LLuaJIT/src -o test test.c -lluajit
/usr/bin/ld: /usr/lib/gcc/x86_64-pc-linux-gnu/12.1.0/libgcc_eh.a(unwind-dw2-fde-dip.o): in function `_Unwind_Find_FDE':
(.text+0x1953): undefined reference to `_dl_find_object'
collect2: error: ld returned 1 exit status
It's related to libgcc, so I tried building everything with musl-clang, but still got the same error. Can someone explain what I'm missing here?
Figured it out - I needed to build LuaJIT with TARGET_XCFLAGS=-DLUAJIT_NO_UNWIND like so:
make STATIC_CC="musl-gcc" BUILDMODE="static" TARGET_XCFLAGS=-DLUAJIT_NO_UNWIND
I guess this just disables C++ exceptions support, but I'm not sure what the real implications are. Seems to work fine, for now.
On Mac OSX Mojave 10.14.6, the following simple code does not work anymore :
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
/*
 * Tries to open /dev/rdisk0 read-only.
 * Returns 0 if the open succeeds; otherwise prints the errno text to stdout
 * and returns 1.
 */
int main(int argc, char** argv)
{
    const char *devicePath = "/dev/rdisk0";
    const int handle = open(devicePath, O_RDONLY);
    if (handle != -1)
    {
        return 0;
    }
    fprintf(stdout, "open(%s) error = %s\n", devicePath, strerror(errno));
    fflush(stdout);
    return 1;
}
It gives :
open(/dev/rdisk0) error = Operation not permitted
This happens even when running the executable using sudo.
This code used to work under 10.13 and earlier versions.
Thinking this might be due to SIP, I gave the Terminal and the executable Full Disk Access but it didn't help.
Is there another way to get around this issue? How do I open /dev/rdisk0 now ?
Thanks in advance
#include <stdio.h>
#include <stdlib.h>
#include "ReadMethods.h"
/* Returns a heap-allocated copy of text, or NULL if the allocation fails. */
static char *copyArgument(const char *text)
{
    char *copy = (char*)malloc(strlen(text) + 1);
    if (copy != NULL)
    {
        strcpy(copy, text);
    }
    return copy;
}
/*
 * Entry point. Expects three command-line arguments, stored as the DPDA
 * file name, the input file name and the output file name, and hands them
 * to readDPDA() and readInputLines().
 *
 * Returns 0 on success, 1 on missing arguments or allocation failure.
 */
int main(int argc,char * argv[])
{
    DPDA WordChecker;
    DPDA * WordCheckerPointer=&WordChecker;
    /* argv[1..3] are dereferenced below, so they must all be present. */
    if (argc < 4)
    {
        fprintf(stderr, "usage: %s <dpda-file> <input-file> <output-file>\n",
                argc > 0 ? argv[0] : "WordChecker");
        return 1;
    }
    /* Size each buffer from the argument itself; a fixed 25-byte buffer
       overflows as soon as a path is 25 characters or longer. */
    WordChecker.DPDAFilename=copyArgument(argv[1]);
    WordChecker.DPDAInputFilename=copyArgument(argv[2]);
    WordChecker.DPDAOutputFilename=copyArgument(argv[3]);
    if (WordChecker.DPDAFilename == NULL ||
        WordChecker.DPDAInputFilename == NULL ||
        WordChecker.DPDAOutputFilename == NULL)
    {
        fprintf(stderr, "out of memory\n");
        /* free(NULL) is a no-op, so this is safe for whichever copy failed. */
        free(WordChecker.DPDAFilename);
        free(WordChecker.DPDAInputFilename);
        free(WordChecker.DPDAOutputFilename);
        return 1;
    }
    readDPDA(argv[1],WordCheckerPointer);
    readInputLines(argv[2],WordCheckerPointer,argv[3]);
    return 0;
}
This is my code; it gives errors on six lines, from the first malloc through the last strcpy(). The error is "DPDA has no member named DPDAFilename", and the same error is reported for the other two fields on every malloc and strcpy line. Here is the relevant part of the header file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One transition rule. NOTE(review): the per-field notes are inferred from the
   field names only - confirm against the code that fills ruleList. */
typedef struct tagRule{
char *startingState;   /* presumably the state the rule applies in */
char symbolToPop;      /* presumably the stack symbol removed by the rule */
char expectedInput;    /* presumably the input symbol the rule matches */
char *endingState;     /* presumably the state entered after the rule fires */
char symbolToPush;     /* presumably the stack symbol added by the rule */
}Rule;
/* Stack state embedded in DPDA as the stackOfDPDA member. NOTE(review): the
   per-field notes are inferred from the field names only - confirm against
   the code that uses them. */
typedef struct tagStackDPDA{
char * arrayOfSymbols;   /* presumably the storage for the stack's symbols */
int stackElementCount;   /* presumably the number of symbols currently stored */
char * currentState;     /* presumably the name of the current state */
}stackDPDA;
/*
 * Top-level automaton record: symbol sets, states, rules, stack and the three
 * file names used by main().
 *
 * Only block comments are used in this struct: the file is built with
 * `gcc -ansi` (C89), where `//` does not start a comment. A `//` comment after
 * sizeArray would break parsing of the members that follow it.
 */
typedef struct tagDPDA{
char * alphabet;
char * stackSymbols;
char ** states;
char *startingState;
char **finalStates;
int finalStatesAmount;
Rule * ruleList;
stackDPDA stackOfDPDA;
int sizeArray[4]; /* This array holds amount values of states,alphabet symbols,stack symbols and transition rules */
char *DPDAFilename;
char *DPDAInputFilename;
char *DPDAOutputFilename;
}DPDA;
The code works fine in the Code::Blocks environment but not with gcc (-Wall -ansi). Those filenames come from input text files, but I am not sure whether that can cause this error.
Edit:By the way I am using this command line to compile;
gcc -Wall -ansi main.c ReadMethods.h -o WordChecker
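Maybe it is because you compile in C mode: with -ansi (C89), `//` is not a comment, so the `//` comment after sizeArray breaks the struct and the members declared after it are reported as missing. Use C-style comments in the header:
/* ... */ instead of //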
I am trying to convert a c++ program I have which uses random library which is a C++11 feature. After having read through a couple of similar posts here, I tried by separating out the code into three files. At the outset I would like to say that I am not very conversant at C/C++ and mostly use R at work.
The main file looks as follows.
#ifndef _KERNEL_SUPPORT_
#define _KERNEL_SUPPORT_
#include <complex>
#include <random>
#include <iostream>
#include "my_code_header.h"
using namespace std;
// Random engine used by create_std_norm(); default-constructed, so no explicit seed is set here.
std::default_random_engine generator;
// Normal distribution with mean 0.0 and standard deviation 1.0.
std::normal_distribution<double> distribution(0.0,1.0);
// Number of entries in rand_mat.
const int rand_mat_length = 24561;
// Buffer filled by create_std_norm().
double rand_mat[rand_mat_length];// = {0};
// Overwrites every entry of the global rand_mat buffer with a fresh draw
// from `distribution`, using `generator` as the random source.
void create_std_norm(){
    double *slot = ::rand_mat;
    double *const end = ::rand_mat + rand_mat_length;
    while (slot != end) {
        *slot = distribution(generator);
        ++slot;
    }
}
.
.
.
int main(void)
{
...
...
call_global();
return 0;
}
#endif
The header file looks as follows.
#ifndef mykernel_h
#define mykernel_h
// Host-side entry point defined in the .cu file; it allocates device buffers and launches two_d_example.
void call_global();
// NOTE(review): the .cu file defines this as a __global__ kernel and launches it only from
// call_global(), so code outside the .cu file does not appear to need this declaration - confirm and consider removing.
void two_d_example(double *a, double *b, double *my_result, size_t length, size_t width);
#endif
And the .cu file looks like the following.
#ifndef _MY_KERNEL_
#define _MY_KERNEL_
#include <iostream>
#include "my_code_header.h"
#define TILE_WIDTH 8
using namespace std;
// Kernel over a 2D launch: each thread derives one (row, col) pair from its
// block and thread indices (y -> row, x -> col) and exits early when that pair
// lies outside the length x width range.
__global__ void two_d_example(double *a, double *b, double *my_result, size_t length, size_t width)
{
unsigned int row = blockIdx.y*blockDim.y + threadIdx.y;
unsigned int col = blockIdx.x*blockDim.x + threadIdx.x;
// Use >= rather than >: with zero-based indices, row == length or
// col == width is already one past the end (assumes the buffers hold
// `length` / `width` / `length*width` elements, as allocated in call_global()).
if ((row>=length) || (col>=width)) {
return;
}
...
}
// Allocates the device buffers for a 528 x 528 problem, launches
// two_d_example over TILE_WIDTH x TILE_WIDTH thread blocks, reports a failed
// launch on std::cerr, and releases all three device buffers.
void call_global()
{
const size_t imageLength = 528;
const size_t imageWidth = 528;
const dim3 threadsPerBlock(TILE_WIDTH,TILE_WIDTH);
// Round up so a partial tile at the edge still gets a block. The kernel maps
// x to columns (width) and y to rows (length), so the grid follows the same order.
const dim3 numBlocks(((imageWidth + threadsPerBlock.x - 1) / threadsPerBlock.x),
                     ((imageLength + threadsPerBlock.y - 1) / threadsPerBlock.y));
double *d_a, *d_b, *mys ;
...
cudaMalloc((void**)&d_a, sizeof(double) * imageLength);
cudaMalloc((void**)&d_b, sizeof(double) * imageWidth);
cudaMalloc((void**)&mys, sizeof(double) * imageLength * imageWidth);
two_d_example<<<numBlocks,threadsPerBlock>>>(d_a, d_b, mys, imageLength, imageWidth);
// A kernel launch returns nothing; a bad launch is only visible through cudaGetLastError().
const cudaError_t launchStatus = cudaGetLastError();
if (launchStatus != cudaSuccess) {
    std::cerr << "two_d_example launch failed: " << cudaGetErrorString(launchStatus) << std::endl;
}
...
cudaFree(d_a);
cudaFree(d_b);
// The result buffer was allocated above but was never released.
cudaFree(mys);
}
#endif
Please note that the __global__ has been removed from .h since I was getting the following error owing to it being compiled by g++.
In file included from my_code_main.cpp:12:0:
my_code_header.h:5:1: error: ‘__global__’ does not name a type
When I compile the .cu file with nvcc it is all fine and generates a my_code_kernel.o. But since I am using C++11 in my .cpp I am trying to compile it with g++ and I am getting the following error.
/tmp/ccR2rXzf.o: In function `main':
my_code_main.cpp:(.text+0x1c4): undefined reference to `call_global()'
collect2: ld returned 1 exit status
I understand that this might not have to do anything with CUDA as such and may just be the wrong use of including the header at both places. Also what is the right way to compile and most importantly link the my_code_kernel.o and my_code_main.o(hopefully)? Sorry if this question is too trivial!
It looks like you are not linking with my_code_kernel.o. You have used -c for your nvcc command (causes it to compile but not link, i.e. generate the .o file), I'm going to guess that you're not using -c with your g++ command, in which case you need to add my_code_kernel.o to the list of inputs as well as the .cpp file.
The separation you are trying to achieve is completely possible; it just looks like you're not linking properly. If you still have problems, add the compilation commands to your question.
FYI: You don't need to declare two_d_example() in your header file, it is only used within your .cu file (from call_global()).
I'm trying to use CUDA with cmake (v 2.8) on my Mac (OSX 10.6). So far it works fine, I created a small sample just to try it out (see below). However when I switch on emulation mode, it cannot invoke the CUDA kernel anymore and I get the following error message:
Cuda error: kernel invocation: invalid device function .
I also tried to compile it by invoking nvcc by hand and didn't get the error message, so I think it could be a problem with cmake.
I also noticed that emulation mode is deprecated in CUDA 3.0. Why is this? Nvidia points out in their release notes, that they provide Nexus for VS and cuda-gdb on Linux. But what about OSX? I could not find cuda-gdb in the OSX version I installed here..?!
Below the files
CMakeLists.txt
# Build script for the sample: one executable, `test`, built from test.cu.
cmake_minimum_required(VERSION 2.8)
project (test)
# Locate the CUDA toolkit; this provides cuda_add_executable() and the CUDA_* options set below.
find_package(CUDA)
add_definitions(-Wall)
# Use CUDA emulator?
set(CUDA_BUILD_EMULATION ON)
set(CUDA_64_BIT_DEVICE_CODE OFF) # Does not work on a Mac currently
# Build 32-bit host code. NOTE(review): these assignments replace any existing
# C/C++ flags rather than appending to them.
set(CMAKE_C_FLAGS -m32)
set(CMAKE_CXX_FLAGS -m32)
# Print the commands run while compiling the CUDA sources.
set(CUDA_VERBOSE_BUILD ON)
include_directories("${PROJECT_BINARY_DIR}")
cuda_add_executable(test
test.cu
)
test.cu
#include <cuda.h>
#include <stdlib.h>
#include <stdio.h>
#include "test_kernel.cu"
void checkCUDAError(const char *msg);
// Allocates a 3-float host buffer and device buffer, launches the hello kernel
// on one block of 128 threads, waits for it to finish, and frees both buffers.
// Any CUDA error ends the program through checkCUDAError().
int main( int argc, const char** argv )
{
    int n = 3;
    float* a_h;
    a_h = (float *)malloc(sizeof(float)*n);
    if (a_h == NULL)
    {
        fprintf(stderr, "malloc failed\n");
        return EXIT_FAILURE;
    }
    float* a_d;
    cudaMalloc((void**) &a_d, sizeof(float)*n);
    checkCUDAError("cudaMalloc");
    hello<<<1,128>>>(a_d, n);
    // Reports launch-time failures (e.g. an invalid device function).
    checkCUDAError("kernel invocation");
    // The launch is asynchronous: wait for the kernel so that errors raised
    // while it runs are reported too. The original second check was labelled
    // "memcpy" although no copy is performed, and could never fire because
    // the previous check had already cleared the error state.
    cudaThreadSynchronize();
    checkCUDAError("kernel execution");
    free(a_h);
    cudaFree(a_d);
    return 0;
}
// Reads the most recent CUDA runtime error. If there is one, prints it to
// stderr prefixed with `msg` and terminates the process with EXIT_FAILURE;
// otherwise returns normally.
void checkCUDAError(const char *msg)
{
    const cudaError_t lastError = cudaGetLastError();
    if (lastError == cudaSuccess)
    {
        return;
    }
    fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(lastError));
    exit(EXIT_FAILURE);
}
test_kernel.cu
#include <stdio.h>
// Sample kernel: prints "Hello." from each thread, but only when the file is
// compiled with _DEVICEEMU defined; otherwise the body is empty.
// The parameters `a` and `i` are accepted but not used.
__global__ void hello(float*a, int i)
{
    // The unused local `int j = i+1;` was removed; it had no effect.
#ifdef _DEVICEEMU
    printf("Hello.\n");
#endif
}
See
http://forums.nvidia.com/index.php?showtopic=166570&st=0&p=1043250&#entry1043250