C++ operator overloading causing segmentation fault - c++11

Below is code that gives a segmentation fault for a reason I cannot figure out. It happens in my attempt to overload the ^ operator.
Here is my code.
#include <iostream>
#include <algorithm>
using namespace std;
class bigint {
public:
    char val[1000000];
    int msdindex;
    bool iszero;
    bigint( int i ) {
        if( i == 0 )
            iszero = true;
        else {
            iszero = false;
            msdindex = -1;
            while( i > 0 ) {
                msdindex++;
                val[ msdindex ] = i % 10;
                i /= 10;
            }
        }
    }
    bigint( const bigint& bi ) {
        msdindex = bi.msdindex;
        iszero = bi.iszero;
        for( int i = 0; i <= msdindex; i++ )
            val[i] = bi.val[i];
    }
};
bigint operator^( bigint k, int n ) {
    if( n == 1 )
        return bigint(k);
    bigint half = k^(n/2);
    return half;
}
int main()
{
    bigint bi = bigint( 999 );
    bigint di = bi ^ 4;
    return 0;
}
The segmentation fault is in the overloaded ^ function and I am clueless as to the reason. gdb says this:
Traceback (most recent call last):
File "/usr/share/gdb/auto-load/usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.19-gdb.py", line 63, in
from libstdcxx.v6.printers import register_libstdcxx_printers
ImportError: No module named 'libstdcxx'
Program received signal SIGSEGV, Segmentation fault.
0x0000000000400749 in operator^(bigint, int) ()
Please help.

You are running out of memory (stack space, to be precise), due to which your program crashes every time: each bigint carries a char array of 1000000 bytes, and since operator^ takes its parameter by value and then recurses, every call stacks up further megabyte-sized copies. Reducing the static allocation of char in bigint to a smaller value will work fine.
Or use dynamic memory allocation in case you want a huge char array; that will solve your problem.
Hope this helps.
class bigint {
public:
    char *val; //[1000000];
    int msdindex;
    bool iszero;
    bigint( int i ) {
        if( i == 0 )
            iszero = true;
        else {
            iszero = false;
            msdindex = -1;
            val = new char[1000000];
            while( i > 0 ) {
                msdindex++;
                val[ msdindex ] = i % 10;
                i /= 10;
            }
        }
    }
    bigint( const bigint& bi ) {
        msdindex = bi.msdindex;
        iszero = bi.iszero;
        val = new char[1000000];
        for( int i = 0; i <= msdindex; i++ )
            val[i] = bi.val[i];
    }
};
Don't forget to write a destructor to deallocate this dynamically allocated memory. Cheers.
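For completeness, a minimal sketch of what that cleanup could look like. This is my own variant of the class above, not the asker's code: val starts as nullptr so delete[] is always safe (the constructor above leaves it unset when i == 0), and per the rule of three a copy-assignment operator joins the copy constructor and destructor; copy-and-swap is a choice made here, not something from the original.
#include <algorithm>
#include <utility>
class bigint {
public:
    char *val;
    int msdindex;
    bool iszero;
    bigint( int i ) : val(nullptr), msdindex(-1), iszero(i == 0) {
        if( !iszero ) {
            val = new char[1000000];
            while( i > 0 ) {
                msdindex++;
                val[ msdindex ] = i % 10;
                i /= 10;
            }
        }
    }
    bigint( const bigint& bi ) : val(nullptr), msdindex(bi.msdindex), iszero(bi.iszero) {
        if( bi.val ) {
            val = new char[1000000];
            std::copy( bi.val, bi.val + msdindex + 1, val );
        }
    }
    bigint& operator=( bigint bi ) { // copy-and-swap: reuses the copy constructor
        std::swap( val, bi.val );
        std::swap( msdindex, bi.msdindex );
        std::swap( iszero, bi.iszero );
        return *this;
    }
    ~bigint() { // the destructor mentioned above
        delete[] val; // delete[] on nullptr is a harmless no-op
    }
};
Note this still copies the object by value on every operator^ call; taking const bigint& parameters instead avoids those copies entirely.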


What causes cases with high ZeroMQ latency and how to avoid them?

I am trying to use ZeroMQ for fast message passing. Messages need to be delivered in less than 1 [ms]. I did some testing (inproc, single process on Linux, no TCP) and I see that usually there is no problem with that. The latency is about 10 - 100 [us], depending on how often the messages are sent (why?). Sometimes, however, messages are received after 6 [ms], which is unacceptable.
What can be the cause that some messages are delayed?
Maybe the process is preempted?
Or it's because of polling used (zmq_poll())?
Example results from my test:
avg lag = 28 [us]
max lag = 5221 [us]
std dev = 25.85 [us]
big lag = 180 x above 200 [us]
"big lag" means number of cases where latency was over 200 [us]. In my tests there are 500 000 messages sent so the value 180 means that latency over 200 [us] was recorded in 180 / 500000 = 0,036%. It's a quite low number but I'd like it to be zero. Even on the expense of average latency.
The test source code is below :
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <zmq.h>
#include <pthread.h>
#define SOCKETS_NUM 5
#define RUNS 100000
void *context;
int numbers[SOCKETS_NUM];
struct {
    struct timespec send_time;
    struct timespec receive_time;
} times[SOCKETS_NUM * RUNS], *ptimes;
static void * worker_thread(void * dummy) {
    int * number = dummy;
    char endpoint[] = "inproc://endpointX";
    endpoint[17] = (char)('0' + *number);
    void * socket = zmq_socket(context, ZMQ_PUSH);
    zmq_connect(socket, endpoint);
    struct timespec sleeptime, remtime;
    int rnd = rand() / 3000;
    sleeptime.tv_sec = 0;
    sleeptime.tv_nsec = rnd;
    nanosleep(&sleeptime, &remtime);
    clock_gettime(CLOCK_REALTIME, &(ptimes[*number].send_time));
    zmq_send(socket, "Hello", 5, 0);
    zmq_close(socket);
    return NULL;
}
static void run_test(zmq_pollitem_t items[]) {
    pthread_t threads[SOCKETS_NUM];
    for (int i = 0; i < SOCKETS_NUM; i++) {
        pthread_create(&threads[i], NULL, worker_thread, &numbers[i]);
    }
    char buffer[10];
    int to_receive = SOCKETS_NUM;
    for (int i = 0; i < SOCKETS_NUM; i++) {
        int rc = zmq_poll(items, SOCKETS_NUM, -1);
        for (int j = 0; j < SOCKETS_NUM; j++) {
            if (items[j].revents & ZMQ_POLLIN) {
                clock_gettime(CLOCK_REALTIME, &(ptimes[j].receive_time));
                zmq_recv(items[j].socket, buffer, 10, 0);
            }
        }
        to_receive -= rc;
        if (to_receive == 0) break;
    }
    for (int i = 0; i < SOCKETS_NUM; i++) {
        pthread_join(threads[i], NULL);
    }
}
int main(void)
{
    context = zmq_ctx_new();
    zmq_ctx_set(context, ZMQ_THREAD_SCHED_POLICY, SCHED_FIFO);
    zmq_ctx_set(context, ZMQ_THREAD_PRIORITY, 99);
    void * responders[SOCKETS_NUM];
    char endpoint[] = "inproc://endpointX";
    for (int i = 0; i < SOCKETS_NUM; i++) {
        responders[i] = zmq_socket(context, ZMQ_PULL);
        endpoint[17] = (char)('0' + i);
        zmq_bind(responders[i], endpoint);
        numbers[i] = i;
    }
    time_t tt;
    time_t t = time(&tt);
    srand((unsigned int)t);
    zmq_pollitem_t poll_items[SOCKETS_NUM];
    for (int i = 0; i < SOCKETS_NUM; i++) {
        poll_items[i].socket = responders[i];
        poll_items[i].events = ZMQ_POLLIN;
    }
    ptimes = times;
    for (int i = 0; i < RUNS; i++) {
        run_test(poll_items);
        ptimes += SOCKETS_NUM;
    }
    long int lags[SOCKETS_NUM * RUNS];
    long int total_lag = 0;
    long int max_lag = 0;
    long int big_lag = 0;
    for (int i = 0; i < SOCKETS_NUM * RUNS; i++) {
        lags[i] = (times[i].receive_time.tv_nsec - times[i].send_time.tv_nsec + (times[i].receive_time.tv_sec - times[i].send_time.tv_sec) * 1000000000) / 1000;
        if (lags[i] > max_lag) max_lag = lags[i];
        total_lag += lags[i];
        if (lags[i] > 200) big_lag++;
    }
    long int avg_lag = total_lag / SOCKETS_NUM / RUNS;
    double SD = 0.0;
    for (int i = 0; i < SOCKETS_NUM * RUNS; ++i) {
        SD += pow((double)(lags[i] - avg_lag), 2);
    }
    double std_lag = sqrt(SD / SOCKETS_NUM / RUNS);
    printf("avg lag = %5ld [us]\n", avg_lag);
    printf("max lag = %5ld [us]\n", max_lag);
    printf("std dev = %8.2f [us]\n", std_lag);
    printf("big lag = %5ld x above 200 [us]\n", big_lag);
    for (int i = 0; i < SOCKETS_NUM; i++) {
        zmq_close(responders[i]);
    }
    zmq_ctx_destroy(context);
    return 0;
}
Q : "...I'd like it to be zero."
Cool to say, yet hard to make.
As you run an ultra-fast, memory-mapped inproc:// Transport Class, the main focus will be on performance tweaking of the Context()-processing. Here you spend so awfully much on setup-overhead & outright termination-overhead operations, just to send a mere 5 [B] some 1E5-times, that I guess there will never be a queue-management related issue, as there will never be any "stack-growing" at all.
1 ) ( supposing we leave the code as-is ) it would be a natural step for the performance tuning to at least set the ZeroMQ socket-to-CPU-core mapping with ZMQ_AFFINITY ( so it is not jumping or wandering from core to core ). It may be interesting to see if that many ~ 5E5 socket setups/terminations on the PUSH-er side, each without ever sending more than a single shot of 5 [B] over the memory-mapped line, could get some help ( for those large overheads & maintenance ) from configuring the context-instance with SOCKETS_NUM I/O-threads, using the ZMQ_IO_THREADS setting ( when fighting for a "RealTime"-ness using SCHED_FIFO, having only one I/O-thread does not help much, does it? ); a small sketch of these settings appears after the annotated code below.
2 ) the next level of experimentation is to re-balance the ZMQ_THREAD_AFFINITY_CPU_ADD maps (the global context's I/O-threads onto CPU-cores) and the per-socket setup of the ZMQ_AFFINITY maps onto the context's I/O-thread(s). Having a sufficient amount of CPU-cores, there may be some performance / ultra-low latency benefits from making several gangs-of-I/O-threads serving one socket-instance stay "together", on a same CPU-core, yet here we get into territory where the actual hardware and the real system's background workloads & still-"spare" resources for this "RealTime"-ambition motivated experimenting start to become hard to predict without any in-vivo testing & validation.
3 ) tweaking per-socket zmq_setsockopt() parameters may help, yet given the nano-scaled socket lifetime here ( rather an expensive, one-time-use "consumable-disposable" ), do not expect any breakthrough from this.
4 ) trying to measure with a nanosecond resolution, the more so if used for "durations" of something, ought to use CLOCK_MONOTONIC_RAW, which avoids ntp-injected adjustments, astronomy-correcting leap-second injections et al; a timing sketch follows at the end of this answer.
5 ) the zmq_poll()-strategy: I would not go this way. Using timeout == -1 blocks the whole circus, a thing I strongly discourage in any distributed-computing system, the more in one that has a "RealTime" ambition. Spinning the PULL-side to max performance may go via having 1:1 PUSH/PULL threads on either side, or, if trying to challenge the grooming, keeping the 5 PUSH-er threads as you have them and collecting all ingress messages on just a single, Zero-Copy, well-oiled PULL-er ( easier polling; a payload-based index-helper may tell to which send-side timestamp to pair the receive-side timestamp ). Anyway, the blocking poller is almost the anti-pattern for challenging any low-latency soft-realtime toys.
Anyway, do not hesitate to refactor the code and to use profiling tools to better see where you "acquire" the big_lag-s ( my guesses are above ):
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <zmq.h>
#include <pthread.h>
#define SOCKETS_NUM 5
#define RUNS 100000
void *context;
int numbers[SOCKETS_NUM];
struct {
    struct timespec send_time;
    struct timespec recv_time;
} times[SOCKETS_NUM * RUNS],
  *ptimes;
static void *worker_thread( void *dummy ) { //-------------------------- an overhead-expensive one-shot PUSH-based "Hello"-sender & .close()
    int *number = dummy;
    char endpoint[] = "inproc://endpointX";
    endpoint[17] = (char)( '0' + *number );
    int rnd = rand() / 3000;
    void *socket = zmq_socket( context, ZMQ_PUSH );
    struct timespec remtime,
                    sleeptime;
    sleeptime.tv_sec = 0;
    sleeptime.tv_nsec = rnd;
    zmq_connect( socket, endpoint );
    nanosleep( &sleeptime, &remtime ); // anything between < 0 : RAND_MAX/3000 > [ns] ... easily >> 32, as #define RAND_MAX 2147483647 ~ 715 827 [ns]
    clock_gettime( CLOCK_REALTIME, &( ptimes[*number].send_time ) ); //.......................................................................... CLK_set_NEAR_SEND
                                                                     // any CLOCK re-adjustments may and will skew any non-MONOTONIC_CLOCK
    zmq_send( socket, "Hello", 5, 0 );
    zmq_close( socket );
    return NULL;
}
static void run_test( zmq_pollitem_t items[] ) { //--------------------- zmq_poll()-blocked zmq_recv()-orchestrator ( called ~ 1E5 x !!! resources' nano-use & setup + termination overheads matter )
    char buffer[10];
    int to_receive = SOCKETS_NUM;
    pthread_t threads[SOCKETS_NUM];
    for ( int i = 0; i < SOCKETS_NUM; i++ ) { //------------------------ thread-maker ( a per-socket PUSH-er[]-s )
        pthread_create( &threads[i], NULL, worker_thread, &numbers[i] );
    }
    for ( int i = 0; i < SOCKETS_NUM; i++ ) { //------------------------ [SERIAL]-------- [i]-stepping
        int rc = zmq_poll( items, SOCKETS_NUM, -1 ); //----------------- INFINITE ??? --- blocks /\/\/\/\/\/\/\/\/\/\/\ --- several may flag ZMQ_POLLIN
        for ( int j = 0; j < SOCKETS_NUM; j++ ) { //-------------------- ALL-CHECKED in a loop for an items[j].revents
            if ( items[j].revents & ZMQ_POLLIN ) { //------------------- FIND IF IT WAS THIS ONE
                clock_gettime( CLOCK_REALTIME, &( ptimes[j].recv_time ) ); //................................................................... CLK_set_NEAR_poll()_POSACK'd R2recv
                zmq_recv( items[j].socket, buffer, 10, 0 ); //---------- READ-IN from any POSACK'd by zmq_poll()-er flag(s)
            }
        }
        to_receive -= rc; //-------------------------------------------- SUB rc
        if (to_receive == 0) break;
    }
    for ( int i = 0; i < SOCKETS_NUM; i++ ) { //------------------------ thread-killer
        pthread_join( threads[i], NULL );
    }
}
int main( void ) {
    context = zmq_ctx_new();
    zmq_ctx_set( context, ZMQ_THREAD_SCHED_POLICY, SCHED_FIFO );
    zmq_ctx_set( context, ZMQ_THREAD_PRIORITY, 99 );
    void *responders[SOCKETS_NUM];
    char endpoint[] = "inproc://endpointX";
    for ( int i = 0; i < SOCKETS_NUM; i++ ) {
        responders[i] = zmq_socket( context, ZMQ_PULL ); //------------- PULL instances into []
        endpoint[17] = (char)( '0' + i );
        zmq_bind( responders[i], endpoint ); //------------------------- .bind()
        numbers[i] = i;
    }
    time_t tt;
    time_t t = time(&tt);
    srand( (unsigned int)t );
    zmq_pollitem_t poll_items[SOCKETS_NUM];
    for ( int i = 0; i < SOCKETS_NUM; i++ ) { //------------------------ zmq_pollitem_t array[] ---pre-fill---
        poll_items[i].socket = responders[i];
        poll_items[i].events = ZMQ_POLLIN;
    }
    ptimes = times;
    for ( int i = 0; i < RUNS; i++ ) { //------------------------------- 1E5 RUNs
        run_test( poll_items ); //-------------------------------------- RUN TEST
        ptimes += SOCKETS_NUM;
    }
    long int lags[SOCKETS_NUM * RUNS];
    long int total_lag = 0;
    long int max_lag = 0;
    long int big_lag = 0;
    for ( int i = 0; i < SOCKETS_NUM * RUNS; i++ ) {
        lags[i] = ( times[i].recv_time.tv_nsec
                  - times[i].send_time.tv_nsec
                  + ( times[i].recv_time.tv_sec
                    - times[i].send_time.tv_sec
                    ) * 1000000000
                  ) / 1000; //------------------------------------------ [us]
        if ( lags[i] > max_lag ) max_lag = lags[i];
        total_lag += lags[i];
        if ( lags[i] > 200 ) big_lag++;
    }
    long int avg_lag = total_lag / SOCKETS_NUM / RUNS;
    double SD = 0.0;
    for ( int i = 0; i < SOCKETS_NUM * RUNS; ++i ) {
        SD += pow( (double)( lags[i] - avg_lag ), 2 );
    }
    double std_lag = sqrt( SD / SOCKETS_NUM / RUNS );
    printf("avg lag = %5ld [us]\n", avg_lag);
    printf("max lag = %5ld [us]\n", max_lag);
    printf("std dev = %8.2f [us]\n", std_lag);
    printf("big lag = %5ld x above 200 [us]\n", big_lag);
    for ( int i = 0; i < SOCKETS_NUM; i++ ) {
        zmq_close( responders[i] );
    }
    zmq_ctx_destroy( context );
    return 0;
}
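To make points 1 ), 2 ) and 5 ) concrete, here is a minimal sketch of the context/socket tuning and a finite-timeout poll. The 2 [ms] timeout and the thread counts are illustrative assumptions, not measured recommendations:
#include <stdint.h>
#include <zmq.h>
#define SOCKETS_NUM 5
void tuning_sketch( void )
{
    void *context = zmq_ctx_new();
    zmq_ctx_set( context, ZMQ_IO_THREADS, SOCKETS_NUM ); // more than the default single I/O-thread
    void *puller = zmq_socket( context, ZMQ_PULL );
    uint64_t affinity = 1; // bitmask: pin this socket onto the context's I/O-thread #0
    zmq_setsockopt( puller, ZMQ_AFFINITY, &affinity, sizeof( affinity ) );
    zmq_bind( puller, "inproc://endpoint0" );
    zmq_pollitem_t items[1] = { { puller, 0, ZMQ_POLLIN, 0 } };
    int rc = zmq_poll( items, 1, 2 ); // 2 [ms] timeout instead of -1: never block forever
    if ( rc == 0 ) { /* timed out: a chance for housekeeping instead of hanging */ }
    zmq_close( puller );
    zmq_ctx_destroy( context );
}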
Using nanosleep for a random ( not cardinal, safely outside of any control-loop(s) activity ) sleep is rather a risky luxury, as in earlier kernels it caused problems:
In order to support applications requiring much more precise pauses (e.g., in order to control some time-critical hardware), nanosleep() would handle pauses of up to 2 ms by busy waiting with microsecond precision when called from a thread scheduled under a real-time policy like SCHED_FIFO or SCHED_RR. This special extension was removed in kernel 2.5.39, hence is still present in current 2.4 kernels, but not in 2.6 kernels.
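For point 4 ), the duration-measurement change is tiny: the same clock_gettime() calls, just against a clock that no NTP slewing or leap-second correction can touch. A self-contained sketch, mirroring the lag arithmetic used above:
#include <stdio.h>
#include <time.h>
int main( void )
{
    struct timespec t0, t1;
    clock_gettime( CLOCK_MONOTONIC_RAW, &t0 ); // immune to ntp adjustments
    /* ... the section being timed ... */
    clock_gettime( CLOCK_MONOTONIC_RAW, &t1 );
    long us = ( t1.tv_nsec - t0.tv_nsec
              + ( t1.tv_sec - t0.tv_sec ) * 1000000000L ) / 1000;
    printf( "duration = %ld [us]\n", us );
    return 0;
}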

KNEM cookie and declared region

The first question is about PROT_WRITE and PROT_READ: I wasn't able to find them defined anywhere, and that is giving me a hard time compiling. I replaced them with 0 and 1 but it doesn't seem to work.
Second, I get "rejected (unexisting region cookie)".
int rank;
MPI_Init( &argc, &argv );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
MPI_Win win;
int knem_fd = open("/dev/knem", O_RDWR);
int err;
uint64_t size = 64;
if( rank == 0 ){
    char *inbuf = malloc(size);
    for( int i = 0; i < size; i++ )
        inbuf[i] = rand() % 26 + 97;
    print_array( inbuf, size, '0' );
    struct knem_cmd_create_region create;
    struct knem_cmd_param_iovec knem_iov[1];
    knem_iov[0].base = (uint64_t)&inbuf;
    knem_iov[0].len = size;
    create.iovec_array = (uintptr_t) &knem_iov[0];
    create.iovec_nr = 1;
    create.flags = KNEM_FLAG_SINGLEUSE;
    //create.protection = 1;
    err = ioctl( knem_fd, KNEM_CMD_CREATE_REGION, &create );
    MPI_Send( &(create.cookie), 1, MPI_UINT64_T, 1, 0, MPI_COMM_WORLD );
    MPI_Barrier( MPI_COMM_WORLD );
} else if( rank == 1 ){
    char *obuf = malloc(size);
    int err;
    struct knem_cmd_copy copy;
    struct knem_cmd_create_region create;
    struct knem_cmd_param_iovec knem_iov[1];
    knem_iov[0].base = (uint64_t)&obuf;
    knem_iov[0].len = size;
    create.iovec_array = (uintptr_t) &knem_iov[0];
    create.iovec_nr = 1;
    //create.protection = 0;
    create.flags = KNEM_FLAG_SINGLEUSE;
    err = ioctl( knem_fd, KNEM_CMD_CREATE_REGION, &create );
    MPI_Recv( &(copy.src_cookie), 1, MPI_UINT64_T, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE );
    copy.src_offset = 0;
    copy.dst_cookie = create.cookie;
    copy.dst_offset = 0;
    copy.flags = 0;
    err = ioctl(knem_fd, KNEM_CMD_COPY, &copy);
    print_array( obuf, size, '1' );
    MPI_Barrier( MPI_COMM_WORLD );
}
Ranks 0 and 1 both create a region; 0 sends its cookie to 1, and 1 goes in to grab data from 0. I checked that the received cookie is the same as the sent cookie, but it still fails to find the declared region.
PROT_READ and PROT_WRITE are mmap flags; you need to include sys/mman.h to get them. In the second part of the code, you need to set copy.src_cookie to create.cookie (or just use an inline copy to avoid creating that region at all, since it'll be destroyed immediately because of the SINGLEUSE flag). Also, make sure you check the return values of all ioctl calls before continuing. Copy cannot work if create.cookie wasn't initialized because the create ioctl failed.
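For illustration, a sketch of a region-registration helper with those pieces in place. This is an assumption-laden sketch: the knem_io.h header name and the field layout follow the KNEM documentation as I recall it, and note the base field should receive the buffer itself, not the address of the pointer variable as in the question's (uint64_t)&inbuf:
#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h> /* PROT_READ / PROT_WRITE live here */
#include <knem_io.h> /* KNEM ioctl interface (header name per KNEM docs) */
/* Register buf with KNEM and return its cookie, or 0 on failure.
   prot is PROT_READ for a send-side region, PROT_WRITE for a receive-side one. */
static uint64_t register_region(int knem_fd, void *buf, uint64_t len, int prot)
{
    struct knem_cmd_param_iovec iov;
    struct knem_cmd_create_region create;
    iov.base = (uintptr_t)buf; /* the buffer itself, not &buf */
    iov.len = len;
    create.iovec_array = (uintptr_t)&iov;
    create.iovec_nr = 1;
    create.flags = KNEM_FLAG_SINGLEUSE;
    create.protection = prot;
    if (ioctl(knem_fd, KNEM_CMD_CREATE_REGION, &create) < 0) {
        perror("KNEM_CMD_CREATE_REGION"); /* check every ioctl, as advised above */
        return 0;
    }
    return create.cookie;
}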

Recursive algorithm to find all possible solutions in a nonogram row

I am trying to write a simple nonogram solver, in a kind of brute-force way, but I am stuck on a relatively easy task. Let's say I have a row with clues [2,3] that has a length of 10,
so the solutions are:
$$-$$$----
$$--$$$---
$$---$$$--
$$----$$$-
$$-----$$$
-$$----$$$
--$$---$$$
---$$--$$$
----$$-$$$
-$$---$$$-
--$$-$$$--
I want to find all the possible solutions for a row
I know that I have to consider each block separately, and each block will have an available space of n - (sum of remaining blocks' lengths + number of remaining blocks); for the first block above that is 10 - (3 + 1) = 6 cells of room, i.e. starting positions 0 through 4, which matches the solutions listed. But I do not know how to progress from here.
Well, this question already has a good answer, so think of this one more as an advertisement of Python's prowess.
def place(blocks,total):
    if not blocks: return ["-"*total]
    if blocks[0]>total: return []
    starts = total-blocks[0] #starts = 2 means possible starting indexes are [0,1,2]
    if len(blocks)==1: #this is a special case
        return [("-"*i+"$"*blocks[0]+"-"*(starts-i)) for i in range(starts+1)]
    ans = []
    for i in range(total-blocks[0]): #append current solutions
        for sol in place(blocks[1:],starts-i-1): #with all possible other solutions
            ans.append("-"*i+"$"*blocks[0]+"-"+sol)
    return ans
To test it:
for i in place([2,3,2],12):
    print(i)
Which produces output like:
$$-$$$-$$---
$$-$$$--$$--
$$-$$$---$$-
$$-$$$----$$
$$--$$$-$$--
$$--$$$--$$-
$$--$$$---$$
$$---$$$-$$-
$$---$$$--$$
$$----$$$-$$
-$$-$$$-$$--
-$$-$$$--$$-
-$$-$$$---$$
-$$--$$$-$$-
-$$--$$$--$$
-$$---$$$-$$
--$$-$$$-$$-
--$$-$$$--$$
--$$--$$$-$$
---$$-$$$-$$
This is what I got:
#include <iostream>
#include <vector>
#include <string>
using namespace std;
typedef std::vector<bool> tRow;
void printRow(tRow row){
    for (bool i : row){
        std::cout << ((i) ? '$' : '-');
    }
    std::cout << std::endl;
}
int requiredCells(const std::vector<int> nums){
    int sum = 0;
    for (int i : nums){
        sum += (i + 1); // The number + the at-least-one-cell gap at its right
    }
    return (sum == 0) ? 0 : sum - 1; // The right-most number doesn't need any gap
}
bool appendRow(tRow init, const std::vector<int> pendingNums, unsigned int rowSize, std::vector<tRow> &comb){
    if (pendingNums.size() <= 0){
        comb.push_back(init);
        return false;
    }
    int cellsRequired = requiredCells(pendingNums);
    if (cellsRequired > rowSize){
        return false; // There are no combinations
    }
    tRow prefix;
    int gapSize = 0;
    std::vector<int> pNumsAux = pendingNums;
    pNumsAux.erase(pNumsAux.begin());
    unsigned int space = rowSize;
    while ((gapSize + cellsRequired) <= rowSize){
        space = rowSize;
        space -= gapSize;
        prefix.clear();
        prefix = init;
        for (int i = 0; i < gapSize; ++i){
            prefix.push_back(false);
        }
        for (int i = 0; i < pendingNums[0]; ++i){
            prefix.push_back(true);
            space--;
        }
        if (space > 0){
            prefix.push_back(false);
            space--;
        }
        appendRow(prefix, pNumsAux, space, comb);
        ++gapSize;
    }
    return true;
}
std::vector<tRow> getCombinations(const std::vector<int> row, unsigned int rowSize) {
    std::vector<tRow> comb;
    tRow init;
    appendRow(init, row, rowSize, comb);
    return comb;
}
int main(){
    std::vector<int> row = { 2, 3 };
    auto ret = getCombinations(row, 10);
    for (tRow r : ret){
        while (r.size() < 10)
            r.push_back(false);
        printRow(r);
    }
    return 0;
}
And my output is:
$$-$$$----
$$--$$$---
$$---$$$--
$$----$$$--
$$-----$$$
-$$-$$$----
-$$--$$$--
-$$---$$$-
-$$----$$$-
--$$-$$$--
--$$--$$$-
--$$---$$$
---$$-$$$-
---$$--$$$
----$$-$$$
For sure, this can absolutely be improved.
Note: I didn't test it beyond the case already written.
Hope it works for you.

CUDA string search in large file, wrong result

I am working on a simple naive string search in CUDA.
I am new to CUDA. It works fine for smaller files (approx. ~1MB). After I make these files bigger (Ctrl+A, Ctrl+C several times in Notepad++), my program's results are higher (about +1%) than
grep -o text file_name | wc -l
It is a very simple function, so I don't know what could cause this. I need it to work with larger files (~500MB).
Kernel code ( gpuCount is a __device__ int global variable ):
__global__ void stringSearchGpu(char *data, int dataLength, char *input, int inputLength){
    int id = blockDim.x*blockIdx.x + threadIdx.x;
    if (id < dataLength)
    {
        int fMatch = 1;
        for (int j = 0; j < inputLength; j++)
        {
            if (data[id + j] != input[j]) fMatch = 0;
        }
        if (fMatch)
        {
            atomicAdd(&gpuCount, 1);
        }
    }
}
This is the kernel call in the main function:
int blocks = 1, threads = fileSize;
if (fileSize > 1024)
{
    blocks = (fileSize / 1024) + 1;
    threads = 1024;
}
clock_t cpu_start = clock();
// kernel call
stringSearchGpu<<<blocks, threads>>>(cudaBuffer, strlen(buffer), cudaInput, strlen(input));
cudaDeviceSynchronize();
After this I just copy the result to the host and print it.
Can anyone please help me with this?
First of all, you should always check the return values of CUDA functions for errors. The best way to do so would be the following:
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
Wrap your CUDA calls, such as:
gpuErrchk(cudaDeviceSynchronize());
Second, your kernel accesses out-of-bounds memory. Suppose dataLength=100, inputLength=7 and id=98. In your kernel code:
if (id < dataLength) // 98 is less than 100, so condition true
{
    int fMatch = 1;
    for (int j = 0; j < inputLength; j++) // j runs from [0 - 6]
    {
        // if j>1 then id+j>=100, which is out of bounds, illegal operation
        if (data[id + j] != input[j]) fMatch = 0;
    }
Change the condition to something like:
if (id < dataLength - inputLength)
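Putting both fixes together, this is roughly how the corrected kernel and a checked launch might look. A sketch under assumptions: gpuCount is the question's __device__ int global, and the bound is written as id + inputLength <= dataLength so the last valid start position is still tested (out-of-bounds bytes that happened to match would inflate the count, which would explain the ~+1% overshoot):
__device__ int gpuCount; // device-global match counter, as in the question
__global__ void stringSearchGpu(const char *data, int dataLength, const char *input, int inputLength)
{
    int id = blockDim.x * blockIdx.x + threadIdx.x;
    if (id + inputLength <= dataLength) // every data[id + j] below stays in bounds
    {
        int fMatch = 1;
        for (int j = 0; j < inputLength; j++)
        {
            if (data[id + j] != input[j]) fMatch = 0;
        }
        if (fMatch) atomicAdd(&gpuCount, 1);
    }
}
And launch it wrapped in the checks from above:
stringSearchGpu<<<blocks, threads>>>(cudaBuffer, strlen(buffer), cudaInput, strlen(input));
gpuErrchk(cudaPeekAtLastError());   // catches launch-configuration errors
gpuErrchk(cudaDeviceSynchronize()); // catches errors raised while the kernel ran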

custom ITOA not working right?

I wanted to make a custom ITOA function to put large numbers into small strings. This is what I have coded:
main(){
    printf("itoa(2000000000,36)= '%s'",itoa(2000000000,36));
    printf("itoa(36,36)= '%s'",itoa(36,36));
    printf("itoa(37,36)= '%s'",itoa(37,36));
    return 1;
}
stock itoa(val, base)
{
    new buf[1024] = {0,...};
    new i = 1023;
    new LETTERZ[37] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',0};
    for(; val && i; --i, val /= base)
        buf[i] = LETTERZ[val % base];
    return buf[i+1];
}
It's based on 'C' code from this page: http://www.jb.man.ac.uk/~slowe/cpp/itoa.html
But somehow this is the output:
[20:34:35] itoa(2000000000,36)= 'X'
[20:34:35] itoa(36,36)= '1'
[20:34:35] itoa(37,36)= '1'
And this is totally wrong. I don't know what output to expect exactly, but 36 and 37 surely can't give the same output, and 2 000 000 000 can't be just 'X', as X is supposed to be 35, not 2 000 000 000.
ZZ should be 1295, I think... I want to base this on the hexadecimal system, but with all the alphabet letters.
Could anyone tell me what's wrong here?
I'm working with a typeless language called PAWN (also known as SMALL) and later I want to use this code in VB.NET.
/* itoa example */
#include <stdio.h>
#include <stdlib.h>
int main ()
{
    int i;
    char buffer [33];
    printf ("Enter a number: ");
    scanf ("%d",&i);
    itoa (i,buffer,10);
    printf ("decimal: %s\n",buffer);
    itoa (i,buffer,16);
    printf ("hexadecimal: %s\n",buffer);
    itoa (i,buffer,2);
    printf ("binary: %s\n",buffer);
    return 0;
}
You only give the number and the base, but parameter 2 needs a pointer to already-allocated char storage. Use a buffer, or try NULL so that the function will return the result.
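Since itoa is not standard C or C++ anyway, here is a portable sketch of the same idea (a helper of my own, close in spirit to the jb.man.ac.uk version the question links to):
#include <string>
// Convert val to a string in the given base (2..36); digits are 0-9 then A-Z.
std::string to_base(unsigned long val, unsigned base)
{
    static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    if (base < 2 || base > 36) return "";
    std::string out;
    do {
        out.insert(out.begin(), digits[val % base]); // prepend least-significant digit
        val /= base;
    } while (val != 0);
    return out;
}
For example, to_base(2000000000, 36) yields "X2QXVK", whose first character is exactly the lone 'X' that the broken return buf[i+1] produced.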
The solution seemed to be simple: the return buf[i+1] just returned one character, so what I did is make it return an array:
new _s#T[4096];
#define sprintf(%1) (format(_s#T, SPRINTF_MAX_STRING, %1), _s#T)
main(){
    new num = atoi("ABCDEFG",36);
    printf("%d",num);
    printf("%s",itoa(num,36));
    return 1;
}
stock itoa(val, base)
{
    new buf[1024] = {0,...};
    new LETTERZ[37] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',0};
    for(new pos = 0; val; ++pos, val = floatround(val/base, floatround_floor))
        strins(buf, sprintf("%c", LETTERZ[val % base]), 0);
    return buf;
}
stock atoi(val[], base)
{
    new CURRNUM = 0;
    new len = strlen(val);
    new LETTERZ[37] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',0};
    for(new i = 0; i < len; ++i)
    {
        for(new x = 0; x < base; ++x)
        {
            new y = (len-i)-1;
            if(val[y] == LETTERZ[x])
            {
                CURRNUM += x*floatround(floatpower(base,i));
            }
        }
    }
    return CURRNUM;
}
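Since the plan is to port this to VB.NET later, the language-neutral inverse may be handy too; a sketch matching the to_base helper from the earlier answer (in standard C, strtoul(s, NULL, 36) already does this job):
#include <cctype>
#include <string>
// Inverse of to_base: parse a base-2..36 string into a number.
unsigned long from_base(const std::string &s, unsigned base)
{
    unsigned long val = 0;
    for (char c : s) {
        unsigned digit = std::isdigit((unsigned char)c)
                       ? (unsigned)(c - '0')
                       : (unsigned)(std::toupper((unsigned char)c) - 'A' + 10);
        val = val * base + digit; // accumulate most-significant digit first
    }
    return val;
}
A quick check: from_base("X2QXVK", 36) returns 2000000000, round-tripping the example above.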
