Is the cuda kernel limited by memory usage per thread/block - memory-management

I have a kernel code that executes properly
runnable code
__global__ static void CalcSTLDistance_Kernel(Integer ComputeParticleNumber)
//const Integer TID = CudaGetTargetID();
const Integer ID =CudaGetTargetID();
/*if(ID >= ComputeParticleNumber)
return ;
CDistance NearestDistance;
Integer NearestID = -1;
NearestDistance.Magnitude = 1e8;
NearestDistance.Direction.x = 0;
NearestDistance.Direction.y = 0;
NearestDistance.Direction.z = 0;//make_Scalar3(0,0,0);
//if(c_daOutputParticleID[ID] < -1)
// c_daSTLDistance[ID] = NearestDistance;
// c_daSTLID[ID] = NearestID;
// return;
//Scalar3 TargetPosition = c_daParticlePosition[ID];
Integer TriangleID;
Integer CID = GetCellID(&CONSTANT_BOUNDINGBOX,&c_daParticlePosition[ID],CIDX, CIDY, CIDZ);
if(CID >=0 && CID < c_CellNum)
//Integer Range = 1;
for(Integer k = -1; k <= 1; ++k)
for(Integer j = -1; j <= 1; ++j)
for(Integer i = -1; i <= 1; ++i)
if(MCID < 0 || MCID >= c_CellNum)
unsigned int TriangleNum = c_daCell[MCID].m_TriangleNum;
for(unsigned int l = 0; l < TriangleNum; ++l)
TriangleID = c_daCell[MCID].m_TriangleID[l];
if( TriangleID >= 0 && TriangleID < c_TriangleNum && TriangleID != NearestID)// No need to calculate again for the same triangle
CDistance Distance ;
Distance.Magnitude = CalcDistance(&c_daTriangles[TriangleID], &c_daParticlePosition[ID], &Distance.Direction);
if(Distance.Magnitude < NearestDistance.Magnitude)
NearestDistance = Distance;
NearestID = TriangleID;
c_daSTLDistance[ID] = NearestDistance;
c_daSTLID[ID] = NearestID;
However, when I add any basic variable or perform any checking operation, it fails with an unknown error, and when I check it with cuda-memcheck, it reports a memory read error.
In the changed code below, I tried to record the triangles that were already processed so that the redundant calculation can be skipped. For this I perform a basic lookup in a local array, but it throws a memory error.
error raising code
// Kernel: for the element selected by CudaGetTargetID(), walks the triangle lists of the
// cells addressed by MCID for offsets k, j, i in -1..1, keeps the smallest CalcDistance()
// result, and stores it together with the triangle index in c_daSTLDistance[ID] / c_daSTLID[ID].
// This variant additionally records visited triangle IDs in the local array td.
// NOTE(review): the computation of MCID is not visible in this listing.
__global__ static void CalcSTLDistance_Kernel(Integer ComputeParticleNumber)
//const Integer TID = CudaGetTargetID();
const Integer ID =CudaGetTargetID();
// NOTE(review): the range check on ID below starts a block comment whose closing marker is not
// visible here; if the check is disabled, an out-of-range ID would index every c_da* array
// out of bounds — confirm.
/*if(ID >= ComputeParticleNumber)
return ;
// Start from a very large distance with a zero direction and no triangle selected (-1).
CDistance NearestDistance;
Integer NearestID = -1;
NearestDistance.Magnitude = 1e8;
NearestDistance.Direction.x = 0;
NearestDistance.Direction.y = 0;
NearestDistance.Direction.z = 0;//make_Scalar3(0,0,0);
//if(c_daOutputParticleID[ID] < -1)
// c_daSTLDistance[ID] = NearestDistance;
// c_daSTLID[ID] = NearestID;
// return;
//Scalar3 TargetPosition = c_daParticlePosition[ID];
Integer TriangleID;
Integer CID = GetCellID(&CONSTANT_BOUNDINGBOX,&c_daParticlePosition[ID],CIDX, CIDY, CIDZ);
// len counts the IDs stored in td so far; td holds at most 100 entries.
int len=0;
int td[100];
// NOTE(review): the body of this loop is not visible; if it does not initialise td,
// the comparison against td[m] further down reads indeterminate values.
for(int m=0;m<100;m++)
if(CID >=0 && CID < c_CellNum)
//Integer Range = 1;
for(Integer k = -1; k <= 1; ++k)
for(Integer j = -1; j <= 1; ++j)
for(Integer i = -1; i <= 1; ++i)
if(MCID < 0 || MCID >= c_CellNum)
unsigned int TriangleNum = c_daCell[MCID].m_TriangleNum;
// NOTE(review): flag is declared once per cell, before the triangle loop, and no reset is
// visible inside that loop — once set it stays true for the remaining triangles of the cell.
bool flag = false;
//len=len+TriangleNum ;
for(unsigned int l = 0; l < TriangleNum; ++l)
TriangleID = c_daCell[MCID].m_TriangleID[l];
//tem[l] = c_daCell[MCID].m_TriangleID[l];
// Looks for TriangleID among all 100 slots of td (not only the first len entries).
for(int m=0;m<100;m++)
if(TriangleID ==td[m])
flag= true;
// NOTE(review): the same test appears twice and neither body is visible in this listing.
if(flag == true)
if(flag == true)
// NOTE(review): no bound check on len is visible; once more than 100 IDs have been appended,
// this writes past the end of td, which would match the reported memory error — confirm.
td[len] = TriangleID;
len= len+1;
if( TriangleID >= 0 && TriangleID < c_TriangleNum && TriangleID != NearestID)// No need to calculate again for the same triangle
CDistance Distance ;
Distance.Magnitude = CalcDistance(&c_daTriangles[TriangleID], &c_daParticlePosition[ID], &Distance.Direction);
// Keep the closest triangle seen so far.
if(Distance.Magnitude < NearestDistance.Magnitude)
NearestDistance = Distance;
NearestID = TriangleID;
// Publish the result for this ID.
c_daSTLDistance[ID] = NearestDistance;
c_daSTLID[ID] = NearestID;
This problem arises whenever I try to add any piece of code, so I suspect that this kernel does not allow me to add any further code because of excessive memory use.
Is there a memory limit per block or per thread?
Is there any way to find the total memory usage of a kernel?


Change variable in array coded with processing

I'm drawing a sketch with randomized letters that fill the screen. The issue is with this part of my code:
for (int i = 0; i < I1; i++) {
targets = tX + i * ts;
targets2 = tY;
} else {
runTowards = false;
for (int i = 0; i < I1; i++) {
targets = random(ts, width - ts);
targets2 = random(ts, height - ts);`
**I need to use i within the for loop to index the arrays that hold the letters' locations. The error reports a type mismatch: "float" does not match with "int[]".
How do I use i within the for loop (in the targets = and targets2 = assignments)?
Here is the code:**
String message = "a,b,c,d,e,f";
int []positions = new int [0];
int[]positions2= new int[1];
int []targets = new int [0] ;
int [] targets2= new int[1];
int I1= message.length();
boolean runTowards = true;
float runSpeed = 100;
float restSpeed = 1;
float currentSpeed = restSpeed;
float ts = 64;
boolean thisHasReached;
float bk;
float elapsedTime;
boolean allHaveReached;
float pauseTime = 1000;
float distX;
float distY;
float changeX;
float changeY;
float tX;
float tY;
float reachedTargetAt = 0;
boolean hasReachedTarget = true;
void setup() {
size (600,600);
if ((I1 > targets.length) && (I1 > positions.length)){
void draw() {
elapsedTime = millis() - reachedTargetAt;
if (elapsedTime > pauseTime) {
// Draws every character of `message` at its current position.
// positions[] supplies the x coordinate and positions2[] the y coordinate,
// matching how updatePositions() pairs them (distX from positions, distY from positions2).
void drawChars() {
for (int i = 0; i < I1; i++) {
// Fix: the y argument previously reused positions[i], so positions2[] never reached the screen.
text(message.charAt(i),positions[i], positions2[i]);
void updatePositions() {
allHaveReached = true;
thisHasReached = false;
for (int i = 0; i < I1; i++) {
distX = abs(positions[i] - targets[i]);
distY = abs(positions2[i] - targets2[i]);
changeX = random(currentSpeed);
changeY = random(currentSpeed);
thisHasReached = changeX > distX && changeY > distY;
if (positions[i] > targets[i]) {
changeX = -changeX;
if (positions2[i] > targets2[i]) {
changeY = -changeY;
positions[i] += changeX;
positions2[i] += changeY;
allHaveReached = allHaveReached && thisHasReached;
if ((!hasReachedTarget) && (allHaveReached)) {
hasReachedTarget = true;
reachedTargetAt = millis();
void updateCurrentSpeed() {
if (hasReachedTarget) {
if (currentSpeed >= restSpeed) {
currentSpeed -= (currentSpeed - restSpeed) * 9;
} else {
currentSpeed += 1;
} else {
if (currentSpeed <= runSpeed) {
currentSpeed += (runSpeed - currentSpeed) * 0.25;
} else {
currentSpeed -= 1;
void pickNewTarget() {
if (!runTowards && random(1) > 0.75) {
runTowards = true;
tX = random(ts, width - 3 * ts);
tY = random(ts, height - ts);
for (int i = 0; i < I1; i++) {
targets = tX + i * ts;
targets2 = tY;
} else {
runTowards = false;
for (int i = 0; i < I1; i++) {
targets = random(ts, width - ts);
targets2 = random(ts, height - ts);
hasReachedTarget = false;
Please look at this reference for random(): Note that it states "If two parameters are specified, the function will return a float with a value between the two values." You got the error message because you tried to mix ints and floats. You can correct it by changing the array types from int to float, eg.
float[] targets = new float[80] ;
float[] targets2= new float[80];
You'll also need to make those arrays large enough to handle the size of your data or you'll wind up with another error that the array length has been overrun. Then change the loop to reflect that you are adding data to the arrays ([i]):
void pickNewTarget() {
if (!runTowards && random(1) > 0.75) {
runTowards = true;
tX = random(ts, width - 3 * ts);
tY = random(ts, height - ts);
for (int i = 0; i < I1; i++) {
targets[i] = tX + i * ts;
targets2[i] = tY;
} else {
runTowards = false;
for (int i = 0; i < I1; i++) {
targets[i] = random(ts, width - ts);
targets2[i] = random(ts, height - ts);
hasReachedTarget = false;
You also need to call fill() before drawing the text characters, or you will see a blank screen.

Updating sand simulation in grid doesn't work

I want to make a falling sand simulation using cellular automata, but when I update it, nothing happens, and when I draw a line of a different material using lineDrawing(), this material appears in random cells. This is the update code:
// Advances the simulation by one step.
// The second loop pair walks the grid from the bottom row upwards (row 0 excluded) and moves
// a cell one row down when canMove() allows it.
void update()
// NOTE(review): the body of this first loop pair is not visible in this listing.
for (int i = verticalNumberOfCells - 1; i > 0; i--)
for (int j = 0; j < horizontalNumberOfCells; j++)
for (int y = verticalNumberOfCells - 1; y > 0; y--)
for (int x = 0; x < horizontalNumberOfCells; x++)
// Cells already handled during this step are left alone.
if (world[x][y].hasMoved) continue;
// Fix: a state can never be 0 and 1 at the same time, so the previous `&&` test never
// skipped anything. NOTE(review): assumes states 0 and 1 are the ones that must not move — confirm.
if (world[x][y].state == 0 || world[x][y].state == 1) continue;
if (canMove(world[x][y].state, x, y + 1))
move(x, y, x, y + 1);
The auxiliary functions that I use to check if the contents of a cell can change and to change the contents of a cell look like this:
boolean canMove(int state, int positionX, int positionY)
if (positionX < 0 || positionX >= horizontalNumberOfCells || positionY < 0 || positionY >= verticalNumberOfCells) return false;
int otherSubstance = world[positionX][positionY].state;
if (state == 5) return (otherSubstance == 4);
if (otherSubstance == 0) return true;
if (state == 2 && otherSubstance == 3 && random(1f) < 0.5f) return true;
return false;
void move(int fromX, int fromY, int toX, int toY)
Cells otherSubstance = world[toX][toY];
world[toX][toY] = world[fromX][fromY];
world[fromX][fromY] = otherSubstance;
world[fromX][fromY].hasMoved = true;
world[toX][toY].hasMoved = true;
world[fromX][fromY].velocityX = 0;
world[fromX][fromY].velocityY = 0;
if (toX > fromX)
world[toX][toY].velocityX = 1;
} else if (toX < fromX)
world[toX][toY].velocityX = -1;
} else
world[toX][toY].velocityX = 0;
if (toY > fromY)
world[toX][toY].velocityY = 1;
} else if (toY < fromY)
world[toX][toY].velocityY = -1;
} else
world[toX][toY].velocityY = 0;
I was able to fix this problem. The thing was, copying a cell in the move function didn't work. Here is the wrong version of the code:
Cells otherSubstance = world[toX][toY];
world[toX][toY] = world[fromX][fromY];
world[fromX][fromY] = otherSubstance;
and here is right version of the code:
int oldState = world[toX][toY].state;
world[toX][toY].state = world[fromX][fromY].state;
world[fromX][fromY].state = oldState;

How to efficiently compute weird numbers

I am trying to print the first n weird numbers, where n is a really big number (e.g. 10000).
I found this site to check my algorithm for errors up to n = 600:
However, my algorithm is really slow for bigger numbers:
import java.util.ArrayList;
import java.util.List;
public class Test {
/**
 * Prints weird numbers in the form "w(count) = n", examining even candidates only
 * (n starts at 2 and advances by 2).
 */
public static void main(String[] args) {
int n = 2;
for ( int count = 1 ; count <= 15000 ; n += 2 ) {
if (n % 6 == 0) {
List<Integer> properDivisors = getProperDivisors(n);
// Fix: the right-hand side had lost its beginning; sum the proper divisors through an IntStream.
int divisorSum = properDivisors.stream().mapToInt(i -> i.intValue()).sum();
if ( isDeficient(divisorSum, n) ) {
if ( isWeird(n, properDivisors, divisorSum) ) {
System.out.printf("w(%d) = %d%n", count, n);
private static boolean isWeird(int n, List<Integer> divisors, int divisorSum) {
return isAbundant(divisorSum, n) && ! isSemiPerfect(divisors, n);
private static boolean isDeficient(int divisorSum, int n) {
return divisorSum < n;
private static boolean isAbundant(int divisorSum, int n) {
return divisorSum > n;
private static boolean isSemiPerfect(List<Integer> divisors, int sum) {
int size = divisors.size();
// The value of subset[i][j] will be true if there is a subset of divisors[0..j-1] with sum equal to i
boolean subset[][] = new boolean[sum+1][size+1];
// If sum is 0, then answer is true
for (int i = 0; i <= size; i++) {
subset[0][i] = true;
// If sum is not 0 and set is empty, then answer is false
for (int i = 1; i <= sum; i++) {
subset[i][0] = false;
// Fill the subset table in bottom up manner
for ( int i = 1 ; i <= sum ; i++ ) {
for ( int j = 1 ; j <= size ; j++ ) {
subset[i][j] = subset[i][j-1];
int test = divisors.get(j-1);
if ( i >= test ) {
subset[i][j] = subset[i][j] || subset[i - test][j-1];
return subset[sum][size];
private static final List<Integer> getProperDivisors(int number) {
List<Integer> divisors = new ArrayList<Integer>();
long sqrt = (long) Math.sqrt(number);
for ( int i = 1 ; i <= sqrt ; i++ ) {
if ( number % i == 0 ) {
int div = number / i;
if ( div != i && div != number ) {
return divisors;
I have three easy early exits:
If a number is divisible by 6, it is semiperfect, which means it cannot be weird.
If a number is deficient, it cannot be weird.
The above points are based on
If a number is odd, it cannot be weird, at least for numbers up to 10^21 (which is good enough for the numbers I am trying to obtain).
The other optimization that I used is the optimization for finding all the dividers of a number. Instead of looping to n, we loop to SQRT(n).
However, I still need to optimize:
1. isSemiPerfect because it is really slow
2. If I can optimize further getProperDivisors it will be good too.
Any suggestions are welcome, since I cannot find any more optimizations to find 10000 weird numbers in reasonable time.
PS: Any code in Java, C#, PHP and JavaScript are OK for me.
EDIT: I found this topic and modified isSemiPerfect to look like this. However, it looks like it does not optimize but slow down the calculations:
private static boolean isSemiPerfect(List<Integer> divisors, int n) {
BigInteger combinations = BigInteger.valueOf(2).pow(divisors.size());
for (BigInteger i = BigInteger.ZERO; i.compareTo(combinations) < 0; i = i.add(BigInteger.ONE)) {
int sum = 0;
for (int j = 0; j < i.bitLength(); j++) {
sum += i.testBit(j) ? divisors.get(j) : 0;
if (sum == n) {
return true;
return false;
The issue is indeed in the function isSemiPerfect. I transposed your code to C++, and it was still quite slow.
Then I modified this function to use backtracking. I now obtain the first 15000 weird values in about 15 s. My interpretation is that in almost all cases the value is semiperfect, and the backtracking function then converges rapidly.
Note also that in my backtracking implementation I sort the divisors, which reduces the number of cases to be examined.
Edit 1: an error was corrected in getProperDivisors. The final results did not seem to change!
#include <iostream>
#include <vector>
#include <cmath>
#include <numeric>
#include <algorithm>
// return true if sum is obtained
bool test_sum (std::vector<int>& arr, int amount) {
int n = arr.size();
std::sort(arr.begin(), arr.end(), std::greater<int>());
std::vector<int> bound (n);
std::vector<int> select (n);
bound[n-1] = arr[n-1];
for (int i = n-2; i >= 0; --i) {
bound[i] = bound[i+1] + arr[i];
int sum = 0; // current sum
int i = 0; // index of the coin being examined
bool up_down = true;
while (true) {
if (up_down) {
if (i == n || sum + bound[i] < amount) {
up_down = false;
sum += arr[i];
select[i] = 1;
if (sum == amount) return true;
if (sum < amount) {
up_down = false;
if (select[i] == 0) i--;
} else { // DOWN
if (i < 0) break;
if (select[i] == 0) {
} else {
sum -= arr[i];
select[i] = 0;
up_down = true;
return false;
bool isDeficient(int divisorSum, int n) {
return divisorSum < n;
bool isAbundant(int divisorSum, int n) {
return divisorSum > n;
bool isSemiPerfect(std::vector<int> &divisors, int sum) {
int size = divisors.size();
// The value of subset[i][j] will be true if there is a subset of divisors[0..j-1] with sum equal to i
//bool subset[sum+1][size+1];
std::vector<std::vector<bool>> subset(sum+1, std::vector<bool> (size+1));
// If sum is 0, then answer is true
for (int i = 0; i <= size; i++) {
subset[0][i] = true;
// If sum is not 0 and set is empty, then answer is false
for (int i = 1; i <= sum; i++) {
subset[i][0] = false;
// Fill the subset table in bottom up manner
for ( int i = 1 ; i <= sum ; i++ ) {
for ( int j = 1 ; j <= size ; j++ ) {
subset[i][j] = subset[i][j-1];
int test = divisors[j-1];
if ( i >= test ) {
subset[i][j] = subset[i][j] || subset[i - test][j-1];
return subset[sum][size];
bool isWeird(int n, std::vector<int> &divisors, int divisorSum) {
//return isAbundant(divisorSum, n) && !isSemiPerfect(divisors, n);
return isAbundant(divisorSum, n) && !test_sum(divisors, n);
std::vector<int> getProperDivisors_old(int number) {
std::vector<int> divisors;
long sqrtn = sqrt(number);
for ( int i = 1 ; i <= sqrtn ; i++ ) {
if ( number % i == 0 ) {
int div = number / i;
if (div != i && div != number) {
return divisors;
std::vector<int> getProperDivisors(int number) {
std::vector<int> divisors;
long sqrtn = sqrt(number);
for ( int i = 2 ; i <= sqrtn ; i++ ) {
if (number % i == 0) {
int div = number/i;
if (div != i) divisors.push_back(div);
return divisors;
// Searches even candidates for weird numbers, collects them in `weird`,
// and prints the last ten entries on one line.
int main() {
int n = 2, count;
std::vector<int> weird;
int Nweird = 15000;
for (count = 0; count < Nweird; n += 2) {
// Multiples of 6 are skipped outright.
if (n % 6 == 0) continue;
auto properDivisors = getProperDivisors(n);
int divisorSum = std::accumulate (properDivisors.begin(), properDivisors.end(), 0);
if (isDeficient(divisorSum, n) ) {
if (isWeird(n, properDivisors, divisorSum)) {
//std::cout << count << " " << n << "\n";
weird.push_back (n);
for (int i = Nweird - 10; i < Nweird; ++i) {
// Fix: the value to print was missing between the two << operators.
std::cout << weird[i] << " ";
std::cout << "\n";
EDIT 2: The generation of divisors was completely rewritten. It now uses prime decomposition. This is much more complex, but the overall run time is divided by 7.5: generating the weird numbers now takes about 2 s on my PC.
#include <iostream>
#include <vector>
#include <cmath>
#include <numeric>
#include <algorithm>
template <typename T>
struct factor {T val = 0; T mult = 0;};
// Growing table of primes plus helpers that use it for prime factorisation
// and divisor enumeration.
template <typename T>
class decompo {
// Seed table: the 25 primes below 100, in increasing order.
// Fix: the previous list skipped 29 and contained the composite 39 (= 3 * 13), so numbers
// such as 899 = 29 * 31 were never divided by 29 and came out as a single "prime" factor.
std::vector<T> memory = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97};
// Cursor into `memory`, changed by reset() and pop().
T index = 0;
decompo () {};
// Moves the cursor back to the start of the table.
void reset () {index = 0;};
// Moves the cursor to the last stored prime and returns that prime.
T pop () {index = memory.size() - 1; return memory[index];};
// Returns a prime from the table, appending newly found primes when needed.
T get_next ();
// Collects the values returned by get_next() while they are <= n.
std::vector<T> find_all_primes (T n);
// Returns the prime factors of n with their multiplicities.
std::vector<factor<T>> decomp (T n);
// Returns the divisors of n, leaving out 1 and n itself.
std::vector<T> GetDivisors (T n);
// Calls get_next() until the value returned is at least n.
void complete (T n);
template <typename T>
T decompo<T>::get_next () {
if (index <= memory.size()) {
return memory[index-1];
T n = memory.size();
T candidate = memory[n-1] + 2;
while (1) {
bool found = true;
for (T i = 1; memory[i] * memory[i] <= candidate; ++i) {
if (candidate % memory[i] == 0) {
found = false;
if (found) {
memory.push_back (candidate);
return candidate;
candidate += 2;
template <typename T>
std::vector<T> decompo<T>::find_all_primes (T n) {
std::vector<T> result;
while (1) {
T candidate = get_next();
if (candidate <= n) {
result.push_back (candidate);
} else {
return result;
template <typename T>
void decompo<T>::complete (T n) {
T last = pop();
while (last < n) {
last = get_next();
template <typename T>
std::vector<factor<T>> decompo<T>::decomp (T n) {
std::vector<factor<T>> result;
if (n < 2) return result;
T candidate = get_next();
T last_prime = 0;
while (candidate*candidate <= n) {
if (n % candidate == 0) {
if (candidate == last_prime) {
result[result.size()-1].mult ++;
} else {
result.push_back ({candidate, 1});
last_prime = candidate;
n /= candidate;
} else {
candidate = get_next();
if (n > 1) {
if (n != last_prime) result.push_back ({n, 1});
else result[result.size()-1].mult ++;
return result;
template <typename T>
std::vector<T> decompo<T>::GetDivisors (T n) {
std::vector<T> div;
auto primes = decomp (n);
int n_primes = primes.size();
std::vector<int> exponent (n_primes, 0);
int current_index = 0;
int product = 1;
std::vector<int> product_partial(n_primes, 1);;
while (true) {
current_index = 0;
while (current_index < n_primes && exponent[current_index] == primes[current_index].mult) current_index++;
if (current_index == n_primes) break;
for (int index = 0; index < current_index; ++index) {
exponent[index] = 0;
product /= product_partial[index];
product_partial[index] = 1;
product *= primes[current_index].val;
product_partial[current_index] *= primes[current_index].val;
if (product != n && product != 1) div.push_back (product);
return div;
// return true if sum is obtained
bool test_sum (std::vector<int>& arr, int amount) {
int n = arr.size();
std::sort(arr.begin(), arr.end(), std::greater<int>());
std::vector<int> bound (n);
std::vector<int> select (n);
bound[n-1] = arr[n-1];
for (int i = n-2; i >= 0; --i) {
bound[i] = bound[i+1] + arr[i];
int sum = 0; // current sum
int i = 0; // index of the coin being examined
bool up_down = true;
while (true) {
if (up_down) {
if (i == n || sum + bound[i] < amount) {
up_down = false;
sum += arr[i];
select[i] = 1;
if (sum == amount) return true;
if (sum < amount) {
up_down = false;
if (select[i] == 0) i--;
} else { // DOWN
if (i < 0) break;
if (select[i] == 0) {
} else {
sum -= arr[i];
select[i] = 0;
up_down = true;
return false;
bool isDeficient(int divisorSum, int n) {
return divisorSum < n;
bool isAbundant(int divisorSum, int n) {
return divisorSum > n;
bool isSemiPerfect(std::vector<int> &divisors, int sum) {
int size = divisors.size();
// The value of subset[i][j] will be true if there is a subset of divisors[0..j-1] with sum equal to i
//bool subset[sum+1][size+1];
std::vector<std::vector<bool>> subset(sum+1, std::vector<bool> (size+1));
// If sum is 0, then answer is true
for (int i = 0; i <= size; i++) {
subset[0][i] = true;
// If sum is not 0 and set is empty, then answer is false
for (int i = 1; i <= sum; i++) {
subset[i][0] = false;
// Fill the subset table in bottom up manner
for ( int i = 1 ; i <= sum ; i++ ) {
for ( int j = 1 ; j <= size ; j++ ) {
subset[i][j] = subset[i][j-1];
int test = divisors[j-1];
if ( i >= test ) {
subset[i][j] = subset[i][j] || subset[i - test][j-1];
return subset[sum][size];
bool isWeird(int n, std::vector<int> &divisors, int divisorSum) {
//return isAbundant(divisorSum, n) && !isSemiPerfect(divisors, n);
return isAbundant(divisorSum, n) && !test_sum(divisors, n);
std::vector<int> getProperDivisors(int number) {
std::vector<int> divisors;
long sqrtn = sqrt(number);
for ( int i = 2 ; i <= sqrtn ; i++ ) {
if (number % i == 0) {
int div = number/i;
if (div != i) divisors.push_back(div);
return divisors;
// Same search as before, but the divisors now come from decompo::GetDivisors.
// Collects the weird numbers in `weird` and prints the last ten entries on one line.
int main() {
decompo <int> decomposition;
decomposition.complete (1e3); // not really useful
int n = 2, count;
std::vector<int> weird;
int Nweird = 15000;
for (count = 0; count < Nweird; n += 2) {
// Multiples of 6 are skipped outright.
if (n % 6 == 0) continue;
//auto properDivisors = getProperDivisors(n);
auto properDivisors = decomposition.GetDivisors(n);
int divisorSum = std::accumulate (properDivisors.begin(), properDivisors.end(), 0);
if (isDeficient(divisorSum, n) ) {
if (isWeird(n, properDivisors, divisorSum)) {
//std::cout << count << " " << n << "\n";
weird.push_back (n);
for (int i = Nweird - 10; i < Nweird; ++i) {
// Fix: the value to print was missing between the two << operators.
std::cout << weird[i] << " ";
std::cout << "\n";

Given a position in matrix [i, j], find the block it belongs to

I am working on a sudoku solving and generation algorithm, but I am stuck on a rather simple task. I have written the check for whether a number fits in a position row-wise and column-wise. What is driving me mad is the block check, i.e. whether the number fits in the 3x3 block.
It must be simple enough, but I can't arrive at the solution. In short, I want to know the 3x3 block to which a position in the matrix belongs. Here are some assert cases. The block, row and column numbers are all indexed from 0.
assert("x( 0, 8 ) === 2");
assert("x( 8, 8 ) === 8");
assert("x( 3, 3 ) === 4");
assert("x( 3, 7 ) === 5");
assert("x( 7, 1 ) === 6");
x( i , j ) returns the block number where i = row and j = col.
Isn't it just:
block = 3 * (i / 3) + (j / 3)
(assumes integer operations).
I would code a check, something like this (in pseudo C++)
// row = row to check
// col = column to check
// checkNum = number we are thinking of inserting
// Returns false if checkNum already appears in the given row, in the given column,
// or in the 3x3 block of `grid` that contains (row, col); returns true otherwise.
bool check(int row, int col, int checkNum)
// Top-left corner of the 3x3 block containing (row, col): integer division truncates,
// so 3 * (row/3) is the first row of that block, and likewise for the column.
int blockRow = 3 * (row/3);
int blockCol = 3 * (col/3);
// One index i in 0..8 walks the row, the column and the block together.
for(int i = 0 ; i < 9 ; i++)
if(grid[row][i] == checkNum) return false; // number exists in the row.
if(grid[i][col] == checkNum) return false; // number exists in the col.
// i/3 and i%3 enumerate the nine (row offset, column offset) pairs inside the block.
if(grid[blockRow + i/3][blockCol + i%3] == checkNum) return false; // number exists in the block.
return true;
Here is a sudoku solver in javascript. Taken from DSSudokuSolver, that I created.
The CleanElements function does something similar to what you are asking for.
CleanElements = function(comp_ary, Qsudoku){
for(i=0; i<9; i++){
for(j=0; j<9; j++){
/*if(Qsudoku[i][j] != ""){
for(k=0; k<9; k++){
i_index = comp_ary[i][k].indexOf(Qsudoku[i][j]);
if(i_index != -1){
comp_ary[i][k].splice(i_index, 1);
j_index = comp_ary[k][j].indexOf(Qsudoku[i][j]);
if(j_index != -1){
comp_ary[k][j].splice(j_index, 1);
if(i < 3){
i_min = 0;
i_max = 2;
else if(i < 6){
i_min = 3;
i_max = 5;
i_min = 6;
i_max = 8;
if(j < 3){
j_min = 0;
j_max = 2;
else if(j < 6){
j_min = 3;
j_max = 5;
j_min = 6;
j_max = 8;
for(i_box=i_min; i_box<=i_max; i_box++){
for(j_box=j_min; j_box<=j_max; j_box++){
index = comp_ary[i_box][j_box].indexOf(Qsudoku[i][j]);
if(index != -1){
comp_ary[i_box][j_box].splice(index, 1);
return comp_ary;
FindElements = function(comp_ary, Qsudoku){
for(i=0; i<9; i++){
for(j=0; j<9; j++){
if(comp_ary[i][j].length == 1){
if (Qsudoku[i][j] == ""){
Qsudoku[i][j] = comp_ary[i][j][0];
comp_ary[i][j] = [];
return Qsudoku;
// Scans the 9x9 grid Qsudoku and returns false as soon as a cell compares equal to "",
// or true when no such cell is found.
// NOTE(review): the name reads as "is there an empty element", yet the return value is the
// opposite — confirm what callers expect before relying on it.
// NOTE(review): i and j are assigned without a declaration in this function, so they are not
// local to it unless declared elsewhere — confirm.
IsThereNullElement = function(Qsudoku){
for(i=0; i<9; i++){
for(j=0; j<9; j++){
if(Qsudoku[i][j] == ""){
return false;
return true;
InitEmptyArray = function(){
empty_ary = Array();
for(i=0; i<9; i++){
empty_ary[i] = Array();
for(j=0; j<9; j++){
empty_ary[i][j] = Array();
for(k=0; k<9; k++){
empty_ary[i][j][k] = (k+1).toString();
return empty_ary;
DSSolve = function(Qsudoku){
comp_ary = InitEmptyArray(); //Complementary Array
window.comp_ary_old = comp_ary;
IterationMax = 5000;
IterationMax -= 1;
comp_ary = CleanElements(comp_ary, Qsudoku);
if(window.comp_ary_old == comp_ary){
//implement this.
window.comp_ary_old = comp_ary;
Qsudoku = FindElements(comp_ary, Qsudoku);
return Qsudoku;
if(IterationMax == 0){
return null;

Uva Judge 10149, Yahtzee

UPDATE: I have found the problem: my DP solution didn't handle the bonus correctly. I added one more dimension to the state array to represent the sum of the first 6 categories. However, the solution now times out. It is not far over the limit, since each test case is solved in less than 1 second on my machine.
The problem description is here:
I searched online and found that it should be solved by DP and bitmask. I implemented the code and passed all test cases I tested, but the Uva Judge returns wrong answer.
My idea is to have state[i][j] to be matching round i to category bitmasked by j. Please point out my mistakes or link some code that can solve this problem correctly. Here is my code:
public class P10149 {
public static void main(String[] args) throws IOException {
Scanner in = new Scanner(new FileInputStream("input.txt"));
// Scanner in = new Scanner(System.in);
while (in.hasNextLine()) {
int[][] round = new int[13][5];
for (int i = 0; i < 13; i++) {
for (int j = 0; j < 5; j++) {
round[i][j] = in.nextInt();
int[][] point = new int[13][13];
for (int i = 0; i < 13; i++) {
for (int j = 0; j < 13; j++) {
point[i][j] = getPoint(round[i], j);
int[][] state = new int[14][1 << 13];
for (int i = 1; i <= 13; i++) {
Arrays.fill(state[i], -1);
int[][] bonusSum = new int[14][1 << 13];
int[][] choice = new int[14][1 << 13];
for (int i = 1; i <= 13; i++) {
for (int j = 0; j < (1 << 13); j++) {
int usedSlot = 0;
for (int b = 0; b < 13; b++) {
if (((1 << b) & j) != 0) {
if (usedSlot != i) {
for (int b = 0; b < 13; b++) {
if (((1 << b) & j) != 0) {
int j2 = (~(1 << b) & j);
int bonus;
if (b < 6) {
bonus = bonusSum[i - 1][j2] + point[i - 1][b];
} else {
bonus = bonusSum[i - 1][j2];
int newPoint;
if (bonus >= 63 && bonusSum[i - 1][j2] < 63) {
newPoint = 35 + state[i - 1][j2] + point[i - 1][b];
} else {
newPoint = state[i - 1][j2] + point[i - 1][b];
if (newPoint > state[i][j]) {
choice[i][j] = b;
state[i][j] = newPoint;
bonusSum[i][j] = bonus;
int index = (1 << 13) - 1;
int maxPoint = state[13][index];
boolean bonus = (bonusSum[13][index] >= 63);
int[] mapping = new int[13];
for (int i = 13; i >= 1; i--) {
mapping[choice[i][index]] = i;
index = (~(1 << choice[i][index]) & index);
for (int i = 0; i < 13; i++) {
System.out.print(point[mapping[i] - 1][i] + " ");
if (bonus) {
System.out.print("35 ");
} else {
System.out.print("0 ");
static int getPoint(int[] round, int category) {
if (category < 6) {
int sum = 0;
for (int i = 0; i < round.length; i++) {
if (round[i] == category + 1) {
sum += category + 1;
return sum;
int sum = 0;
int[] count = new int[7];
for (int i = 0; i < round.length; i++) {
sum += round[i];
if (category == 6) {
return sum;
} else if (category == 7) {
for (int i = 1; i <= 6; i++) {
if (count[i] >= 3) {
return sum;
} else if (category == 8) {
for (int i = 1; i <= 6; i++) {
if (count[i] >= 4) {
return sum;
} else if (category == 9) {
for (int i = 1; i <= 6; i++) {
if (count[i] >= 5) {
return 50;
} else if (category == 10) {
for (int i = 1; i <= 3; i++) {
if (isStraight(count, i, 4)) {
return 25;
} else if (category == 11) {
for (int i = 1; i <= 2; i++) {
if (isStraight(count, i, 5)) {
return 35;
} else if (category == 12) {
for (int i = 1; i <= 6; i++) {
for (int j = 1; j <= 6; j++) {
if (i != j && count[i] == 3 && count[j] == 2) {
return 40;
return 0;
static boolean isStraight(int[] count, int start, int num) {
for (int i = start; i < start + num; i++) {
if (count[i] == 0) {
return false;
return true;
Here is the working solution.
import java.util.Arrays;
import java.util.Scanner;
public class P10149 {
static final int MAX_BONUS_SUM = 115;
public static void main(String[] args) throws IOException {
Scanner in = new Scanner(new FileInputStream("input.txt"));
// Scanner in = new Scanner(System.in);
long t1 = System.currentTimeMillis();
while (in.hasNextLine()) {
int[][] round = new int[13][5];
for (int i = 0; i < 13; i++) {
for (int j = 0; j < 5; j++) {
round[i][j] = in.nextInt();
int[][] point = new int[13][13];
for (int i = 0; i < 13; i++) {
for (int j = 0; j < 13; j++) {
point[i][j] = getPoint(round[i], j);
int[][] state = new int[1 << 13][MAX_BONUS_SUM + 1];
int[][] newState = new int[1 << 13][MAX_BONUS_SUM + 1];
for (int j = 0; j < (1 << 13); j++) {
Arrays.fill(state[j], -1);
Arrays.fill(newState[j], -1);
state[0][0] = 0;
int[][][] choice = new int[13][1 << 13][MAX_BONUS_SUM + 1];
for (int i = 0; i < 13; i++) {
for (int j = 0; j < (1 << 13); j++) {
int usedSlot = 0;
for (int b = 0; b < 13; b++) {
if (((1 << b) & j) != 0) {
if (usedSlot != i + 1) {
for (int b = 0; b < 13; b++) {
if (((1 << b) & j) != 0) {
int j2 = (~(1 << b) & j);
for (int s = 0; s <= MAX_BONUS_SUM; s++) {
int oldSum;
if (b < 6) {
if (s < point[i][b]) {
s = point[i][b] - 1;
oldSum = s - point[i][b];
} else {
oldSum = s;
if (state[j2][oldSum] < 0) {
int newPoint;
if (s >= 63 && oldSum < 63) {
newPoint = 35 + state[j2][oldSum] + point[i][b];
} else {
newPoint = state[j2][oldSum] + point[i][b];
if (newPoint > newState[j][s]) {
choice[i][j][s] = b;
newState[j][s] = newPoint;
for (int j = 0; j < (1 << 13); j++) {
for (int s = 0; s <= MAX_BONUS_SUM; s++) {
state[j][s] = newState[j][s];
Arrays.fill(newState[j], -1);
int index = (1 << 13) - 1;
int maxPoint = -1;
int sum = 0;
for (int s = 0; s <= MAX_BONUS_SUM; s++) {
if (state[index][s] > maxPoint) {
maxPoint = state[index][s];
sum = s;
boolean bonus = (sum >= 63);
int[] mapping = new int[13];
for (int i = 12; i >= 0; i--) {
mapping[choice[i][index][sum]] = i;
int p = 0;
if (choice[i][index][sum] < 6) {
p = point[i][choice[i][index][sum]];
index = (~(1 << choice[i][index][sum]) & index);
sum -= p;
for (int i = 0; i < 13; i++) {
System.out.print(point[mapping[i]][i] + " ");
if (bonus) {
System.out.print("35 ");
} else {
System.out.print("0 ");
long t2 = System.currentTimeMillis();
// System.out.println(t2 - t1);
static int getPoint(int[] round, int category) {
if (category < 6) {
int sum = 0;
for (int i = 0; i < round.length; i++) {
if (round[i] == category + 1) {
sum += category + 1;
return sum;
int sum = 0;
int[] count = new int[7];
for (int i = 0; i < round.length; i++) {
sum += round[i];
if (category == 6) {
return sum;
} else if (category == 7) {
for (int i = 1; i <= 6; i++) {
if (count[i] >= 3) {
return sum;
} else if (category == 8) {
for (int i = 1; i <= 6; i++) {
if (count[i] >= 4) {
return sum;
} else if (category == 9) {
for (int i = 1; i <= 6; i++) {
if (count[i] >= 5) {
return 50;
} else if (category == 10) {
for (int i = 1; i <= 3; i++) {
if (isStraight(count, i, 4)) {
return 25;
} else if (category == 11) {
for (int i = 1; i <= 2; i++) {
if (isStraight(count, i, 5)) {
return 35;
} else if (category == 12) {
for (int i = 1; i <= 6; i++) {
if (count[i] >= 5) {
return 40;
for (int i = 1; i <= 6; i++) {
for (int j = 1; j <= 6; j++) {
if (i != j && count[i] == 3 && count[j] == 2) {
return 40;
return 0;
static boolean isStraight(int[] count, int start, int num) {
for (int i = start; i < start + num; i++) {
if (count[i] == 0) {
return false;
return true;
Use Munkres' algorithm (also known as the Hungarian algorithm) to solve this problem.
