Invalid accelerator data region: branching into or out of region is not allowed? - openacc

pgcc is showing "Invalid accelerator data region: branching into or out of region is not allowed" for the lines that I put my acc pragmas, but I don't understand why.
I'm using copy, copyin, and create for all arrays that the loops would utilize.
What am I missing? Thanks!
#pragma acc data copy(graph->pagerank), copyin(graph->indegree, graph->outdegree), create(pagerankNew)
while (1) {
#pragma acc kernels
{
for (i = 0; i < n; ++i) {
double sum = 0;
for (k = 0; k < graph->indegree[i]; ++k) {
//int j = graph->inlinks[i][k];
int j = 0;
sum += (1.0 / graph->outdegree[j]) * graph->pagerank[j];
}
pagerankNew[i] = firstterm + damping * sum;
double diff = fabs(graph->pagerank[i] - pagerankNew[i]);
// if(iterations > 50) {
if (diff != 0.000000 && diff < epsilon) {
return iterations;
}
}
for (k = 0; k < n; ++k) {
graph->pagerank[k] = pagerankNew[k];
}
}
++iterations;
}

Related

Simplex solver - issues with getting it working

I'm trying to write a simple simplex solver for linear optimization problems, but I'm having trouble getting it working. Every time I run it I get a vector subscript out of range (which is quite easy to find), but I think that its probably a core issue somewhere else in my impl.
Here is my simplex solver impl:
bool pivot(vector<vector<double>>& tableau, int row, int col) {
int n = tableau.size();
int m = tableau[0].size();
double pivot_element = tableau[row][col];
if (pivot_element == 0) return false;
for (int j = 0; j < m; j++) {
tableau[row][j] /= pivot_element;
}
for (int i = 0; i < n; i++) {
if (i != row) {
double ratio = tableau[i][col];
for (int j = 0; j < m; j++) {
tableau[i][j] -= ratio * tableau[row][j];
}
}
}
return true;
}
int simplex(vector<vector<double>>& tableau, vector<double>& basic, vector<double>& non_basic) {
int n = tableau.size() - 1;
int m = tableau[0].size() - 1;
while (true) {
int col = -1;
for (int j = 0; j < m; j++) {
if (tableau[n][j] > 0) {
col = j;
break;
}
}
if (col == -1) break;
int row = -1;
double min_ratio = numeric_limits<double>::infinity();
for (int i = 0; i < n; i++) {
if (tableau[i][col] > 0) {
double ratio = tableau[i][m] / tableau[i][col];
if (ratio < min_ratio) {
row = i;
min_ratio = ratio;
}
}
}
if (row == -1) return -1;
if (!pivot(tableau, row, col)) return -1;
double temp = basic[row];
basic[row] = non_basic[col];
non_basic[col] = temp;
}
return 1;
}

Is declaring a new intger inside a loop changes the space complexity?

Is declaring a new intger inside a loop changes the space complexity of the metohd?
for exampe if i'm looking at the follwoing 2 methods, is both of the methods space complexity is O(1)? or in the first method becuase I'm declaring the variable c over and over until the loop end it's space complexity is O(n)?
public static int what (int []a) {
int temp = 0;
for (int i = 0; i < a.length; i++) {
for (int j = i; j < a.length; j++) {
**int c = f(a, i, j);**
if (c % 2 == 0) {
if (j - i + 1 > temp)
temp = j - i + 1;
}
}
}
return temp;
}
public static int what (int []a) {
int temp = 0;
**int c;**
for (int i = 0; i < a.length; i++) {
for (int j = i; j < a.length; j++) {
**c = f(a, i, j);**
if (c % 2 == 0) {
if (j - i + 1 > temp)
temp = j - i + 1;
}
}
}
return temp;
}
Not sure if it's relevant to the question but also attahced the f method.
private static int f (int[]a, int low, int high)
{
int res = 0;
for (int i=low; i<=high; i++)
res += a[i];
return res;
}
When you declare a variable inside the for loop it goes out of scope when the iteration ends and gets re declared in the next iteration so you are not declaring n variables, you are declaring a variable n times

Finding number of pairs, product of whose indices is divisible by another number X

Given an array and some value X, find the number of pairs such that i < j , a[i] = a[j] and (i * j) % X == 0
Array size <= 10^5
I am thinking of this problem for a while but only could come up with the brute force solution(by checking all pairs) which will obviously time-out [O(N^2) time complexity]
Any better approach?
First of all, store separate search structures for each distinct A[i] as we iterate.
i * j = k * X
i = k * X / j
Let X / j be some fraction. Since i is an integer, k would be of the form m * least_common_multiple(X, j) / X, where m is natural.
Example 1: j = 20, X = 60:
lcm(60, 20) = 60
matching `i`s would be of the form:
(m * 60 / 60) * 60 / 20
=> m * q, where q = 3
Example 2: j = 6, X = 2:
lcm(2, 6) = 6
matching `i`s would be of the form:
(m * 6 / 2) * 2 / 6
=> m * q, where q = 1
Next, I would consider how to efficiently query the number of multiples of a number in a sorted list of arbitrary naturals. One way is to hash the frequency of divisors of each i we add to the search structure of A[i]. But first consider i as j and add to the result the count of divisors q that already exist in the hash map.
JavaScript code with brute force testing at the end:
function gcd(a, b){
return b ? gcd(b, a % b) : a;
}
function getQ(X, j){
return X / gcd(X, j);
}
function addDivisors(n, map){
let m = 1;
while (m*m <= n){
if (n % m == 0){
map[m] = -~map[m];
const l = n / m;
if (l != m)
map[l] = -~map[l];
}
m += 1;
}
}
function f(A, X){
const Ais = {};
let result = 0;
for (let j=1; j<A.length; j++){
if (A[j] == A[0])
result += 1;
// Search
if (Ais.hasOwnProperty(A[j])){
const q = getQ(X, j);
result += Ais[A[j]][q] || 0;
// Initialise this value's
// search structure
} else {
Ais[A[j]] = {};
}
// Add divisors for j
addDivisors(j, Ais[A[j]]);
}
return result;
}
function bruteForce(A, X){
let result = 0;
for (let j=1; j<A.length; j++){
for (let i=0; i<j; i++){
if (A[i] == A[j] && (i*j % X) == 0)
result += 1;
}
}
return result;
}
var numTests = 1000;
var n = 100;
var m = 50;
var x = 100;
for (let i=0; i<numTests; i++){
const A = [];
for (let j=0; j<n; j++)
A.push(Math.ceil(Math.random() * m));
const X = Math.ceil(Math.random() * x);
const _brute = bruteForce(A, X);
const _f = f(A, X);
if (_brute != _f){
console.log("Mismatch!");
console.log(X, JSON.stringify(A));
console.log(_brute, _f);
break;
}
}
console.log("Done testing.")
Just in case If someone needed the java version of this answer - https://stackoverflow.com/a/69690416/19325755 explanation has been provided in that answer.
I spent lot of time in understanding the javascript code so I thought the people who are comfortable with java can refer this for better understanding.
import java.util.HashMap;
public class ThisProblem {
public static void main(String[] args) {
int t = 1000;
int n = 100;
int m = 50;
int x = 100;
for(int i = 0; i<t; i++) {
int[] A = new int[n];
for(int j = 0; j<n; j++) {
A[j] = ((int)Math.random()*m)+1;
}
int X = ((int)Math.random()*x)+1;
int optR = createMaps(A, X);
int brute = bruteForce(A, X);
if(optR != brute) {
System.out.println("Wrong Answer");
break;
}
}
System.out.println("Test Completed");
}
public static int bruteForce(int[] A, int X) {
int result = 0;
int n = A.length;
for(int i = 1; i<n; i++) {
for(int j = 0; j<i; j++) {
if(A[i] == A[j] && (i*j)%X == 0)
result++;
}
}
return result;
}
public static int gcd(int a, int b) {
return b==0 ? a : gcd(b, a%b);
}
public static int getQ(int X, int j) {
return X/gcd(X, j);
}
public static void addDivisors(int n, HashMap<Integer, Integer> map) {
int m = 1;
while(m*m <= n) {
if(n%m == 0) {
map.put(m, map.getOrDefault(m, 0)+1);
int l = n/m;
if(l != m) {
map.put(l, map.getOrDefault(l, 0)+1);
}
}
m++;
}
}
public static int createMaps(int[] A, int X) {
int result = 0;
HashMap<Integer, HashMap<Integer, Integer>> contentsOfA = new HashMap<>();
int n = A.length;
for(int i = 1; i<n; i++) {
if(A[i] == A[0])
result++;
if(contentsOfA.containsKey(A[i])) {
int q = getQ(X, i);
result += contentsOfA.get(A[i]).getOrDefault(q, 0);
} else {
contentsOfA.put(A[i], new HashMap<>());
}
addDivisors(i, contentsOfA.get(A[i]));
}
return result;
}
}

What randomness algorithm does Array#sample use?

I'm trying to figure out which random algorithm Array#sample uses, but I'm getting a bit lost hunting around in the Ruby C code.
I find it helpful to dig in pry to examine actual source code although you can find it also by going to Ruby Docs and hover the method names to reveal the click to toggle source with magnifying glass which will also show you the same source code you can find here in pry, if you've done gem install pry-doc
arr = []
cd arr
show-method sample
(#<Array>):1> show-method sample
From: array.c (C Method):
Owner: Array
Visibility: public
Number of lines: 103
static VALUE
rb_ary_sample(int argc, VALUE *argv, VALUE ary)
{
VALUE nv, result;
VALUE opts, randgen = rb_cRandom;
long n, len, i, j, k, idx[10];
long rnds[numberof(idx)];
if (OPTHASH_GIVEN_P(opts)) {
VALUE rnd;
ID keyword_ids[1];
keyword_ids[0] = id_random;
rb_get_kwargs(opts, keyword_ids, 0, 1, &rnd);
if (rnd != Qundef) {
randgen = rnd;
}
}
len = RARRAY_LEN(ary);
if (argc == 0) {
if (len < 2)
i = 0;
else
i = RAND_UPTO(len);
return rb_ary_elt(ary, i);
}
rb_scan_args(argc, argv, "1", &nv);
n = NUM2LONG(nv);
if (n < 0) rb_raise(rb_eArgError, "negative sample number");
if (n > len) n = len;
if (n <= numberof(idx)) {
for (i = 0; i < n; ++i) {
rnds[i] = RAND_UPTO(len - i);
}
}
k = len;
len = RARRAY_LEN(ary);
if (len < k && n <= numberof(idx)) {
for (i = 0; i < n; ++i) {
if (rnds[i] >= len) return rb_ary_new_capa(0);
}
}
if (n > len) n = len;
switch (n) {
case 0:
return rb_ary_new_capa(0);
case 1:
i = rnds[0];
return rb_ary_new_from_values(1, &RARRAY_AREF(ary, i));
case 2:
i = rnds[0];
j = rnds[1];
if (j >= i) j++;
return rb_ary_new_from_args(2, RARRAY_AREF(ary, i), RARRAY_AREF(ary, j));
case 3:
i = rnds[0];
j = rnds[1];
k = rnds[2];
{
long l = j, g = i;
if (j >= i) l = i, g = ++j;
if (k >= l && (++k >= g)) ++k;
}
return rb_ary_new_from_args(3, RARRAY_AREF(ary, i), RARRAY_AREF(ary, j), RARRAY_AREF(ary, k));
}
if (n <= numberof(idx)) {
long sorted[numberof(idx)];
sorted[0] = idx[0] = rnds[0];
for (i=1; i<n; i++) {
k = rnds[i];
for (j = 0; j < i; ++j) {
if (k < sorted[j]) break;
++k;
}
memmove(&sorted[j+1], &sorted[j], sizeof(sorted[0])*(i-j));
sorted[j] = idx[i] = k;
}
result = rb_ary_new_capa(n);
RARRAY_PTR_USE(result, ptr_result, {
for (i=0; i<n; i++) {
ptr_result[i] = RARRAY_AREF(ary, idx[i]);
}
});
}
else {
result = rb_ary_dup(ary);
RBASIC_CLEAR_CLASS(result);
RB_GC_GUARD(ary);
RARRAY_PTR_USE(result, ptr_result, {
for (i=0; i<n; i++) {
j = RAND_UPTO(len-i) + i;
nv = ptr_result[j];
ptr_result[j] = ptr_result[i];
ptr_result[i] = nv;
}
});
RBASIC_SET_CLASS_RAW(result, rb_cArray);
}
ARY_SET_LEN(result, n);
return result;
}
So we can see
VALUE opts, randgen = rb_cRandom;
This would indicate the ruby Random class c is used for the randomness.
The code alone wouldn't tell us much about the algo
So lookup of https://ruby-doc.org/core-2.5.0/Random.html tells us it uses
PRNGs are currently implemented as a modified Mersenne Twister with a period of 2**19937-1.
And what the heck is a Mersenne Twister? I don't know but it sounds cool 😎 so https://en.wikipedia.org/wiki/Mersenne_Twister tells us about it and that it is exstensively used:
The Mersenne Twister is the default PRNG for the following software systems: Microsoft Excel,[3] GAUSS,[4] GLib,[5] GNU Multiple Precision Arithmetic Library,[6] GNU Octave,[7] GNU Scientific Library,[8] gretl,[9] IDL,[10] Julia,[11] CMU Common Lisp,[12] Embeddable Common Lisp,[13] Steel Bank Common Lisp,[14] Maple,[15] MATLAB,[16] Free Pascal,[17] PHP,[18] Python,[19][20] R,[21] Ruby,[22] SageMath,[23] Scilab,[24] Stata.[25]

Number of binary trees with equal values

There is array of values:
1 - n_1 times
2 - n_2 times
...
k - n_k times
How many trees with this nodes exist?
I create simple algorythm:
int get_count(const vector<int> n_i) {
if (n_i.size() <= 1) {
return 1;
} else {
int total_count = 0;
for (int i = 0; i < n_i.size(); ++i) {
vector<int> first;
vector<int> second;
for (int j = 0; j < i; ++j) {
first.push_back(n_i[j]);
}
if (n_i[i] != 1) {
second.push_back(n_i[i] - 1);
}
for (int j = i + 1; j < n_i.size(); ++j) {
second.push_back(n_i[j]);
}
total_count += (get_count(first) * get_count(second));
}
return total_count;
}
}
Because
#(n_1, n_2, ... n_k) = #(n_1 - 1, n_2, ..., n_k) + #(n_1) #(n_2 - 1, ... n_k) + ... + #(n_1, ..., n_k - 1)
and
#(0, n_i, n_j, ...) = #(n_i, n_j, ...)
But my code is so slow.
Is there a final formula via Cathalan's numbers, for example?
I guess that the problem can be split into calculating the number of permutations and calculating the number of binary trees of given size. I converted my initial recursive Java code (which gives up on n1=10,n2=10,n3=10) into this iterative one:
static int LIMIT = 1000;
static BigInteger[] numberOfBinaryTreesOfSize = numberOfBinaryTreesBelow(LIMIT);
static BigInteger[] numberOfBinaryTreesBelow(int m) {
BigInteger[] arr = new BigInteger[m];
arr[0] = BigInteger.ZERO;
arr[1] = arr[2] = BigInteger.ONE;
for (int n = 3; n < m; n++) {
BigInteger s = BigInteger.ZERO;
for (int i = 1; i < n; i++)
s = s.add(arr[i].multiply(arr[n - i]));
arr[n] = s;
}
return arr;
}
static BigInteger[] fac = facBelow(LIMIT);
static BigInteger[] facBelow(int m) {
BigInteger[] arr = new BigInteger[m];
arr[0] = arr[1] = BigInteger.ONE;
for (int i = 2; i < m; i++)
arr[i] = arr[i - 1].multiply(BigInteger.valueOf(i));
return arr;
}
static BigInteger getCountFast(int[] arr) {
// s: sum of n_i
int s = 0; for (int i = 0; i < arr.length; i++) { s += arr[i]; }
// p: number of permutations
BigInteger p = fac[s]; for (int i = 0; i < arr.length; i++) { p = p.divide(fac[arr[i]]); }
BigInteger count = p.multiply(numberOfBinaryTreesOfSize[s]);
return count;
}
public static void main(String[] args) {
System.out.println(getCountFast(new int[]{ 150, 150, 150, 150, 150 }));
}
The LIMIT limits the sum of the n_i. The above example takes about two seconds on my machine. Maybe it helps you with a C++ solution.

Resources