Collect all computed by mpi rows of matrix to root matrix - matrix

I'm trying hard all day to implement matrix multiply with help of MPI, all examples from the Internet didn't work for me (I don't know why, it compiles, run but not computing). Here is what I'm doing:
From bash:
mpirun -n 2 out/lb8
It reading matrix 2x4 (1 row per process) and starting to compute.
The problem is in SendRecv block (or generally in collecting data)
void Matrix_MPY(double **matrix_a, double **matrix_b, double ***matrix_c, int a_rows, int a_cols) {
int i, j;
int process_rank, process_count;
MPI_Comm_rank(MPI_COMM_WORLD, &process_rank);
MPI_Comm_size(MPI_COMM_WORLD, &process_count);
if (a_rows % process_count != 0) {
error_code = NOT_DEVIDED_BY_RANK_EXCEPTION;
return;
}
int rows_per_process = a_rows / process_count;
int current_row = rows_per_process * process_rank;
double **temp;
temp = (double **) malloc(sizeof(double *) * a_rows);
for (i = 0; i < a_rows; ++i){
temp[i] = (double *) malloc(sizeof(double) * a_rows);
}
for (i = current_row; i < current_row + rows_per_process; ++i) {
for (j = 0; j < a_rows; ++j)
{
int k;
for(k = 0; k < a_cols; ++k){
temp[i][j] += matrix_a[i][k] * matrix_b[k][j];
}
}
MPI_Sendrecv(temp[i], a_rows, MPI_DOUBLE, ROOT, TAG, temp[i], a_rows, MPI_DOUBLE, process_rank, TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
*matrix_c = temp;
}

This solution is worked for me
....
if (process_rank != ROOT)
MPI_Send(temp[i], a_rows, MPI_DOUBLE, ROOT, i, MPI_COMM_WORLD);
}
if (process_rank == ROOT) {
for (i = 1; i < process_count; ++i)
{
for (j = i * rows_per_process; j < i * rows_per_process + rows_per_process; ++j)
{
MPI_Recv(temp[j], a_rows, MPI_DOUBLE, i, j, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
}
}
}
*matrix_c = temp;

Related

Runtime error for large inputs for sorting ( quicksort)

This is a very simple program where the user inputs (x,y) coordinates and distance 'd' and the program has to find out the number of unrepeated coordinates from (x,y) to (x+d,y).
For eg: if input for one test case is: 4,9,2 then the unrepeated coordinates are (4,9),(5,9) and (6,9)(x=4,y=9,d=2). I have used a sorting algorithm as mentioned in the question (to keep track of multiple occurrences) however the program shows runtime error for test cases beyond 30. Is there any mistake in the code or is it an issue with my compiler?
For a detailed explanation of question: https://www.hackerearth.com/practice/algorithms/sorting/merge-sort/practice-problems/algorithm/missing-soldiers-december-easy-easy/
#include <stdio.h>
#include <stdlib.h>
int partition(int *arr, int p, int r) {
int x;
x = arr[r];
int tmp;
int i = p - 1;
for (int j = p; j <= r - 1; ++j) {
if (arr[j] <= x) {
i = i + 1;
tmp = arr[i];
arr[i] = arr[j];
arr[j] = tmp;
}
}
tmp = arr[i + 1];
arr[i + 1] = arr[r];
arr[r] = tmp;
return (i + 1);
}
void quicksort(int *arr, int p, int r) {
int q;
if (p < r) {
q = partition(arr, p, r);
quicksort(arr, p, q - 1);
quicksort(arr, q + 1, r);
}
}
int count(int A[],int ct) {
int cnt = 0;
for (int i = 0; i < ct; ++i) {
if (A[i] != A[i + 1]) {
cnt++;
}
}
return cnt;
}
int main() {
int t;
scanf("%d", &t);
long int tmp, y, d;
int ct = 0;
int i = 0;
int x[1000];
int j = 0;
for (int l = 0; l < t; ++l) {
scanf("%d%d%d", &tmp, &y, &d);
ct = ct + d + 1; //this counts the total no of coordinates for each (x,y,d)
for (int i = 0; i <= d; ++i) {
x[j] = tmp + i; //storing all possible the x and x+d coordinates
j++;
}
}
int cnt;
int p = ct - 1;
quicksort(x, 0, p); //quicksort sorting
for (int l = 0; l < ct; ++l) {
printf("%d ", x[l]); //prints sorted array not necessary to question
}
cnt = count(x, ct); //counts the number of non-repeated vertices
printf("%d\n", cnt);
}
The problem was the bounds of the array int x[1000] is not enough for the data given below.

matrix multiplication using malloc without user input

I am trying to use Malloc function to dynamically allocate memory but I also want to specify my data entry for operation rather than taking the user input.
I have found this code here which works fine, but I am working with a large data set and taking user input is not an option, so I want to keep using MALLOC but also define the data set.
like instead of following,
//Input Matrix1
for (i = 0; i < r1; i++)
for (j = 0; j < c1; j++)
scanf_s("%d", &mat1[i][j]);
I want something like
//mat1[2][2] = { {1,2},{2,3} }
to be inputed in the code
What would be the way to do it? I would really appreciate some advice. Thanks
#include<stdio.h>
#include<stdlib.h>
int main() {
int **mat1, **mat2, **res, i, j,k, r1, c1, r2, c2;
printf("\nEnter the Order of the First matrix...\n");
scanf_s("%d %d", &r1, &c1);
printf("\nEnter the Order of the Second matrix...\n");
scanf_s("%d %d", &r2, &c2);
if (c1 != r2) {
printf("Invalid Order of matrix");
exit(EXIT_SUCCESS);
}
mat1 = (int**)malloc(r1 * sizeof(int*));
for (i = 0; i < c1; i++)
mat1[i] = (int*)malloc(c1 * sizeof(int));
mat2 = (int**)malloc(r2 * sizeof(int*));
for (i = 0; i < c2; i++)
mat2[i] = (int*)malloc(c2 * sizeof(int));
res = (int**)calloc(r1, sizeof(int*));
for (i = 0; i < c2; i++)
res[i] = (int*)calloc(c2, sizeof(int));
/**/
//Input Matrix1
for (i = 0; i < r1; i++)
for (j = 0; j < c1; j++)
scanf_s("%d", &mat1[i][j]);
//Input Matrix2
for (i = 0; i < r2; i++)
for (j = 0; j < c2; j++)
scanf_s("%d", &mat2[i][j]);
//Printing Input Matrix 1 and 2
printf("\n Entered Matrix 1: \n");
for (i = 0; i < r1; i++) {
for (j = 0; j < c1; j++)
printf("%d ", mat1[i][j]);
printf("\n");
}
printf("\n Entered Matrix 2: \n");
for (i = 0; i < r2; i++) {
for (j = 0; j < c2; j++)
printf("%d ", mat2[i][j]);
printf("\n");
}
//int mat1[2][2] = { {1,2},{2,3} };
//int mat2[2][2] = { {1,3},{2,4} };
//Computation
//Multiplication
for (i = 0; i < r1; i++) {
for (j = 0; j < c2; j++) {
res[i][j] = 0;
for (k = 0; k < c1; k++)
res[i][j] += mat1[i][k] * mat2[k][j];
}
printf("\n");
}
printf("\nThe Multiplication of two matrix is\n");
for (i = 0; i < r1; i++) {
printf("\n");
for (j = 0; j < c2; j++)
printf("%d\t", res[i][j]);
}
printf("\n");
/* Addition
for(i=0;i<r1;i++)
for(j=0;j<c2;j++)
res[i][j]=mat1[i][j]+mat2[i][j];
printf("\nThe Addition of two matrix is\n");
for(i=0;i<r1;i++){
printf("\n");
for(j=0;j<c2;j++)
printf("%d\t",res[i][j]);
}
*/
return 0;
}
Please specify the format of your input data. Is it a csv file?
You can only specify data in the format int b[4] = {1, 2, 3, 4} when the size of b is fixed, i.e. known at compile time. But if all you matrices are known at compile time anyway, why bother doing dynamic allocation?
Also I cleaned up your code a bit:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define index(x, y, r) (x+r*y)
struct matrix {
int cols;
int rows;
double * data;
};
void print_mat(struct matrix * mat) {
int i, j;
for (i = 0; i < mat->rows; i++) {
for (j = 0; j < mat->cols; j++) {
printf("%f \t", mat->data[index(i, j, mat->rows)]);
}
printf("\n");
}
}
int mat_alloc(struct matrix *mat) {
mat->data = (double*)malloc(mat->rows*mat->cols*sizeof(double));
}
int read_mat(struct matrix *mat) {
int i, j;
for (i = 0; i < mat->rows; i++) {
for (j = 0; j < mat->cols; j++) {
scanf("%lf", &(mat->data[index(i, j, mat->rows)]));
}
}
}
int multiply(struct matrix * a, struct matrix * b, struct matrix * res) {
if(a->cols != b->rows){
printf("Matrix dimensions do not match!\n");
return 0;
}
res->rows = a->rows;
res->cols = b->cols;
mat_alloc(res);
memset(res->data, 0, res->cols*res->rows*sizeof(double));
int i, j, k;
for (i = 0; i < res->rows; i++) {
for (j = 0; j < res->cols; j++) {
for (k = 0; k < a->cols; k++) {
res->data[index(i, j, res->rows)] += a->data[index(i, k, a->rows)] * b->data[index(k, j, b->rows)];
}
}
}
}
int main() {
struct matrix mat1, mat2, res;
printf("\nEnter the Order of the First matrix...\n");
scanf("%d %d", &mat1.rows, &mat1.cols);
printf("\nEnter the Order of the Second matrix...\n");
scanf("%d %d", &mat2.rows, &mat2.cols);
mat_alloc(&mat1);
mat_alloc(&mat2);
read_mat(&mat1);
read_mat(&mat2);
printf("Scanned matrices: \n");
print_mat(&mat1);
printf("\n");
print_mat(&mat2);
printf("\n");
multiply(&mat1, &mat2, &res);
printf("Calculated result: \n");
print_mat(&res);
return 0;
}

error when use multiple mpi_bcast

I have a problem with 3 mpi_bcast and one mpi_scatter, my program don't work well ,mpi_scatter don't work and globalparcsr don't scatter between nodes. when i delete second and third mpi_bcast ,mpi_scatter work well. I want broadcast a and globalindividual and globalfitness and then scatter globalparcsr, part of my code as bellow:
int malloc2dint(int ***array, int n, int m) {
/* allocate the n*m contiguous items */
int *p = (int *)malloc(n*m * sizeof(int));
if (!p) return -1;
/* allocate the row pointers into the memory */
(*array) = (int **)malloc(n * sizeof(int*));
if (!(*array)) {
free(p);
return -1;
}
/* set up the pointers into the contiguous memory */
for (int i = 0; i<n; i++)
(*array)[i] = &(p[i*m]);
return 0;
}
int main(int argc, char *argv[]) {
int size, rank, divided_pop_size, sum = 0, root = 0, procgridsize, sum2 = 0,generation=0;
int **globalindividual, **localindividual;
int *globalfitness, *localfitness;
int *globalparcsr, *localparcsr;
int **recbuf;
int *sendcounts, *parsendcount; //specifying the number of elements to send to each processor
int *displs, *pardispls; //Entry i specifies the displacement
MPI_Status status;
int offset, rows;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
divided_pop_size = n_initial_pop / size;
if (rank == root)
{
malloc2dint(&globalindividual, n_initial_pop, num_vertices);
read_graph();
globalfitness = (int*)malloc(n_initial_pop * sizeof(int));
globalparcsr = (int*)malloc(n_initial_pop * sizeof(int));
globalindividual = initial_population(globalindividual, n_initial_pop);
for (int i = 0; i < n_initial_pop; i++) {
printf("\n");
for (int j = 0; j < num_vertices; j++)
printf("%d", globalindividual[i][j]);
}
}
for (int p = 0; p < size; p++) {
if (rank == p) {
malloc2dint(&localindividual,n_initial_pop + 2, num_vertices);
localindividual = initial_population(localindividual, divided_pop_size + 2);
}
}
MPI_Bcast(&a[0][0], 5000 * 5000, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Bcast(&globalindividual[0][0], n_initial_pop*num_vertices, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&globalfitness, n_initial_pop, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Finalize();
return 0;
}
a is a 2d array and globalindividual is a 2d array with 12 rows and 8 columns and globalfitness is 1d array with size 12
please help me.

Why are my MaxHeapify and BuildMaxHeap procedures failing to organise a heap?

In my ansi-c implementation of heap I have two procedures:
void MaxHeapify(Heap * h, int i)
{
int l = Left(i);
int r = Right(i);
int L, tmp;
if(l < h->heapsize && h->data[l] > h->data[i]) L = l;
else L = i;
if(r < h->heapsize && h->data[r] > h->data[L]) L = r;
if(L != i)
{
tmp = h->data[i];
h->data[i] = h->data[L];
h->data[L] = tmp;
MaxHeapify(h, L);
}
}
void BuildMaxHeap(Heap * h)
{
int i;
h->heapsize = h->length;
for(i = h->length / 2; i >= 0; i--)
MaxHeapify(h, i);
}
And main.c
int main(int argc, char *argv[])
{
int i;
Heap h;
int tab[] = {4,1,3,2,16,9,10,14,8,7};
HeapInit(&h, tab, 10);
for(i = 0; i < 10; i++) printf("%d ", h.data[i]);
printf("\n");
BuildMaxHeap(&h);
for(i = 0; i < 10; i++) printf("%d ", h.data[i]);
return 0;
}
I have strange output:
16 14 10 10 8 1 4 2 3 7
I've checked code a few times, but found nothing wrong.
CORRECTED NODE INDEX RETURNING FUNCTIONS:
int Left(int i)
{
return 2*i+1;
}
int Right(int i)
{
return 2*i+2;
}
#JimMischel posted correct answer in comments. It was written basing on pseudocode with indexing from 1 and that confused me. Correct code posted (via edit) in question.

Iterative/ Non-Recursive Merge Sort

I was trying iterative merge sort , but am stuck at at conditions when input length is not 2^x.
like int[] A ={4,5,1,254,66,75,12,8,65,4,87,63,53,8,99,54,12,34};
public class MergeSort {
public static void sort(int[] A) {
System.out.println("Log(A.len):"+log(A.length, 2));
for (int i = 0; i < log(A.length, 2); i++) { //log A.len
int r = 2 << i; //2^i
int mid = r >>> 1;
for (int j = 0; j+r < A.length; j = j + r) {
System.out.print("offset:" + j + " mid:" + (j + mid) + " r:" + (j + r));
merge(A, j, (j + mid), (j + r));
}
}
}
public static void merge(int[] A, int offset, int mid, int n) {
mid = mid - offset;
n = n - offset;
int[] L = new int[mid];
int[] R = new int[n - mid];
for (int i = 0; i < mid; i++) {
L[i] = A[i + offset];
R[i] = A[mid + i + offset];
}
System.out.print("\nL:");
print_array(L);
System.out.print("\nR:");
print_array(R);
int l = 0;
int r = 0; //left right pointer
int k = offset;
while (l < mid && r < mid) {
if (L[l] < R[r]) {
// System.out.println("in left");
A[k] = L[l];
l++;
} else {
// System.out.println("in right");
A[k] = R[r];
r++;
}
k++;
}
while (l < mid) {
A[k] = L[l];
l++;
k++;
}
while (r < mid) {
A[k] = R[r];
r++;
k++;
}
System.out.print("\nA:");
print_array(A);
System.out.print("\n\n");
}
public static void main(String[] args) {
int[] A ={4,5,1,254,66,75,12,8,65,4,87,63,53,8,99,54,12,34};
sort(A);
}
public static void print_array(int[] A) {
for (int i = 0; i < A.length; i++) {
System.out.print(A[i] + " ");
}
}
static int log(int x, int base) {
return (int) (Math.log(x) / Math.log(base));
}
}
It works fine when input length is 2^x.
Also is there any better way to implement iterative version , this looks a lot messy.
C++ example of bottom up merge sort. a[] is array to sort, b[] is temp array. It includes a check for number of merge passes and swaps in place if the number of passes would be odd, in order to end up with the sorted data in a[].
void BottomUpMerge(int a[], int b[], size_t ll, size_t rr, size_t ee);
void BottomUpCopy(int a[], int b[], size_t ll, size_t rr);
size_t GetPassCount(size_t n);
void BottomUpMergeSort(int a[], int b[], size_t n)
{
size_t s = 1; // run size
if(GetPassCount(n) & 1){ // if odd number of passes
for(s = 1; s < n; s += 2) // swap in place for 1st pass
if(a[s] < a[s-1])
std::swap(a[s], a[s-1]);
s = 2;
}
while(s < n){ // while not done
size_t ee = 0; // reset end index
while(ee < n){ // merge pairs of runs
size_t ll = ee; // ll = start of left run
size_t rr = ll+s; // rr = start of right run
if(rr >= n){ // if only left run
rr = n;
BottomUpCopy(a, b, ll, rr); // copy left run
break; // end of pass
}
ee = rr+s; // ee = end of right run
if(ee > n)
ee = n;
BottomUpMerge(a, b, ll, rr, ee);
}
std::swap(a, b); // swap a and b
s <<= 1; // double the run size
}
}
void BottomUpMerge(int a[], int b[], size_t ll, size_t rr, size_t ee)
{
size_t o = ll; // b[] index
size_t l = ll; // a[] left index
size_t r = rr; // a[] right index
while(1){ // merge data
if(a[l] <= a[r]){ // if a[l] <= a[r]
b[o++] = a[l++]; // copy a[l]
if(l < rr) // if not end of left run
continue; // continue (back to while)
do // else copy rest of right run
b[o++] = a[r++];
while(r < ee);
break; // and return
} else { // else a[l] > a[r]
b[o++] = a[r++]; // copy a[r]
if(r < ee) // if not end of right run
continue; // continue (back to while)
do // else copy rest of left run
b[o++] = a[l++];
while(l < rr);
break; // and return
}
}
}
void BottomUpCopy(int a[], int b[], size_t ll, size_t rr)
{
do // copy left run
b[ll] = a[ll];
while(++ll < rr);
}
size_t GetPassCount(size_t n) // return # passes
{
size_t i = 0;
for(size_t s = 1; s < n; s <<= 1)
i += 1;
return(i);
}

Resources