How to use unique pointers to create matrix? - c++11

I have to create a matrix using unique poiters that permit operations: Matrix a,b; Matrix c(b) and Matrix d=a;
So far I did the simple implementing of a matrix
class Matrix
{
public:vector<vector<int>> data;
Matrix() {}
Matrix(vector<vector<int>> matrix)
{
this->data=matrix;
}
Matrix (const Matrix& m2)
{
this->data=m2.data;
}
Matrix& operator= (const Matrix &m2)
{
this->data = m2.data;
return *this;
}
}
It's first time for me facing unique_ptr vectors, I found a plenty of informations about unique_ptr vectors creating arrays, but not much for matrix, it's so unclear.
How can I use unique_ptr vectors(I must use them) instread of simple vector?
Any help is welcome, thank you!

Try this for init matrix only with std::unique_ptr [1]:
const int rowCount = 3;
const int clmnCount = 6;
std::unique_ptr<std::unique_ptr<int[]>[]> matrix(new std::unique_ptr<int[]>[rowCount]());
// or
// auto matrix = new std::unique_ptr<int[]>[rowCount]();
for (int i = 0; i < rowCount; i++)
matrix[i] = std::make_unique<int[]>(clmnCount);
Example of use [1]:
#include <iostream>
#include <iomanip>
#include <memory>
int main() {
const int rowCount = 3;
const int clmnCount = 6;
std::unique_ptr<std::unique_ptr<int[]>[]> matrix(new std::unique_ptr<int[]>[rowCount]());
for (int i = 0; i < rowCount; i++)
{
matrix[i] = std::make_unique<int[]>(clmnCount);
for (int j = 0; j < clmnCount; j++) {
matrix[i][j] = j;
std::cout << std::setw(3) << matrix[i][j];
}
std::cout << std::endl;
}
}
Or if u want use std::vector of std::unique_ptr try this [2]:
const int rowCount = 3;
const int clmnCount = 6;
std::vector<std::unique_ptr<int[]>> matrix;
for (int i = 0; i < rowCount; i++)
matrix.push_back(std::make_unique<int[]>(clmnCount));
Example of use [2]:
#include <iostream>
#include <iomanip>
#include <memory>
#include <vector>
int main() {
const int rowCount = 3;
const int clmnCount = 6;
std::vector<std::unique_ptr<int[]>> matrix;
for (int i = 0; i < rowCount; i++)
{
matrix.push_back(std::make_unique<int[]>(clmnCount));
for (int j = 0; j < clmnCount; j++) {
matrix[i][j] = j;
std::cout << std::setw(3) << matrix[i][j];
}
std::cout << std::endl;
}
}

Related

Value of sum from thrust::reduce not correct

I have been trying to implement some code requiring to call reduce on thrust::device_ptr, and the results are not consistent with CPU implementation while dealing with large values. I have to deal with large values. So is there a way around:
My code:
#include <cuda_runtime_api.h>
#include <stdio.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <iostream>
#define NZ 412//
#define NX 402//
using namespace std;
using real =double;
void allocate_array_2d(real**& preal, const int dim1, const int dim2) {
// Contiguous allocation of 2D arrays
preal = new real * [dim1];
preal[0] = new real[dim1 * dim2];
for (int i = 1; i < dim1; i++) preal[i] = preal[i - 1] + dim2;
for (int i = 0; i < dim1; i++) {
for (int j = 0; j < dim2; j++) {
preal[i][j] = 0;
}
}
}
#define cudaCheckError(code) \
{ \
if ((code) != cudaSuccess) { \
fprintf(stderr, "Cuda failure %s:%d: '%s' \n", __FILE__, __LINE__, \
cudaGetErrorString(code)); \
} \
}
int main()
{
real** a;
std::cout.precision(30);
allocate_array_2d(a, NZ, NX);//input array
for (int i = 0; i < NZ; i++) {
for (int j = 0; j < NX; j++) {
a[i][j] = 2.14748e+09;
}
}
real* da;
cudaCheckError(cudaMalloc(&da, NZ * NX * sizeof(real)));
cudaCheckError(cudaMemcpy(da,a[0], NZ * NX * sizeof(real),cudaMemcpyHostToDevice));
///************************
//CUDA KERNELS ARE HERE
// REMOVED FOR CLEAR QUESTION
///*************************
real sum1=0;
thrust::device_ptr<real> dev_ptr = thrust::device_pointer_cast(da);
sum1 = thrust::reduce(dev_ptr, dev_ptr+NZ*NX, 0, thrust::plus<real>());
cout<<" \nsum gpu "<< sum1<<"\n";
real sum2=0;
////////CPU PART DOING SAME THING//////
for (int i = 0; i < NZ; i++) {
for (int j = 0; j < NX; j++) {
sum2 += a[i][j];
}
}
cout<<"\nsum cpu "<< sum2<<"\n";
if((sum2-sum1)<0.001)
std::cout << "\nSUCESS "<< "\n";
else
std::cout << "\nFailure & by "<<sum2-sum1<< "\n";
}
The compiler that I am using is nvcc and my graphics card is nvidia 1650 with compute capability 7.5.
According to the documentation, thrust expects the type for summation to be reflected in the init value:
sum1 = thrust::reduce(dev_ptr, dev_ptr+NZ*NX, 0, thrust::plus<real>());
^
The type of that constant you have is an integral type. If you change that to a double-precision constant:
sum1 = thrust::reduce(dev_ptr, dev_ptr+NZ*NX, 0.0, thrust::plus<real>());
you get matching results, between CPU and GPU, according to my testing. (You could alternatively cast your constant to real type: (real)0 and use that, and there are other ways to address this as well, such as dropping the use of the init value and the binary op.)

Topcoder - grafixMask, Implementing DFS

I have been stuck at the problem grafixMask for a day now. This is the code I wrote following the pseudocode in the tutorial for DFS. I think that my code is not respecting the condition which decides which grid to include resulting in wrong answer but I can't figure out how to fix it.
#include <iostream>
#include <vector>
#include <stack>
#include <algorithm>
#include <sstream>
#include <string>
using namespace std;
const int ROWS = 400;
const int COLUMNS = 600;
class grafixMask {
public:
bool visited[ROWS][COLUMNS];
vector<int> result;
vector<int> sortedAreas (vector<string> rectangles) {
// initialize graph
for (int row = 0; row < ROWS; row++)
for (int column = 0; column < COLUMNS; column++)
visited[row][column] = false;
for (string rec: rectangles) {
int r1, c1, r2, c2;
istringstream ss(rec);
ss >> r1 >> c1 >> r2 >> c2;
// set rectangular masks
for(int i = r1; i <= r2; i++)
for (int j = c1; j <= c2; j++)
visited[i][j] = true;
for (int row = 0; row < ROWS; row++)
for (int column = 0; column < COLUMNS; column++)
if (!visited[row][column])
result.push_back(doFill(row, column)); // find all connected points enclosed by masks
}
sort(result.begin(), result.end());
return result;
}
int doFill(int row, int column){
int res = 0;
stack<pair<int, int> > s;
s.push(make_pair(row, column));
while(!s.empty()) {
pair<int, int> p = s.top();
int r = p.first;
int c = p.second;
s.pop();
if (r < 0 || r >= 400 || c < 0 || c >= 600 || visited[r][c]) continue;
visited[r][c] = true;
res++; // we covered additional area
s.push(make_pair(r-1, c));
s.push(make_pair(r+1, c));
s.push(make_pair(r, c-1));
s.push(make_pair(r, c+1));
}
return res;
}
};
Going through the code infinite number of times I finally spotted what I did wrong:
Look at the code where I take the input as rectangles. Here I have accidentally included the for loop to find all the connected components of grid. So the correct code is:
#include <algorithm>
#include <iostream>
#include <sstream>
#include <stack>
#include <string>
#include <vector>
using namespace std;
const int ROWS = 400;
const int COLUMNS = 600;
bool visited[400][600] = {false};
class grafixMask {
public:
vector<int> result;
vector<int> sortedAreas(vector<string> rectangles) {
for (auto rec : rectangles) {
istringstream ss(rec);
int r1, c1, r2, c2;
ss >> r1 >> c1 >> r2 >> c2;
for (int i = r1; i <= r2; i++)
for (int j = c1; j <= c2; j++) visited[i][j] = true;
}
for (int row = 0; row < ROWS; row++)
for (int column = 0; column < COLUMNS; column++)
if (!visited[row][column]) {
result.push_back(doFill(row, column));
}
sort(result.begin(), result.end());
return result;
}
int doFill(int row, int column) {
int res = 0;
stack<pair<int, int> > s;
s.push(make_pair(row, column));
while (s.empty() == false) {
pair<int, int> p = s.top();
int r = p.first;
int c = p.second;
s.pop();
if (r < 0 || r >= 400 || c < 0 || c >= 600 ||
visited[r][c])
continue;
visited[r][c] = true;
res++; // we covered additional area
int dirRow[] = {1, -1, 0, 0};
int dirCol[] = {0, 0, 1, -1};
for (int i = 0; i < 4; i++) {
int newRow = r + dirRow[i];
int newCol = c + dirCol[i];
if (newRow >= 0 && newRow < 400 && newCol >= 0 && newCol < 600 &&
!visited[newRow][newCol]) {
s.push(make_pair(newRow, newCol));
}
}
}
return res;
}
};

Template Type Matrix Class Addition: error in Proper Addition

Can anyone help me out finding why the sum of my matrices is not how they should be?
template <typename T>
Matrix<T> Matrix<T>::operator + (const Matrix<T> &M){
Matrix<T> tmp(m,n,M.x);
for(int i=0;i<m;i++)
for(int j=0;j<n;j++)
tmp.Mat[i][j]+= Mat[i][j]+ M.Mat[i][j];
return tmp;
}
This is my program's body:
#include <iostream>
#include <vector>
template <typename T>
class Matrix {
private:
unsigned int m; unsigned int n;
std::vector<T> x;
std::vector<T> y;
std::vector<std::vector<int>> Mat;
public:
Matrix (unsigned int m, unsigned int n, std::vector<T> x);
Matrix (const Matrix<T> &M); //= default;
// Matrix ();
Matrix<T> operator = (const Matrix<T> &M); // Assignment
Matrix<T> operator + (const Matrix<T> &M); // Addition
Matrix<T> operator - (const Matrix<T> &M); // Subtraction
Matrix<T> operator * (const T &scalar); // Scalar Multiplication
Matrix<T> operator * (const Matrix<T> &M); // Matrix Multiplication
friend std::ostream& operator << (std::ostream& os, const Matrix<T> &M){
for (int i = 0; i< M.m; i++){
for (int j = 0; j< M.n; j++){
os << M.Mat[i][j] << ' ';
}
os << '\n';
}
os << '\n' ;
return os;
}
};
template <typename T>
Matrix<T>::Matrix (unsigned int m, unsigned int n, std::vector<T> x){ //constructor
this -> m = m;
this -> n = n;
this -> x = x;
int index = 0;
Mat.resize(m);
for (unsigned int i = 0; i < Mat.size(); i++) {
Mat[i].resize(n);
}
for (unsigned int i = 0; i<m; i++){
for (unsigned int j = 0; j<n; j++){
Mat[i][j] = x[index];
index++;
}
}
}
template<typename T>
Matrix<T>::Matrix(const Matrix &M) //copy constructor
:
m(M.m),
n(M.n),
Mat(M.Mat)
{}
And this is my main:
Note: We were required in the initialization that the constructor should take in 2 unsigned integers and a linear container where the elements will be divided to rows and columns according to the inputted.
int main(){
std::vector<int> x = {1,2,3,4};
std::vector<int> y = {5,6,7,8};
std::vector<int> z = {9,10,11,12};
Matrix<int> A{2,2,x};
Matrix<int> B{2,2,y};
Matrix<int> C{2,2,z};
C = B;
std::cout << "A\n" << A;
std::cout << "B\n" << B;
std::cout << "C\n" << C;
Matrix<int> E(A + B);
std::cout << "E\n" << E;
}
When I add A and B I always get
11 14
17 20
It's like the matrix B doubles before adding to A
Thanks in advance!
A.operator+(M) computes A + 2*M. You use the contents of M twice - once when constructing tmp ( Matrix<T> tmp(m,n,M.x) ) and again when updating it ( tmp.Mat[i][j]+= Mat[i][j]+ M.Mat[i][j] )

Generating random numbers into an array which prints to a Txt file. Before hitting the text file, the numbers need to be sorted

so my objective is to basically print out random numbers from 40,000 to 1,000,000 to a txt file, sorted using the heap method. I can print to the text file just fine with random numbers, but I am a bit stuck at sorting them using a heap method. I started a method and got a bit lost half way through after looking at some tutorials. Any thoughts/ helpful comments? Im literally new to stack overflow (posting wise) so forgive me if I didnt place this here correctly. Thank you!
//Heapsort algorithm
#include <iostream>
#include <string>
#include <fstream>
#include <ctime>
#include <stdio.h>
#include <stdlib.h>
using namespace std;
/*class generateDataSet {
public: static const int MAX_VALUE = 1000000;
public: static const bool SORTED = false;
public: static const bool REVERSE_SORTED = false;
};
*/
int main()
{
const int array= 16;
int arrayNum[array];
srand(time(NULL));
int upB = 1000000;
int loB = 40000;
int temp;
ofstream inputFile;
inputFile.open("randomData.txt");
if (inputFile.is_open())
{
for (int i = 0; i < array; i++)
{
arrayNum[i] = (rand()% (upB - loB + 1)) + loB;
inputFile << arrayNum[i] << "\n";
}
}
return 0;
}
void MaxHeapify(int d[], int i, int n)
{
int j;
int temp;
temp = d[i];
j = 2 * 1;
while (j <= n)
{
if (j < n && d[j + 1] > d[j])
j = j + 1;
if (temp > d[j])
break;
else if (temp <= d[j])
{
d[j / 2] = d[j];
j = 2 * j;
}
}
}
void heapSort(int d[], int n)
{
int i;
int temp;
for (i = n; i >= 2; i);
}

How to improve Dijkstra algorithm when querying n times?

I'm currently working on a problem at Codechef. You can find the problem statement here:
Delivery Boy
In short, the problem is asking to query n times the shortest path from a start to an end. My solution is to use Dijsktra with priority_queue plus caching the result into a hash_map in case we already had a start. Unfortunately, I got time limit exceed many times and I couldn't find a better way to make it faster. I wonder am I in the right track? or there is a better algorithm to this problem?
By the way, since the contest is still going, please don't post any solution. A hint is more than enough to me. Thanks.
Here is my attempt:
#ifdef __GNUC__
#include <ext/hash_map>
#else
#include <hash_map>
#endif
#include <iostream>
#include <iomanip>
#include <vector>
#include <string>
#include <algorithm>
#include <map>
#include <set>
#include <utility>
#include <stack>
#include <deque>
#include <queue>
#include <fstream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <cassert>
using namespace std;
#ifdef __GNUC__
namespace std {
using namespace __gnu_cxx;
}
#endif
const int MAX_VERTICES = 250;
const int INFINIY = (1 << 28);
int weight[MAX_VERTICES + 1][MAX_VERTICES + 1];
bool visited_start[MAX_VERTICES + 1] = { 0 };
struct vertex {
int node;
int cost;
vertex(int node = 0, int cost = 0)
: node(node), cost(cost) {
}
bool operator <(const vertex& rhs) const {
return cost < rhs.cost;
}
bool operator >(const vertex& rhs) const {
return cost > rhs.cost;
}
};
hash_map<int, vector<vertex> > cache;
typedef priority_queue<vertex, vector<vertex>, greater<vertex> > min_pq;
vector<vertex> dijkstra_compute_path(int start, int n) {
min_pq pq;
vector<vertex> path;
vector<int> visited(n, 0);
int min_cost = 0;
int better_cost;
vertex u;
for (int i = 0; i < n; ++i) {
path.push_back(vertex(i, INFINIY));
}
path[start].cost = 0;
pq.push(vertex(start, path[start].cost));
while (!pq.empty()) {
// extract min cost
u = pq.top();
pq.pop();
// mark it as visited
visited[u.node] = 1;
// for each vertex v that is adjacent to u
for (int v = 0; v < n; ++v) {
// if it's not visited, visit it
if (visited[v] == 0) {
better_cost = path[u.node].cost + weight[u.node][v];
// update cost
if (path[v].cost > better_cost) {
path[v].cost = better_cost;
pq.push(vertex(v, path[v].cost));
}
}
}
}
return path;
}
void check_in_cache(vector<vertex>& path, int start, int no_street) {
if (visited_start[start] == 0) {
path = dijkstra_compute_path(start, no_street);
cache.insert(make_pair(start, path));
visited_start[start] = 1;
}
else {
path = cache[start];
}
}
void display_cost(int stop_at_gas_cost, int direct_cost) {
printf("%d ", stop_at_gas_cost);
if (stop_at_gas_cost > direct_cost) {
printf("%d\n", stop_at_gas_cost - direct_cost);
}
else {
printf("0\n");
}
}
void handle_case_one() {
int no_scenario;
int dummy;
int s, g, d;
scanf("%d", &dummy);
scanf("%d", &no_scenario);
for (int i = 0; i < no_scenario; ++i) {
scanf("%d %d %d", &s, &g, &d);
printf("0 0\n");
}
}
void inout_delivery_boy() {
int no_street;
int no_scenario;
int restaurant;
int gas_station;
int destination;
int stop_at_gas_cost;
int direct_cost;
vector<vertex> direct;
vector<vertex> indirect;
vector<vertex> d;
int c;
scanf("%d", &no_street);
if (no_street == 1) {
handle_case_one();
return;
}
for (int x = 0; x < no_street; ++x) {
for (int y = 0; y < no_street; ++y) {
scanf("%d", &c);
weight[x][y] = c;
}
}
for (int i = 0; i < no_street; ++i) {
d.push_back(vertex(i, INFINIY));
}
scanf("%d", &no_scenario);
for (int i = 0; i < no_scenario; ++i) {
scanf("%d %d %d", &restaurant, &gas_station, &destination);
// check in cache
check_in_cache(direct, restaurant, no_street);
check_in_cache(indirect, gas_station, no_street);
// calculate the cost
stop_at_gas_cost = direct[gas_station].cost + indirect[destination].cost;
direct_cost = direct[destination].cost;
// output
display_cost(stop_at_gas_cost, direct_cost);
}
}
void dijkstra_test(istream& in) {
int start;
int no_street;
int temp[4] = { 0 };
vector<vertex> path;
in >> no_street;
for (int x = 0; x < no_street; ++x) {
for (int y = 0; y < no_street; ++y) {
in >> weight[x][y];
}
}
// arrange
start = 0;
temp[0] = 0;
temp[1] = 2;
temp[2] = 1;
temp[3] = 3;
// act
path = dijkstra_compute_path(start, no_street);
// assert
for (int i = 0; i < no_street; ++i) {
assert(path[i].cost == temp[i]);
}
// arrange
start = 1;
temp[0] = 1;
temp[1] = 0;
temp[2] = 2;
temp[3] = 4;
// act
path = dijkstra_compute_path(start, no_street);
// assert
for (int i = 0; i < no_street; ++i) {
assert(path[i].cost == temp[i]);
}
// arrange
start = 2;
temp[0] = 2;
temp[1] = 1;
temp[2] = 0;
temp[3] = 3;
// act
path = dijkstra_compute_path(start, no_street);
// assert
for (int i = 0; i < no_street; ++i) {
assert(path[i].cost == temp[i]);
}
// arrange
start = 3;
temp[0] = 1;
temp[1] = 1;
temp[2] = 1;
temp[3] = 0;
// act
path = dijkstra_compute_path(start, no_street);
// assert
for (int i = 0; i < no_street; ++i) {
assert(path[i].cost == temp[i]);
}
}
int main() {
// ifstream inf("test_data.txt");
// dijkstra_test(inf);
inout_delivery_boy();
return 0;
}
please notice N is small in the problem. have you tried Floyd shortest path algorithm to pre-calculate shortest path between each two nodes ? it will cost O(N^3) time, which is 250^3=15625000 in the problem, should be easy to be finished running in 1 second. Then you can answer each query in O(1).
the introduction of Floyd :
http://en.wikipedia.org/wiki/Floyd%E2%80%93Warshall_algorithm
ps: i think cached dijstra costs a maximum running time of O(N^3) for overall test case as well . but the way you implement the cache will spend more unnecessary time on memory copying, which may lead to a TLE. Just a guess.
Indeed Floyd-Warshall's Algorithm is better than Dijkstra's in this case, the complexity for Dijkstra is O(m*n^2) and in this problem M is much much higher than N so the O(n^3) time complexity of Floyd-Warshall is better.

Resources