Check if object is in range in a matrix - algorithm

I would like to check if an object is in range in a matrix.
A 1 range would be 9 blocks around the player (orange).
But a two range would be 25 blocks (blue). The player is the red cross. I tried the following code:`
// Scans the cells around (x, y) for charTocheck, with the scan window clamped
// to the array bounds; returns 1 from the enclosing function on the first hit.
// NOTE(review): `size` and `sq` are computed but not used anywhere in this snippet.
int size = ((range * 2) +1) * ((range * 2) + 1);
int sq = (range * 2) + 1;
// Lower bounds of the window, clamped at 0.
int startX = x - range; if (startX < 0) startX = 0;
int startY = y - range; if (startY < 0) startY = 0;
// Upper bounds of the window. The loops below use them as exclusive limits
// (i < endX, j < endY), so the cells at x + range and y + range are not visited.
int endX = x + range; if (endX > arrayWitdth) endX = arrayWitdth;
int endY = y + range; if (endY > arrayLenght) endY = arrayLenght;
//printf("Range: %d\n", range);
for (size_t i = startX; i < endX; i++)
{
for (size_t j = startY; j < endY; j++)
{
//printf("Looking at (%d,%d)\n", i, j);
if (map[i][j] == charTocheck) return 1;
}
}
`

You don't check the last block, so the correct implementation would be:
// Scans every cell within `range` of (x, y), clamped to the array bounds, and
// returns 1 from the enclosing function as soon as charTocheck is found.
int size = ((range * 2) +1) * ((range * 2) + 1);
int sq = (range * 2) + 1;
// Lower bounds of the window, clamped at 0.
int startX = x - range; if (startX < 0) startX = 0;
int startY = y - range; if (startY < 0) startY = 0;
// Upper bounds are exclusive, hence the "+ 1" so that x + range and y + range
// themselves are still inspected.
int endX = x + range + 1; if (endX > arrayWitdth) endX = arrayWitdth;
int endY = y + range + 1; if (endY > arrayLenght) endY = arrayLenght;
//printf("Range: %d\n", range);
// Signed indices: with an unsigned index a negative end bound would be
// converted to a huge value and the loop would run far outside the array.
for (int i = startX; i < endX; i++)
{
    for (int j = startY; j < endY; j++)
    {
        //printf("Looking at (%d,%d)\n", i, j);
        if (map[i][j] == charTocheck) return 1;
    }
}
notice that endX and endY have slightly changed.

Related

Digital Image Processing Contrast Stretching Histogram

Here I attach the code that I use to convert a gray image into a contrast-stretched image and to draw the histogram of the stretched image. I used 122 as the low point and 244 as the high point. In the output histogram, the height of the histogram is reduced.
I cannot find the error in my code
#include "opencv2/opencv.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/core.hpp"
using namespace cv;
using namespace std;
// Loads the image named by argv[1], converts it to grayscale, applies a
// piecewise contrast mapping, builds and draws a 256-bin histogram of the
// mapped image, writes three PNG files and shows the images in windows.
int main(int argc, char* argv[]) {
Mat img = imread(argv[1], 1);
if (!img.data) {
cout << "Could not find the image!" << endl;
return -1;
}
int height = img.rows;
int width = img.cols;
int widthstep = img.step;
int ch = img.channels();
printf("Height : %d\n", height);
printf("Width : %d\n", width);
printf("Widthstep : %d\n", widthstep);
printf("No of channels : %d\n", ch);
Mat gray_image(height, width, CV_8UC1, Scalar(0));
cvtColor(img, gray_image, COLOR_BGR2GRAY);
Mat new_image = gray_image.clone();
// NOTE(review): this outer `v` is shadowed by the `int v` declared inside the loop below.
int v;
int output{};
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int v = (int)gray_image.at<uchar>(y, x);
if (v >= 0 && v <= 122) {
// (6 / 122) is an integer division and evaluates to 0, so output is 0 here.
output = int((6 / 122) * v);
}
// The condition reads v > 100, but values up to 122 were already taken by the branch above.
else if (v > 100 && v <= 244) {
// (244) / (122) is an integer division and evaluates to 2.
output = int(((244) / (122)) * (v - 122) + 6);
}
else if (v > 244 && v <= 255) {
// (5) / (11) is an integer division and evaluates to 0, so output is 250 here.
output = int(((5) / (11)) * (v - 244) + 250);
}
new_image.at<uchar>(y, x) = (uchar)output;
}
}
// Histogram of the mapped image: one counter per 8-bit intensity value.
int histn[256];
for (int i = 0; i < 256; i++) {
histn[i] = 0;
}
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
histn[(int)new_image.at<uchar>(y, x)] = histn[(int)new_image.at<uchar>(y, x)] + 1;
}
}
for (int i = 0; i < 256; i++) {
cout << i << ":" << histn[i] << endl;
}
// Histogram canvas: 512 x 400 pixels, white background, bin_wn pixels per bin.
int hist_wn = 512;
int hist_hn = 400;
int bin_wn = cvRound((double)hist_wn / 256);
Mat new_histogramImage(hist_hn, hist_wn, CV_8UC1, Scalar(255));
// Find the largest bin so the bars can be scaled to the canvas height.
int maxn = histn[0];
for (int i = 0; i < 256; i++) {
if (maxn < histn[i]) {
maxn = histn[i];
}
}
// Rescale every bin in place to the range [0, canvas rows].
for (int i = 0; i < 256; i++) {
histn[i] = ((double)histn[i] / maxn) * new_histogramImage.rows;
}
// Draw one vertical black line per bin, rising from the bottom edge.
for (int i = 0; i < 256; i++) {
line(new_histogramImage, Point(bin_wn * (i), hist_hn), Point(bin_wn * (i), hist_hn - histn[i]), Scalar(0), 1, 8, 0);
}
imwrite("Gray_Image.png", gray_image);
imwrite("newcontrast_Image.png", new_image);
imwrite("Histogram.png", new_histogramImage);
namedWindow("Image");
imshow("Image", img);
namedWindow("Gray_Image");
imshow("Gray_Image", gray_image);
namedWindow("newcontrast_Image");
imshow("newcontrast_Image", new_image);
namedWindow("New_Histogram");
imshow("New_Histogram", new_histogramImage);
namedWindow("Old_Histogram");
// NOTE(review): `histImage` is not declared anywhere in the code shown here.
imshow("Old_Histogram", histImage);
waitKey(0);
return 0;
}
Here are the new and old histograms that I got as outputs
I found the solution to the question. I changed the lowest and highest point values to 100 and 240, and I wrote the constants as decimal (floating-point) values so that the divisions are no longer integer divisions.
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int v = (int)gray_image.at<uchar>(y, x);
if (v >= 0 && v <= 100) {
output = int((5.0/ 100.0) * v);
}
else if (v > 100 && v <= 240) {
output = int(((245.0) / (140.0)) * (v - 100.0) + 5.0);
}
else if (v > 240 && v <= 255) {
output = int(((5.0) / (15.0)) * (v - 240.0) + 250.0);
}
new_image.at<uchar>(y, x) = (uchar)output;
}
}

Is there a way to avoid CUDA atomicAdd in my situation?

I'm doing an operation as the figure below.
Here is my kernel.
As shown in the figure, I make a small matrix using about one million vectors and accumulate it in a large prepared matrix.
I need an idea that can improve performance without exceeding 8Gb of GPU global memory.
How can I avoid atomic operations? I use the GTX1080. Existing kernels take about 250ms.
// Accumulates, for every src element with mask == 1, four weighted 24x24
// outer-product blocks into the matrix A and two 24-element segments into the
// vector b. Each thread handles one src element (index idx) and performs all
// of that element's additions with atomicAdd.
//   src      - input elements; fields read here: mask, fid0, fid1, w, c, J0, J1
//   num      - number of src elements to process (threads with idx >= num do nothing)
//   st       - value subtracted from fid0 / fid1 to obtain window-relative indices
//   mw_width - used to derive the row length of A and the index clamping below
//   A, b     - accumulation targets; A is indexed row-major with `cols` entries per row
__global__ void buildMatrixKernel(const CostJacobianCT *src, const int num, const int st, const int mw_width, double *A, double *b)
{
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < num)
{
if (src[idx].mask == 1)
{
// matrix width
int cols = 6 * (mw_width + 1);
// calc position for insert
// Indices mw_width - 2 and mw_width - 1 are both moved to mw_width - 3, so that
// pos + 24 never exceeds cols (assuming the index is at most mw_width - 1 -- TODO confirm).
int idx0 = (src[idx].fid0 - st);
if (idx0 == mw_width - 2)
{
idx0 = idx0 - 1;
}
else if (idx0 == mw_width - 1)
{
idx0 = idx0 - 2;
}
int idx1 = (src[idx].fid1 - st);
if (idx1 == mw_width - 2)
{
idx1 = idx1 - 1;
}
else if (idx1 == mw_width - 1)
{
idx1 = idx1 - 2;
}
int pos0 = idx0 * 6;
int pos1 = idx1 * 6;
// set temporary matrices: per-thread 24x24 outer products and 24-element right-hand sides
double _A00[24 * 24];
double _A11[24 * 24];
double _A01[24 * 24];
double _b0[24];
double _b1[24];
for (int y = 0; y < 24; y++)
{
for (int x = 0; x < 24; x++)
{
_A00[y * 24 + x] = src[idx].w * src[idx].J0[y] * src[idx].J0[x];
_A11[y * 24 + x] = src[idx].w * src[idx].J1[y] * src[idx].J1[x];
_A01[y * 24 + x] = src[idx].w * src[idx].J0[y] * src[idx].J1[x];
}
_b0[y] = src[idx].w * src[idx].c * src[idx].J0[y];
_b1[y] = src[idx].w * src[idx].c * src[idx].J1[y];
}
// set final matrix
for (int i = 0; i < 24; i++)
{
for (int j = 0; j < 24; j++)
{
atomicAdd(&A[(i + pos0) * cols + (j + pos0)], _A00[i * 24 + j]); // 00
atomicAdd(&A[(i + pos1) * cols + (j + pos1)], _A11[i * 24 + j]); // 11
atomicAdd(&A[(i + pos0) * cols + (j + pos1)], _A01[i * 24 + j]); // 01
// The 10 block is the transpose of the 01 block, hence the swapped indices.
atomicAdd(&A[(i + pos1) * cols + (j + pos0)], _A01[j * 24 + i]); // 10
}
atomicAdd(&b[i + pos0], _b0[i]); // 0
atomicAdd(&b[i + pos1], _b1[i]); // 1
}
}
}
}
2019.3.6.
I modified the code below to see some performance improvements.
250ms -> 95ms
// Variant of the accumulation kernel in which each thread handles a single
// (i, j) entry of one src element's 24x24 blocks instead of a whole element:
// thread idx maps to element idx / 576 and entry idx % 576 (576 = 24 * 24).
// The insert positions pos0 / pos1 are read from the src element itself.
// NOTE(review): because of the `idx < num` guard, num presumably has to be the
// total thread count (element count * 576) here -- confirm against the launch code.
__global__ void buildMatrixKernel(const CostJacobianCT *src, const int num, const int st, const int mw_width, double *A, double *b)
{
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < num)
{
int src_idx = idx / 576;
if (src[src_idx].mask == 1)
{
// Row length of A.
int cols = 6 * (mw_width + 1);
int pos0 = src[src_idx].pos0;
int pos1 = src[src_idx].pos1;
double w = src[src_idx].w;
double c = src[src_idx].c;
// Split the within-element index into row i and column j of the 24x24 block.
int sub_idx = idx % 576;
int i = sub_idx / 24;
int j = sub_idx % 24;
double J0_i = src[src_idx].J0[i];
double J0_j = src[src_idx].J0[j];
double J1_i = src[src_idx].J1[i];
double J1_j = src[src_idx].J1[j];
atomicAdd(&A[(i + pos0) * cols + (j + pos0)], w * J0_i * J0_j); // 00
atomicAdd(&A[(i + pos1) * cols + (j + pos1)], w * J1_i * J1_j); // 11
atomicAdd(&A[(i + pos0) * cols + (j + pos1)], w * J0_i * J1_j); // 01
atomicAdd(&A[(i + pos1) * cols + (j + pos0)], w * J1_i * J0_j); // 10
// Only the j == 0 thread of each row adds that row's entries of b, so each is added once.
if (j == 0)
{
atomicAdd(&b[i + pos0], w * c * J0_i); // 0
atomicAdd(&b[i + pos1], w * c * J1_i); // 1
}
}
}
}

3d point closest to multiple lines in 3D space

I am searching for a non-iterative, closed-form algorithm to find the least-squares solution for the point closest to a set of 3D lines. It is similar to 3D point triangulation (to minimize re-projections) but seems to be simpler and faster?
Lines can be described in any form, 2 points, point and unit direction or similar.
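Let the i-th line be given by point $a_i$ and unit direction vector $d_i$. We need to find the single point $p$ that minimizes the sum of squared point-to-line distances. This is where the gradient is the zero vector:
$$\nabla_p \sum_i \left[ (d_i^T d_i)\,\lVert a_i - p \rVert^2 - \left( d_i^T (a_i - p) \right)^2 \right] = 0$$
Expanding the gradient,
$$\sum_i \left[ -2\,(d_i^T d_i)\,(a_i - p) + 2\, d_i\, d_i^T (a_i - p) \right] = 0$$
Algebra yields a canonical 3x3 linear system,
$$M p = b$$
where the k'th row (a 3-element row vector) of matrix M is
$$M_k = \sum_i \left[ d_{ik}\, d_i^T - (d_i^T d_i)\, e_k^T \right]$$
with vector $e_k$ the respective unit basis vector, and
$$b = \sum_i \left[ d_i\,(d_i^T a_i) - (d_i^T d_i)\, a_i \right]$$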
It's not hard to turn this into code. I borrowed (and fixed a small bug in) a Gaussian elimination function from Rosettacode to solve the system. Thanks to the author!
#include <stdio.h>
#include <math.h>
/* 3-component vector, and 3x3 matrix stored as three row vectors. */
typedef double VEC[3];
typedef VEC MAT[3];

/* Linear solver: solves the n-by-n row-major system a * x = b. */
void solve(double *a, double *b, double *x, int n);

/* Returns the dot product of the 3-vectors a and b. */
double dot(VEC a, VEC b) {
    double acc = a[0] * b[0];
    for (int k = 1; k < 3; ++k) {
        acc += a[k] * b[k];
    }
    return acc;
}
/*
 * Computes the least-squares point nearest to n lines and stores it in p.
 * Line i passes through a[i] with direction d[i].
 *
 * Builds the 3x3 system  sum_i (d d^T - (d.d) I) p = sum_i (d (d.a) - (d.d) a)
 * and hands it to solve().
 */
void find_nearest_point(VEC p, VEC a[], VEC d[], int n) {
    MAT lhs = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
    VEC rhs = {0, 0, 0};
    for (int line = 0; line < n; ++line) {
        double *dir = d[line];
        double *base = a[line];
        const double dir_sq = dot(dir, dir);
        const double dir_dot_base = dot(dir, base);
        for (int r = 0; r < 3; ++r) {
            for (int c = 0; c < 3; ++c) {
                lhs[r][c] += dir[r] * dir[c];
            }
            lhs[r][r] -= dir_sq;
            rhs[r] += dir[r] * dir_dot_base - base[r] * dir_sq;
        }
    }
    solve(&lhs[0][0], rhs, p, 3);
}
// Debug printing.
/*
 * Prints the three components of v with three decimals each, separated by
 * ", ", preceded by the string l and followed by the string r.
 * The strings are only read, so they are taken as const; this lets callers
 * pass string literals without discarding a qualifier.
 */
void pp(VEC v, const char *l, const char *r) {
    printf("%s%.3lf, %.3lf, %.3lf%s", l, v[0], v[1], v[2], r);
}

/* Prints a vector as "(x, y, z)". */
void pv(VEC v) { pp(v, "(", ")"); }

/* Prints a matrix as three lines, one "[x, y, z]" per row. */
void pm(MAT m) {
    for (int i = 0; i < 3; ++i) pp(m[i], "\n[", "]");
}
// A simple verifier.
/*
 * Returns (d.d)(pa.pa) - (d.pa)^2 with pa = a - p. For a unit-length d this
 * is the squared distance from point p to the line through a with direction d.
 */
double dist2(VEC p, VEC a, VEC d) {
    VEC offset = { a[0] - p[0], a[1] - p[1], a[2] - p[2] };
    const double along = dot(d, offset);
    const double dir_sq = dot(d, d);
    const double offset_sq = dot(offset, offset);
    return dir_sq * offset_sq - along * along;
}
/* Returns the sum of dist2(p, a[i], d[i]) over the n lines. */
double sum_dist2(VEC p, VEC a[], VEC d[], int n) {
    double total = 0;
    int i = 0;
    while (i < n) {
        total += dist2(p, a[i], d[i]);
        ++i;
    }
    return total;
}
// Check 26 nearby points and verify the provided one is nearest.
/*
 * Evaluates sum_dist2 at p and at the 26 points offset from p by -D, 0 or +D
 * along each axis. Returns 1 when p itself has the smallest value (ties are
 * resolved in favour of p), 0 otherwise.
 */
int is_nearest(VEC p, VEC a[], VEC d[], int n) {
#define D 0.01
    double best = 1e100;
    /* 2 is outside the -1..1 offset range, i.e. "nothing selected yet". */
    int best_i = 2, best_j = 2, best_k = 2;
    for (int i = -1; i <= 1; ++i) {
        for (int j = -1; j <= 1; ++j) {
            for (int k = -1; k <= 1; ++k) {
                VEC probe = { p[0] + D * i, p[1] + D * j, p[2] + D * k };
                const double cost = sum_dist2(probe, a, d, n);
                const int is_center = (i == 0 && j == 0 && k == 0);
                // Prefer provided point among equals.
                if (cost < best || (is_center && cost == best)) {
                    best = cost;
                    best_i = i;
                    best_j = j;
                    best_k = k;
                }
            }
        }
    }
    return best_i == 0 && best_j == 0 && best_k == 0;
}
/* Scales v in place by 1 / sqrt(v.v), i.e. to unit Euclidean length. */
void normalize(VEC v) {
    const double len = sqrt(dot(v, v));
    for (int k = 0; k < 3; ++k) {
        v[k] /= len;
    }
}
/*
 * Demo: normalizes four hard-coded line directions, computes the point
 * nearest to the four lines, prints it, and reports if the neighbourhood
 * check does not confirm it.
 */
int main(void) {
    VEC a[] = {{-14.2, 17, -1}, {1, 1, 1}, {2.3, 4.1, 9.8}, {1,2,3}};
    VEC d[] = {{1.3, 1.3, -10}, {12.1, -17.2, 1.1}, {19.2, 31.8, 3.5}, {4,5,6}};
    const int n = 4;
    int i = 0;
    while (i < n) {
        normalize(d[i]);
        ++i;
    }
    VEC p;
    find_nearest_point(p, a, d, n);
    pv(p);
    printf("\n");
    if (is_nearest(p, a, d, n) == 0) {
        printf("Woops. Not nearest.\n");
    }
    return 0;
}
// A linear solver from rosettacode (with bug fix: added a missing fabs())
/* Address of element (y, x) of the n-column row-major matrix a. */
#define mat_elem(a, y, x, n) (a + ((y) * (n) + (x)))

/*
 * Exchanges rows r1 and r2 of the n-by-n matrix a together with the
 * matching entries of the right-hand side b. Does nothing if r1 == r2.
 */
void swap_row(double *a, double *b, int r1, int r2, int n)
{
    if (r1 == r2) return;
    for (int col = 0; col < n; col++) {
        double *first = mat_elem(a, r1, col, n);
        double *second = mat_elem(a, r2, col, n);
        const double held = *first;
        *first = *second;
        *second = held;
    }
    const double held_b = b[r1];
    b[r1] = b[r2];
    b[r2] = held_b;
}

/*
 * Solves the n-by-n system a * x = b by Gaussian elimination with partial
 * (row) pivoting followed by back substitution. a and b are overwritten;
 * the solution is written to x.
 */
void solve(double *a, double *b, double *x, int n)
{
#define A(y, x) (*mat_elem(a, y, x, n))
    /* Forward elimination. */
    for (int dia = 0; dia < n; dia++) {
        /* Pick the row at or below the diagonal with the largest magnitude in this column. */
        int pivot_row = dia;
        double pivot_mag = fabs(A(dia, dia));
        for (int row = dia + 1; row < n; row++) {
            const double mag = fabs(A(row, dia));
            if (mag > pivot_mag) {
                pivot_row = row;
                pivot_mag = mag;
            }
        }
        swap_row(a, b, dia, pivot_row, n);

        /* Eliminate this column from every row below the pivot. */
        for (int row = dia + 1; row < n; row++) {
            const double factor = A(row, dia) / A(dia, dia);
            for (int col = dia + 1; col < n; col++) {
                A(row, col) -= factor * A(dia, col);
            }
            A(row, dia) = 0;
            b[row] -= factor * b[dia];
        }
    }

    /* Back substitution, last row first. */
    for (int row = n - 1; row >= 0; row--) {
        double acc = b[row];
        for (int j = n - 1; j > row; j--) {
            acc -= x[j] * A(row, j);
        }
        x[row] = acc / A(row, row);
    }
#undef A
}
This isn't extensively tested, but seems to be working fine.
Let the base point of a line be p and its unit direction vector be d.
Then distance from point v to this line might be calculated using cross product
SquaredDist = ((v - p) x d)^2
Using Maple packet symbolic calculation, we can get
d := <dx, dy, dz>;
v := <vx, vy, vz>;
p := <px, py, pz>;
w := v - p;
cp := CrossProduct(d, w);
nrm := BilinearForm(cp, cp, conjugate=false); //squared dist
nr := expand(nrm);
//now partial derivatives
nrx := diff(nr, vx);
//results:
nrx := -2*dz^2*px-2*dy^2*px+2*dz^2*vx+2*dy^2*vx
+2*dx*py*dy-2*dx*vy*dy+2*dz*dx*pz-2*dz*dx*vz
nry := -2*dx^2*py-2*dz^2*py-2*dy*vz*dz+2*dx^2*vy
+2*dz^2*vy+2*dy*pz*dz+2*dx*dy*px-2*dx*dy*vx
nrz := -2*dy^2*pz+2*dy^2*vz-2*dy*dz*vy+2*dx^2*vz
-2*dx^2*pz-2*dz*vx*dx+2*dy*dz*py+2*dz*px*dx
To minimize sum of squared distances, we have to make system of linear equations for zero partial derivatives like this:
vx*2*(Sum(dz^2)+Sum(dy^2)) + vy * (-2*Sum(dx*dy)) + vz *(-2*Sum(dz*dx)) =
2*Sum(dz^2*px)+2*Sum(dy^2*px) -2*Sum(dx*py*dy)-2*Sum(dz*dx*pz)
where
Sum(dz^2) = Sum{over all i in line indexes} {dz[i] * dz[i]}
and solve it for unknowns vx, vy, vz
Edit: Old erroneous answer for planes instead of lines, left for reference
If we use general equation of line
A * x + B * y + C * z + D = 0
then distance from point (x, y, z) to this line is
Dist = Abs(A * x + B * y + C * z + D) / Sqrt(A^2 + B^2 + C^2)
To simplify - just normalize all line equations dividing by Norm's
Norm = Sqrt(A^2 + B^2 + C^2)
a = A / Norm
b = B / Norm
c = C / Norm
d = D / Norm
now equation is
a * x + b * y + c * z + d = 0
and distance
Dist = Abs(a * x + b * y + c * z + d)
and we can use squared distances like LS method (ai, bi, ci, di are coefficients for i-th line)
F = Sum((ai*x + bi*y + ci*z + di)^2) =
Sum(ai^2*x^2 + bi^2*y^2 + ci^2*z^2 + di^2 +
2 * (ai*bi*x*y + ai*ci*x*z + bi*y*ci*z + ai*x*di + bi*y*di + ci*z*di))
partial derivatives
dF/dx = 2*Sum(ai^2*x + ai*bi*y + ai*ci*z + ai*di) = 0
dF/dy = 2*Sum(bi^2*y + ai*bi*x + bi*ci*z + bi*di) = 0
dF/dz = 2*Sum(ci^2*z + ai*ci*x + bi*ci*y + ci*di) = 0
so we have system of linear equation
x * Sum(ai^2) + y * Sum(ai*bi) + z * Sum(ai*ci)= - Sum(ai*di)
y * Sum(bi^2) + x * Sum(ai*bi) + z * Sum(bi*ci)= - Sum(bi*di)
z * Sum(ci^2) + x * Sum(ai*ci) + y * Sum(bi*ci)= - Sum(ci*di)
x * Saa + y * Sab + z * Sac = - Sad
x * Sab + y * Sbb + z * Sbc = - Sbd
x * Sac + y * Sbc + z * Scc = - Scd
where S** are corresponding sums
and can solve it for unknowns x, y, z
I needed this for a sketch in Processing, so I ported Gene's answer. Works great and thought it might save someone else a little time. Unfortunately PVector/PMatrix don't have array accessors for vectors or matrices so I had to add these as local functions.
// Returns component i of v: 0 -> x, 1 -> y, any other index -> z.
float getv(PVector v, int i) {
  switch (i) {
    case 0:
      return v.x;
    case 1:
      return v.y;
    default:
      return v.z;
  }
}
// Stores value in component i of v: 0 -> x, 1 -> y, any other index -> z.
void setv(PVector v, int i, float value) {
  switch (i) {
    case 0:
      v.x = value;
      break;
    case 1:
      v.y = value;
      break;
    default:
      v.z = value;
      break;
  }
}
// Adds value to component i of v (same index convention as getv / setv).
void incv(PVector v, int i, float value) {
  float updated = getv(v, i) + value;
  setv(v, i, updated);
}
// Reads element (r, c) of a row-major matrix stored with 4 floats per row.
float getm(float[] mm, int r, int c) {
  int offset = r * 4 + c;
  return mm[offset];
}
// Writes value to element (r, c) of a row-major matrix stored with 4 floats per row.
void setm(float[] mm, int r, int c, float value) {
  int offset = r * 4 + c;
  mm[offset] = value;
}
// Adds value to element (r, c) of a row-major matrix stored with 4 floats per row.
void incm(float[] mm, int r, int c, float value) {
  int offset = r * 4 + c;
  mm[offset] += value;
}
// Returns the least-squares point nearest to the lines given by base points
// a[i] and directions d[i]. Accumulates sum_i (d d^T - (d.d) I) into a matrix
// (stored 4 floats per row) and sum_i (d (d.a) - (d.d) a) into the right-hand
// side, then solves the 3x3 system.
PVector findNearestPoint(PVector a[], PVector d[]) {
  float[] lhs = new float[16];
  PVector rhs = new PVector();
  int count = a.length;
  for (int line = 0; line < count; ++line) {
    PVector dir = d[line];
    PVector base = a[line];
    float dirSq = dir.dot(dir);
    float dirDotBase = dir.dot(base);
    for (int r = 0; r < 3; ++r) {
      for (int c = 0; c < 3; ++c) {
        incm(lhs, r, c, getv(dir, r) * getv(dir, c));
      }
      incm(lhs, r, r, -dirSq);
      incv(rhs, r, getv(dir, r) * dirDotBase - getv(base, r) * dirSq);
    }
  }
  float[] solution = solve(lhs, new float[] {rhs.x, rhs.y, rhs.z});
  return new PVector(solution[0], solution[1], solution[2]);
}
// Verifier
// Returns (d.d)(pa.pa) - (d.pa)^2 with pa = a - p. For a unit-length d this is
// the squared distance from p to the line through a with direction d.
float dist2(PVector p, PVector a, PVector d) {
  PVector offset = new PVector(a.x - p.x, a.y - p.y, a.z - p.z);
  float along = d.dot(offset);
  float dirSq = d.dot(d);
  float offsetSq = offset.dot(offset);
  return dirSq * offsetSq - along * along;
}
// Returns the sum of dist2(p, a[i], d[i]) over all lines (one per entry of a).
float sum_dist2(PVector p, PVector a[], PVector d[]) {
  float total = 0;
  int i = 0;
  while (i < a.length) {
    total += dist2(p, a[i], d[i]);
    i++;
  }
  return total;
}
// Check 26 nearby points and verify the provided one is nearest.
// Evaluates sum_dist2 at p and at the 26 points offset from p by -D, 0 or +D
// along each axis; returns true when p has the smallest value (ties go to p).
boolean isNearest(PVector p, PVector a[], PVector d[]) {
  final float D = 0.1f;
  float best = 3.4028235E38;
  // 2 is outside the -1..1 offset range, i.e. "nothing selected yet".
  int bestI = 2, bestJ = 2, bestK = 2;
  for (int i = -1; i <= 1; ++i) {
    for (int j = -1; j <= 1; ++j) {
      for (int k = -1; k <= 1; ++k) {
        PVector probe = new PVector( p.x + D * i, p.y + D * j, p.z + D * k );
        float cost = sum_dist2(probe, a, d);
        boolean isCenter = (i == 0 && j == 0 && k == 0);
        // Prefer provided point among equals.
        if (cost < best || (isCenter && cost == best)) {
          best = cost;
          bestI = i;
          bestJ = j;
          bestK = k;
        }
      }
    }
  }
  return bestI == 0 && bestJ == 0 && bestK == 0;
}
// Demo: normalizes four hard-coded line directions, computes the point nearest
// to the four lines, prints it, and reports if the neighbourhood check fails.
void setup() {
  PVector a[] = {
    new PVector(-14.2, 17, -1),
    new PVector(1, 1, 1),
    new PVector(2.3, 4.1, 9.8),
    new PVector(1,2,3)
  };
  PVector d[] = {
    new PVector(1.3, 1.3, -10),
    new PVector(12.1, -17.2, 1.1),
    new PVector(19.2, 31.8, 3.5),
    new PVector(4,5,6)
  };
  for (PVector dir : d) {
    dir.normalize();
  }
  PVector p = findNearestPoint(a, d);
  println(p);
  boolean confirmed = isNearest(p, a, d);
  if (!confirmed) {
    println("Woops. Not nearest.\n");
  }
}
// From rosettacode (with bug fix: added a missing fabs())
// Flat array index of element (y, x) in a row-major matrix with 4 entries per row.
int mat_elem(int y, int x) {
  int rowStart = y * 4;
  return rowStart + x;
}
// Exchanges the first n entries of rows r1 and r2 of matrix a, together with
// the matching entries of b. Does nothing when r1 == r2.
void swap_row(float[] a, float[] b, int r1, int r2, int n)
{
  if (r1 == r2) return;
  for (int col = 0; col < n; col++) {
    int first = mat_elem(r1, col);
    int second = mat_elem(r2, col);
    float held = a[first];
    a[first] = a[second];
    a[second] = held;
  }
  float heldB = b[r1];
  b[r1] = b[r2];
  b[r2] = heldB;
}
// Solves the 3x3 system a * x = b by Gaussian elimination with partial (row)
// pivoting followed by back substitution. a is stored 4 floats per row; a and
// b are overwritten. Returns the solution as a new 3-element array.
float[] solve(float[] a, float[] b)
{
  float[] x = new float[] {0,0,0};
  int n = x.length;

  // Forward elimination.
  for (int dia = 0; dia < n; dia++) {
    // Pick the row at or below the diagonal with the largest magnitude in this column.
    int pivotRow = dia;
    float pivotMag = abs(getm(a, dia, dia));
    for (int row = dia + 1; row < n; row++) {
      float mag = abs(getm(a, row, dia));
      if (mag > pivotMag) {
        pivotRow = row;
        pivotMag = mag;
      }
    }
    swap_row(a, b, dia, pivotRow, n);

    // Eliminate this column from every row below the pivot.
    for (int row = dia + 1; row < n; row++) {
      float factor = getm(a, row, dia) / getm(a, dia, dia);
      for (int col = dia + 1; col < n; col++) {
        incm(a, row, col, -factor * getm(a, dia, col));
      }
      setm(a, row, dia, 0);
      b[row] -= factor * b[dia];
    }
  }

  // Back substitution, last row first.
  for (int row = n - 1; row >= 0; row--) {
    float acc = b[row];
    for (int j = n - 1; j > row; j--) {
      acc -= x[j] * getm(a, row, j);
    }
    x[row] = acc / getm(a, row, row);
  }
  return x;
}

Finding the biggest square in a rectangle with a condition

In a rectangle with given height and width. I'm supposed to find the square with most 1s and print the number of 1s on stdout, also in that same square there must not be more 2s than half of 1s, i.e:((# of 1s) /2) >= (# of 2s).
Square is always at least 2x2 big.
So for the input (first two numbers are height and width):
6 8
0 0 2 2 2 1 2 1
0 1 2 2 1 0 1 1
0 0 1 0 1 2 0 2
2 1 0 2 2 1 1 1
1 2 1 0 0 0 1 0
1 2 0 1 1 2 1 1
The correct answer is 9.(square is 5x5 big and the upperleft corner is on second row, third column)
Now i managed to somewhat write a program that does this correctly, but it's too slow.
So I'm asking for advice on how to write the algorithm so that it solves this: https://justpaste.it/1cfem in under 1 second (correct answer 15) and this: https://justpaste.it/1cfen in under 4 seconds (correct answer 556).
EDIT: I forgot to mention by square I mean only the perimeter of the square (the four sides)
My code works something like this:
Iterate through all the fields in the input and iterate through all the possible squares that start in this field (starting from the biggest square possible). Then I have some conditions, e.g. I break the iteration when the possible perimeter of the square is smaller than the biggest number of 1s I have found so far in a perimeter, etc. Also, when I'm trying to find the squares starting from the given field, I remember the up side and left side of the preceding square and then just decrement it (if there is a 1 or 2).
But this isn't enough, since a solution like this solves the second input in about a minute and a half, and I need it in four seconds.
The code:
NOTE: the minerals represent 1s and toxics represent 2s
#include <stdio.h>
#include <stdlib.h>
/* Best perimeter count of 1-cells found so far; shared by both traversals. */
int maxMinerals;
/*
 * Scans the height x width grid `map` (row-major, one int per cell). For each
 * start cell (startX, startY) it tallies, on the perimeter of a sequence of
 * shrinking squares, the cells equal to 1 ("minerals") and the other non-zero
 * cells ("toxics"). A square is accepted when toxics <= minerals / 2; the
 * largest accepted mineral count is kept in the global maxMinerals, which is
 * printed at the end. main() calls this variant when height > width.
 * NOTE(review): the parameter is declared `const int const *`, i.e. with a
 * duplicated const qualifier.
 */
void traverseforH(const int const *map, const int height, const int width) {
const int h1 = height - 1;
const int w1 = width - 1;
/* Offset of row startY in map; advanced by width at the end of each startY iteration. */
int lineOffset = 0;
for (int startY = 0; startY < h1; startY++) {
int yside = height - startY;
/* yside*2 + (yside-2)*2 is the number of perimeter cells of a square of side
 * yside; stop once even that cannot exceed the current best. */
if (!(yside * 2 + (yside - 2)*2 > maxMinerals)) {
break;
}
for (int startX = 0; startX < w1; startX++) {
int xside = width - startX;
/* Same pruning, using the horizontal room left. */
if (!(xside * 2 + (xside - 2)*2 > maxMinerals)) {
break;
}
/* maxBoundl / maxBoundm are used below as the exclusive end row / end column
 * of the first (largest) square examined from this start cell. */
int maxBoundl = width;
int maxBoundm = width;
if (startY + maxBoundm - height - startX > 0) {
maxBoundl = height;
maxBoundm = height;
if (startX - startY > 0) {
maxBoundl = maxBoundl + startY - startX;
} else {
maxBoundm = maxBoundm + startX - startY;
}
} else if (startY - startX > 0) {
maxBoundm = maxBoundm + startY - startX;
maxBoundl = maxBoundm;
maxBoundm = maxBoundm + startX - startY;
} else {
maxBoundl = maxBoundl + startY - startX;
}
/* Offset of the square's last row. */
int mBw = (maxBoundl - 1) * width;
int toxicsLeftSide = 0;
int mineralsLeftSide = 0;
int toxicsUpSide = 0;
int mineralsUpSide = 0;
int mw;
int lastMinerals = 0;
int toxics = 0;
int sidey = lineOffset + width;
/* Top row (also remembered in the *UpSide tallies) and bottom row. */
for (int x = startX; x < maxBoundm; x++) {
mw = x + lineOffset;
if (map[mw] == 1) {
mineralsUpSide++;
lastMinerals++;
} else if (map[mw]) {
toxicsUpSide++;
toxics++;
}
mw = x + mBw;
if (map[mw] == 1) {
lastMinerals++;
} else if (map[mw]) {
toxics++;
}
}
/* Left column (also remembered in the *LeftSide tallies) and right column,
 * for the rows strictly between the top and bottom rows. */
for (int y = startY + 1; y < maxBoundl - 1; y++) {
mw = startX + sidey;
if (map[mw] == 1) {
mineralsLeftSide++;
lastMinerals++;
} else if (map[mw]) {
toxicsLeftSide++;
toxics++;
}
mw = maxBoundm - 1 + sidey;
if (map[mw] == 1) {
lastMinerals++;
} else if (map[mw]) {
toxics++;
}
sidey = sidey + width;
}
/* The bottom-left cell is added to the left-side tallies only; it was already
 * counted in lastMinerals / toxics by the bottom-row pass above. */
if (map[startX + mBw] == 1) {
mineralsLeftSide++;
} else if (map[startX + mBw]) {
toxicsLeftSide++;
}
/* NOTE(review): upsideData is filled in but not read anywhere in this function. */
int upsideData [2];
upsideData[0] = mineralsUpSide;
upsideData[1] = toxicsUpSide;
/* Accept when toxics <= minerals / 2 and the count beats the current best. */
if (!(lastMinerals / 2.0 < toxics) && lastMinerals > maxMinerals) {
maxMinerals = lastMinerals;
}
mBw = mBw - width;
int noOfSquares;
if (xside < yside) {
noOfSquares = xside - 1;
} else {
noOfSquares = yside - 1;
}
/* Each iteration examines the square with both end bounds reduced by k. */
for (int k = 1; k < noOfSquares; k++) {
int maxBoundy = maxBoundl - k;
int maxBoundx = maxBoundm - k;
/* Perimeter-size pruning for the current square. */
if (!(((maxBoundx - startX)*2 + (maxBoundx - 2 - startX)*2) > maxMinerals)) {
break;
}
sidey = lineOffset + width;
lastMinerals = 0;
toxics = 0;
/* Remove the cell that dropped off the end of the top row from the up-side tallies. */
if (map[maxBoundx + lineOffset] == 1) {
mineralsUpSide--;
} else if (map[maxBoundx + lineOffset]) {
toxicsUpSide--;
}
/* Remove the cell that dropped off the end of the left column from the left-side tallies. */
if (map[startX + mBw + width] == 1) {
mineralsLeftSide--;
} else if (map[startX + mBw + width]) {
toxicsLeftSide--;
}
/* Recount the new bottom row, skipping column startX (held in the left-side tallies). */
for (int x = startX + 1; x < maxBoundx; x++) {
mw = x + mBw;
if (map[mw] == 1) {
lastMinerals++;
} else if (map[mw]) {
toxics++;
}
}
/* Recount the new right column between the top and bottom rows. */
for (int y = startY + 1; y < maxBoundy - 1; y++) {
mw = maxBoundx - 1 + sidey;
if (map[mw] == 1) {
lastMinerals++;
} else if (map[mw]) {
toxics++;
}
sidey = sidey + width;
}
/* Perimeter totals = freshly counted sides + carried top and left sides. */
int finalMinerals = lastMinerals + mineralsLeftSide + mineralsUpSide;
int finalToxics = toxics + toxicsLeftSide + toxicsUpSide;
if (!(finalMinerals / 2.0 < finalToxics) && finalMinerals > maxMinerals) {
maxMinerals = finalMinerals;
}
mBw = mBw - width;
}
}
lineOffset = lineOffset + width;
}
printf("%d\n", maxMinerals);
}
/*
 * Counterpart of traverseforH that main() calls when height <= width. It scans
 * the height x width grid `map` (row-major, one int per cell) and, for each
 * start cell, tallies cells equal to 1 ("minerals") and other non-zero cells
 * ("toxics") on the perimeter of a sequence of shrinking squares. A square is
 * accepted when toxics <= minerals / 2; the largest accepted mineral count is
 * kept in the global maxMinerals, which is printed at the end.
 */
void traverseforW(int *map, const int height, const int width) {
int h1 = height - 1;
int w1 = width - 1;
/* Offset of row startY in map; advanced by width at the end of each startY iteration. */
int lineOffset = 0;
for (int startY = 0; startY < h1; startY++) {
int yside = height - startY;
/* yside*2 + (yside-2)*2 is the number of perimeter cells of a square of side
 * yside; stop once even that cannot exceed the current best. */
if (!(yside * 2 + (yside - 2)*2 > maxMinerals)) {
break;
}
for (int startX = 0; startX < w1; startX++) {
int xside = width - startX;
/* Same pruning, using the horizontal room left. */
if (!(xside * 2 + (xside - 2)*2 > maxMinerals)) {
break;
}
/* maxBoundl / maxBoundm are used below as the exclusive end row / end column
 * of the first (largest) square examined from this start cell. */
int maxBoundl = height;
int maxBoundm = height;
if (startX + maxBoundl - width - startY > 0) {
maxBoundl = width;
maxBoundm = width;
if (startX - startY > 0) {
maxBoundl = maxBoundl + startY - startX;
} else {
maxBoundm = maxBoundm + startX - startY;
}
} else if (startY - startX > 0) {
maxBoundm = maxBoundm + startX - startY;
} else {
maxBoundl = maxBoundl + startX - startY;
maxBoundm = maxBoundl;
maxBoundl = maxBoundl + startY - startX;
}
/* Offset of the square's last row. */
int mBw = (maxBoundl - 1) * width;
int toxicsLeftSide = 0;
int mineralsLeftSide = 0;
int toxicsUpSide = 0;
int mineralsUpSide = 0;
int mw;
int lastMinerals = 0;
int toxics = 0;
int sidey = lineOffset + width;
/* Top row (also remembered in the *UpSide tallies) and bottom row. */
for (int x = startX; x < maxBoundm; x++) {
mw = x + lineOffset;
if (map[mw] == 1) {
mineralsUpSide++;
lastMinerals++;
} else if (map[mw]) {
toxicsUpSide++;
toxics++;
}
mw = x + mBw;
if (map[mw] == 1) {
lastMinerals++;
} else if (map[mw]) {
toxics++;
}
}
/* Left column (also remembered in the *LeftSide tallies) and right column,
 * for the rows strictly between the top and bottom rows. */
for (int y = startY + 1; y < maxBoundl - 1; y++) {
mw = startX + sidey;
if (map[mw] == 1) {
mineralsLeftSide++;
lastMinerals++;
} else if (map[mw]) {
toxicsLeftSide++;
toxics++;
}
mw = maxBoundm - 1 + sidey;
if (map[mw] == 1) {
lastMinerals++;
} else if (map[mw]) {
toxics++;
}
sidey = sidey + width;
}
/* The bottom-left cell is added to the left-side tallies only; it was already
 * counted in lastMinerals / toxics by the bottom-row pass above. */
if (map[startX + mBw] == 1) {
mineralsLeftSide++;
} else if (map[startX + mBw]) {
toxicsLeftSide++;
}
/* Accept when toxics <= minerals / 2 and the count beats the current best. */
if (!(lastMinerals / 2.0 < toxics) && lastMinerals > maxMinerals) {
maxMinerals = lastMinerals;
}
mBw = mBw - width;
int noOfSquares;
if (xside < yside) {
noOfSquares = xside - 1;
} else {
noOfSquares = yside - 1;
}
/* Each iteration examines the square with both end bounds reduced by k. */
for (int k = 1; k < noOfSquares; k++) {
int maxBoundy = maxBoundl - k;
int maxBoundx = maxBoundm - k;
/* Perimeter-size pruning for the current square. */
if (!(((maxBoundx - startX)*2 + (maxBoundx - 2 - startX)*2) > maxMinerals)) {
break;
}
sidey = lineOffset + width;
lastMinerals = 0;
toxics = 0;
/* Remove the cell that dropped off the end of the top row from the up-side tallies. */
if (map[maxBoundx + lineOffset] == 1) {
mineralsUpSide--;
} else if (map[maxBoundx + lineOffset]) {
toxicsUpSide--;
}
/* Remove the cell that dropped off the end of the left column from the left-side tallies. */
if (map[startX + mBw + width] == 1) {
mineralsLeftSide--;
} else if (map[startX + mBw + width]) {
toxicsLeftSide--;
}
/* Start the totals from the carried top and left sides. */
int finalMinerals = mineralsUpSide + mineralsLeftSide;
int finalToxics = toxicsLeftSide + toxicsUpSide;
/* Recount the new bottom row, skipping column startX (held in the left-side tallies). */
for (int x = startX + 1; x < maxBoundx; x++) {
mw = x + mBw;
if (map[mw] == 1) {
lastMinerals++;
} else if (map[mw]) {
toxics++;
}
}
/* Recount the new right column between the top and bottom rows. */
for (int y = startY + 1; y < maxBoundy - 1; y++) {
mw = maxBoundx - 1 + sidey;
if (map[mw] == 1) {
lastMinerals++;
} else if (map[mw]) {
toxics++;
}
sidey = sidey + width;
}
finalMinerals += lastMinerals;
finalToxics += toxics;
if (!(finalMinerals / 2.0 < finalToxics) && finalMinerals > maxMinerals) {
maxMinerals = finalMinerals;
}
mBw = mBw - width;
}
}
lineOffset = lineOffset + width;
}
printf("%d\n", maxMinerals);
}
/*
 * Reads the grid from "pub01.in" and runs the traversal that matches its
 * orientation. The first line holds "<height> <width>"; every following
 * character '0', '1' or '2' is one cell, in row-major order, and all other
 * characters are ignored.
 *
 * Returns 0 on success and 1 if the file cannot be opened, the header is
 * malformed, or memory cannot be allocated.
 */
int main() {
    FILE *file = fopen("pub01.in", "r");
    if (file == NULL) {
        perror("pub01.in");
        return 1;
    }

    /* Header line. fgetc returns int so that EOF can be told apart from data;
     * characters beyond the buffer capacity are dropped instead of overflowing it. */
    char hw[14];
    size_t len = 0;
    int c;
    while ((c = fgetc(file)) != EOF && c != '\n') {
        if (len < sizeof hw - 1) {
            hw[len++] = (char) c;
        }
    }
    hw[len] = '\0'; /* sscanf needs a terminated string */

    int h, w;
    if (sscanf(hw, "%d %d", &h, &w) != 2 || h <= 0 || w <= 0) {
        fprintf(stderr, "invalid header line: expected \"<height> <width>\"\n");
        fclose(file);
        return 1;
    }

    /* One zero-initialised slot per cell plus one trailing 0 entry; cells
     * missing from a short file therefore read as 0. */
    size_t size = (size_t) h * (size_t) w;
    int *input = calloc(size + 1, sizeof (int));
    if (input == NULL) {
        perror("calloc");
        fclose(file);
        return 1;
    }

    /* Stop at `size` cells so that surplus digits cannot write past the buffer. */
    size_t k = 0;
    while (k < size && (c = fgetc(file)) != EOF) {
        if (c == '0' || c == '1' || c == '2') {
            input[k] = c - '0';
            k++;
        }
    }
    fclose(file);

    if (h > w) {
        traverseforH(input, h, w);
    } else {
        traverseforW(input, h, w);
    }
    free(input);
    return 0;
}
Preprocess step:
First pre-process matrix, using prefix sum method all rows and columns so that you will be able to calculate # of 1s and # of 2s in the perimeter of square in O(1).
By now you will have 4 data-structures: rowSumFor1, rowSumFor2, colSumFor1, colSumFor2. For example: rowSumFor1[i][j] would tell us # of 1s in ith row for column indices between 0 and j inclusive.
Time complexity: O(w x h)
Complete Code:
#include<stdio.h>
/* Returns the smaller of a and b. */
int min(int a,int b){
    if (b < a) {
        return b;
    }
    return a;
}
/* Returns the larger of a and b. */
int max(int a,int b){
    if (b > a) {
        return b;
    }
    return a;
}
// currently hard-coding dimensions for test purposes
// The tables are indexed from 1 by main(); index 0 stays 0 and serves as the
// empty prefix, so the largest usable grid dimension is 599.
// horizontal sums
// rowSumForK[row][col]: number of cells equal to K in `row`, columns 1..col.
int rowSumFor1[600][600];
int rowSumFor2[600][600];
// vertical sums
// colSumForK[col][row]: number of cells equal to K in `col`, rows 1..row.
int colSumFor1[600][600];
int colSumFor2[600][600];
/*
 * Counts the matching cells on the perimeter of the len x len square whose
 * upper-left corner is (row, col), using 1-based prefix sums: rowSum[r][c]
 * covers columns 1..c of row r and colSum[c][r] covers rows 1..r of column c.
 * Each edge takes len - 1 cells so that no corner is counted twice
 * (4x4 example):
 *   aaab
 *   d  b
 *   d  b
 *   dccc
 */
static int perimeterCount(int rowSum[][600], int colSum[][600], int row, int col, int len){
    int last = len - 1;
    int top = rowSum[row][col + last - 1] - rowSum[row][col - 1];
    int bottom = rowSum[row + last][col + last] - rowSum[row + last][col];
    int left = colSum[col][row + last] - colSum[col][row];
    int right = colSum[col + last][row + last - 1] - colSum[col + last][row - 1];
    return top + bottom + left + right;
}

/*
 * Reads "<height> <width>" and then height*width cell values from stdin,
 * builds row and column prefix sums for the values 1 and 2, and examines every
 * square of side >= 2. A square qualifies when its perimeter holds at least
 * twice as many 1s as 2s. Prints the upper-left corner (1-based row, column)
 * and side length of the qualifying square with the most 1s, then that count.
 * When no square qualifies, "0 0 0" and "0" are printed.
 *
 * Returns 0 on success, 1 on malformed input or dimensions outside 1..599.
 */
int main(){
    int w,h;
    /* The tables are 600x600 and indexed from 1, so 599 is the largest dimension. */
    if(scanf("%d %d",&h,&w) != 2 || h < 1 || w < 1 || h > 599 || w > 599){
        fprintf(stderr,"invalid dimensions: expected two integers in 1..599\n");
        return 1;
    }
    for(int row=1;row <= h;row++)for(int col=1;col <= w;col++){
        int temp;
        if(scanf("%d",&temp) != 1){
            fprintf(stderr,"missing cell value at row %d, column %d\n",row,col);
            return 1;
        }
        // first add previous sum
        rowSumFor1[row][col]=rowSumFor1[row][col - 1];
        rowSumFor2[row][col]=rowSumFor2[row][col - 1];
        colSumFor1[col][row]=colSumFor1[col][row - 1];
        colSumFor2[col][row]=colSumFor2[col][row - 1];
        if(temp==1){
            rowSumFor1[row][col]++;
            colSumFor1[col][row]++;
        }
        else if(temp==2){
            rowSumFor2[row][col]++;
            colSumFor2[col][row]++;
        }
    }
    /* Initialised so that the final printf is well defined when nothing qualifies. */
    int result = 0, rowId = 0, colId = 0, mlength = 0;
    for(int len=min(w,h); len > 1 ; len-- ) // iteration on possible lengths
    {
        // iteration on all co-ordinates as upper-left corner of our square
        for(int row=1;row <= (h - len + 1);row++)for(int col=1;col <= (w - len + 1);col++){
            int ones = perimeterCount(rowSumFor1, colSumFor1, row, col, len); // # of 1s on perimeter of this square
            int twos = perimeterCount(rowSumFor2, colSumFor2, row, col, len); // # of 2s on perimeter of this square
            if(ones >= 2 * twos && ones > result){
                result = ones;
                rowId = row;
                colId = col;
                mlength = len;
            }
        }
    }
    printf("%d %d %d\n",rowId,colId,mlength);
    printf("%d\n",result);
    return 0;
}
Time complexity: O(w x h x min(w,h))
EDIT:
Replaced pseudo-code with complete code. It results as expected for all 3 tests presented by OP.

Applying a transformation to NV12 like for I420

I have an injective function that moves around some pixels in an image:
pixel (x, y) ===func===> pixel (X, Y)
X = funcX(x, y)
Y = funcY(y, x)
I want to use this function to transform the whole image in RGB, I420 and NV12 mode.
* RGB *: If the image is in RGB mode, it's pretty obvious:
// RGB: the three planes share one geometry, so every plane uses the same
// destination and source offsets.
strideR = strideG = strideB = width;
//Temporary table for the destination
for (j = 0; j < height; j++)
    for (i = 0; i < width; i++) {
        toR[i][j] = j * strideR + i;
        toG[i][j] = j * strideG + i;
        toB[i][j] = j * strideB + i;
    }
//Temporary table for the source
for (j = 0; j < height; j++)
    for (i = 0; i < width; i++) {
        fromR[i][j] = funcY(i, j) * strideR + funcX(i, j);
        fromG[i][j] = funcY(i, j) * strideG + funcX(i, j);
        fromB[i][j] = funcY(i, j) * strideB + funcX(i, j);
    }
// Copy every pixel from its mapped source offset to its destination offset.
for (j = 0; j < height; j++)
    for (i = 0; i < width; i++) {
        destR[ toR[i][j] ] = srcR[ fromR[i][j] ];
        destG[ toG[i][j] ] = srcG[ fromG[i][j] ];
        // The blue plane is named destB, matching destR / destG.
        destB[ toB[i][j] ] = srcB[ fromB[i][j] ];
    }
* I420 *: If the image is in I420 mode (YYYYYYYY UU VV), the following is working:
// I420: a full-resolution Y plane followed by separate U and V planes whose
// stride is half the image width.
strideY = width;
strideU = strideV = width / 2;
//Temporary table for the destination
for (j = 0; j < height; j++)
for (i = 0; i < width; i++) {
toY[i][j] = j * strideY + i;
// Chroma offsets use halved row and column indices.
toU[i][j] = j / 2 * strideU + i / 2;
toV[i][j] = j / 2 * strideV + i / 2;
}
//Temporary table for the source
for (j = 0; j < height; j++)
for (i = 0; i < width; i++) {
fromY[i][j] = funcY(i, j) * strideY + funcX(i, j);
// Same halving, applied to the mapped source coordinates.
fromU[i][j] = funcY(i, j) / 2 * strideU + funcX(i, j) / 2;
fromV[i][j] = funcY(i, j) / 2 * strideV + funcX(i, j) / 2;
}
for (j = 0; j < height; j++)
for (i = 0; i < width; i++) {
destY[ toY[i][j] ] = srcY[ fromY[i][j] ];
// Chroma is copied only for pixels with even row and even column.
if ((i % 2 == 0) && (j % 2 == 0)) {
destU[ toU[i][j] ] = srcU[ fromU[i][j] ];
destV[ toV[i][j] ] = srcV[ fromV[i][j] ];
}
}
* NV12 *: If the image is in NV12 mode (YYYYYYYY UVUV), the following is NOT working:
// NV12: a full-resolution Y plane followed by one plane of interleaved U,V
// bytes whose stride equals the image width.
strideY = strideUV = width;
//Temporary table for the destination
for (j = 0; j < height; j++)
for (i = 0; i < width; i++) {
toY[i][j] = j * strideY + i;
// Chroma offset: halved row index, unhalved column index.
toUV[i][j] = j / 2 * strideUV + i;
}
//Temporary table for the source
for (j = 0; j < height; j++)
for (i = 0; i < width; i++) {
fromY[i][j] = funcY(i, j) * strideY + funcX(i, j);
// The source column funcX(i, j) is used as-is here, so this offset can be odd
// and then points at the second byte of an interleaved pair.
fromUV[i][j] = funcY(i, j) / 2 * strideUV + funcX(i, j);
}
for (j = 0; j < height; j++)
for (i = 0; i < width; i++) {
destY[ toY[i][j] ] = srcY[ fromY[i][j] ];
// Chroma is copied only for pixels with even row and even column, two
// consecutive bytes at a time.
if ((i % 2 == 0) && (j % 2 == 0)) {
destUV[ toUV[i][j] ] = srcUV[ fromUV[i][j] ];
destUV[ toUV[i][j] + 1 ] = srcUV[ fromUV[i][j] + 1 ];
}
}
I got the image but with wrong colors. The black and white portion (aka the Y portion) is correct but the color portion (aka the UV portion) is altered. What am I doing wrong?
Found the problem! Solution is:
// Round the mapped source column down to an even value so that the offset
// lands on the first byte of an interleaved pair.
fromUV[i][j] = funcY(i, j) / 2 * strideUV + ((int)(funcX(i, j) / 2)) * 2;
I needed to floor X/2 to get the start of the UV byte.

Resources