squarepie solution - algorithm

The following problem was asked in the programming contest, which is over now.
Squarepie program
I tried the best solution I could, but always got time limit exceeded error. My solution was as follows.
First add all the edges in a structure which is first sorted by length and then by their position. I was having two different structures for x and y edges. Find the outside rectangle, and add it to the stack. Now for each rectangle in the stack find if there is any intersecting edge. If yes divide this rectangle in two by this edge and add both to the stack. If failed to find any bisecting edge, add the area of the rectangle in priority queue. At the end print elements from priority queue.
I now wonder is there any faster solution.
Edit :-
Attaching my solution.
My Final solution

Everything looks good except for getLargestRect(), which you really overcomplicate. Just return rectangle(minX, minY, maxX, maxY). You can find the mins and maxs in linear time. The current implementation is O(n^2) when all the lines have the same length.
I also coded up my own algorithm, if you want to have a look at a different approach. My idea was to store all the vertical lines in a fancy data structure, then scan through the horizontal lines and find the rectangles they close off.
When looking at the horizontal line h, all the vertical lines with y1 < h.y && y2 >= h.y are stored in a map by their x value. The current horizontal line forms rectangles with all the vertical lines from map[h.x1] to map[h.x2]. The outer two lines extend past h.y, but all the middle ones must end at h.y and are therefore removed from the map after the area of their rectangles has been calculated. The vertical lines that need to be added to the map for each horizontal line are found efficiently by sorting the verticals according to their y1 value.
Here is the code:
#include <iostream>
#include <map>
#include <vector>
#include <algorithm>
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) < (b) ? (b) : (a))
using namespace std;
// A horizontal segment: it spans x1..x2 at height y.
class Horizontal
{
public:
    int x1;
    int x2;
    int y;

    Horizontal(int x1, int x2, int y)
        : x1(x1), x2(x2), y(y)
    {
    }

    // Ordering predicate for sort(): a comes first when its y is smaller.
    static bool comp(const Horizontal & a, const Horizontal & b)
    {
        return b.y > a.y;
    }
};
// A vertical segment at column x. Only its starting y (y1) is kept; the
// end of the segment is not stored.
class Vertical
{
public:
    int x;
    int y1;

    Vertical(int x, int y1)
        : x(x), y1(y1)
    {
    }

    // Ordering predicate for sort(): a comes first when it starts at a smaller y.
    static bool comp(const Vertical & a, const Vertical & b)
    {
        return b.y1 > a.y1;
    }
};
// Shared state of the sweep. processHorizontal() updates total, vertI,
// currentVerts and solutions; readInput() fills verticals and horizontals.

// Sum of every rectangle area recorded so far (denominator of the printed ratios).
long long total = 0;
int vertI = 0; // index of next vertical to add to currentVerts
map<int, int> currentVerts; // currentVerts[5] = y1 of the vert line with x=5
// All vertical segments read from input; main() sorts them by y1.
vector<Vertical> verticals;
// All horizontal segments read from input; main() sorts them by y.
vector<Horizontal> horizontals;
// One entry per rectangle found: its area.
vector<int> solutions;
// Reads the segment list from cin into verticals / horizontals.
void readInput();
// Closes off the rectangles that end at the given horizontal line.
void processHorizontal(Horizontal & line);
int main()
{
cout.precision(10);
readInput();
sort(verticals.begin(), verticals.end(), Vertical::comp);
sort(horizontals.begin(), horizontals.end(), Horizontal::comp);
// process the lines (start at i = 1 to ignore the top one)
for (int i = 1; i < horizontals.size(); i++)
{
processHorizontal(horizontals[i]);
}
sort(solutions.begin(), solutions.end());
for (int i = solutions.size() - 1; i >= 0; i--)
{
cout << (double) solutions[i] / total << "\n";
}
}
// Reads the segment count followed by that many "x1 y1 x2 y2" quadruples from
// cin. Endpoints are normalised so that x1 <= x2 and y1 <= y2. A segment with
// x1 == x2 is stored as a Vertical (x, smaller y); anything else is stored as
// a Horizontal (x1, x2, y1).
//
// Reading stops quietly at the first failed extraction, so missing or
// truncated input never pushes segments built from uninitialised values.
void readInput()
{
    int n;
    if (!(cin >> n))
        return; // no segment count available

    int x1, x2, y1, y2;
    for (int i = 0; i < n; i++)
    {
        if (!(cin >> x1 >> y1 >> x2 >> y2))
            break; // input ended early or was malformed

        if (x2 < x1) swap(x1, x2);
        if (y2 < y1) swap(y1, y2);

        if (x1 == x2)
            verticals.push_back(Vertical(x1, y1));
        else
            horizontals.push_back(Horizontal(x1, x2, y1));
    }
}
// Handles one horizontal line of the sweep.
//
// 1. Every not-yet-added vertical whose y1 is above horiz.y is inserted into
//    currentVerts (keyed by x, value = the y at which its open rectangle starts).
// 2. Each pair of neighbouring verticals between horiz.x1 and horiz.x2 forms a
//    rectangle closed by this line; its area is added to total and appended to
//    solutions.
// 3. Verticals strictly between the two ends are erased; the left end keeps
//    going and has its start moved down to horiz.y.
//
// If either end of the line has no active vertical at its x, the line is
// skipped: walking from find() == end() would otherwise run off the map.
void processHorizontal(Horizontal & horiz)
{
    // add all vert lines which start above horiz to currentVerts
    for ( ; static_cast<size_t>(vertI) < verticals.size()
            && verticals[vertI].y1 < horiz.y; vertI++)
    {
        currentVerts[verticals[vertI].x] = verticals[vertI].y1;
    }

    const map<int, int>::iterator left = currentVerts.find(horiz.x1);
    const map<int, int>::iterator right = currentVerts.find(horiz.x2);
    if (left == currentVerts.end() || right == currentVerts.end())
        return; // malformed input: nothing safe to close off

    map<int, int>::iterator cur = left;
    while (cur != right)
    {
        // Take the successor before a possible erase(cur) invalidates cur.
        map<int, int>::iterator next = cur;
        ++next;

        const int width = next->first - cur->first;  // difference in x
        const int height = horiz.y - cur->second;    // difference in y
        const int area = width * height;
        total += area;
        solutions.push_back(area);

        if (cur != left)
        {
            // an inner vertical must be a short line ending here: drop it
            currentVerts.erase(cur);
        }
        else
        {
            // the left boundary continues; cut its rectangle at horiz.y
            cur->second = horiz.y;
        }
        cur = next;
    }
}

Related

Given N equal circles (possibly overlapping) and M points on a plane. Find a circle which contains maximum number of points

The picture below shows a simple case. Circle 1 is the winner, because it contains points [1, 2, 5] -- more than any other circle.
Naive implementation which checks every point against every circle gives Time Limit.
"Use hash" they say. But where?
#include <iostream>
#include <vector>
using namespace std;
// A point (or circle centre) with integer coordinates.
struct Point
{
    int x;
    int y;
};

// Returns the squared Euclidean distance between p1 and p2.
//
// The coordinates are widened to 64 bits *before* subtracting: subtracting
// two ints first can overflow when the points are far apart.
int64_t dist(Point p1, Point p2)
{
    const int64_t dx = static_cast<int64_t>(p1.x) - p2.x;
    const int64_t dy = static_cast<int64_t>(p1.y) - p2.y;
    return dx*dx + dy*dy;
}
// Brute force: reads the circle centres, then tests every point against every
// circle and prints the 1-based index of the circle holding the most points.
// A point counts as inside when its squared distance to the centre is <= 400.
int main()
{
    int circle_num;
    cin >> circle_num;

    vector<Point> circles(circle_num);
    vector<int64_t> count (circle_num);
    for (size_t c = 0; c < circles.size(); ++c)
        cin >> circles[c].x >> circles[c].y;

    int points_num;
    cin >> points_num;
    for ( ; points_num--; )
    {
        Point p;
        cin >> p.x >> p.y;
        for (int i = 0; i != circle_num; ++i)
            count[i] += (dist(p, circles[i]) <= 400) ? 1 : 0;
    }

    // The first circle with the strictly largest count wins; circle 0 is the
    // answer when no circle contains any point.
    int index = 0;
    int64_t max_count = 0;
    for (int i = 0; i != circle_num; ++i)
    {
        if (max_count < count[i])
        {
            index = i;
            max_count = count[i];
        }
    }
    cout << (index + 1) << endl;
}
Possible input:
3 // number of circles
-1 0 // circle 1 center
1 0 // circle 2 center
2 5 // circle 3 center
3 // number of points
10 0
20 0
22 5
Output: 3 -- circle 3 contains the most number of points
Since the circles are all the same size (800 units), a practical approach is to divide the plane into a grid, with each square 401x401 units, and use a hash from (x,y) -> list to collect the points in each square.
Then for each circle, just check the points in the up to 9 squares that it overlaps.

How can you find the cuboid with the greatest volume in a heightmap? (with low complexity)

I need to find the cuboid with the greatest volume, contained within a 2D-heightmap.
The heightmap is an array of size w*d where w is width, h is height and d is depth.
In C, this would look along the lines of:
unsigned heightmap[w][d]; // all values are <= h
I already know that there is a naive algorithm which can solve this with O(w*d*h) complexity.
However, I suspect that there is a more optimal method out there.
It works as follows, in pythonic pseudocode:
resultRectangle = None
resultHeight = None
resultVolume = -1
# iterate over all heights
for loopHeight in range(0, h):
# create a 2D bitmap from our heightmap where a 1 represents a height >= loopHeight
bool bitmap[w][d]
for x in range(0, w):
for y in range(0, d):
bitmap[x][y] = heightmap[x][y] >= loopHeight
# obtain the greatest-volume cuboid at this particular height
maxRectangle = maxRectangleInBitmap(bitmap)
volume = maxRectangle.area() * loopHeight
# compare it to our current maximum and replace it if we found a greater cuboid
if volume > resultVolume:
resultHeight = loopHeight
resultVolume = volume
resultRectangle = maxRectangle
resultCuboid = resultRectangle.withHeight(resultHeight)
Finding the greatest area of all 1 in a rectangle is a known problem with O(1) complexity per pixel or O(w*d) in our case.
The total complexity of the naive approach is thus O(w*h*d).
So as I already stated, I was wondering if we can beat this complexity.
Perhaps we can get it down to O(w*d * log(h)) by searching through heights more intelligently instead of "brute-forcing" all of them.
The answer to this question Find largest cuboid containing only 1's in an NxNxN binary array by Evgeny Kluev seems to take a similar approach, but it falsely(?) assumes that the volumes which we would find at these heights form a unimodal function.
If this was the case, we could use Golden Section Search to choose heights more intelligently, but I don't think we can.
Here is an idea, with a significant assumption. pseudo-code:
P <- points from heightmap sorted by increasing height.
R <- set of rectangles. All maximal empty sub-rectangles for the current height.
R.add(Rectangle(0,0,W,H))
result = last_point_in(P).height()
foreach(p in P):
RR <- rectangles from R that overlap p (can be found in O(size(RR)), possibly with some logarithmic factors)
R = R - RR
foreach(r in RR)
result = max(result, r.area() * p.height())
split up r, adding O(1) new rectangles to R.
return result
The assumption, which I have a gut feeling about, but can't prove, is that RR will be O(1) size on average.
Edit: to clarify the "splitting", if we split at point p:
AAAAADFFF
AAAAADFFF
AAAAADFFF
BBBBBpGGG
CCCCCEHHH
CCCCCEHHH
We generate new rectangles consisting of:
ABC, CEH, FGH, ADF, and add them to R.
OK, another take. Most "meat" is in the go function. It uses the same "splitting" concept as in my other answer, but uses top-down dynamic programming with memoization. rmq2d implements 2D Range Minimum Query. for size 1000x1000 it takes about 30 seconds (while using 3GB of memory).
#include <iostream>
#include <vector>
#include <cassert>
#include <set>
#include <tuple>
#include <memory.h>
#include <limits.h>
using namespace std;
// Returns 31 minus the number of leading zero bits of x.
// NOTE(review): this is the index of the highest set bit (floor(log2(x)))
// only if int is 32 bits wide - confirm for the target platform.
// NOTE(review): __builtin_clz is a GCC/Clang builtin whose result is undefined
// for 0 - confirm callers never pass 0.
constexpr int ilog2(int x){
return 31 - __builtin_clz(x);
}
// Compile-time size constant: rmq2d uses it for the dimensions of its lookup
// table, and the global grid v is sized from it.
const int MAX_DIM = 100;
template<class T>
struct rmq2d{
struct point{
int x,y;
point():x(0),y(0){}
point(int x,int y):x(x),y(y){}
};
typedef point array_t[MAX_DIM][ilog2(MAX_DIM)+1][MAX_DIM];
int h, logh;
int w, logw;
vector<vector<T>> v;
array_t *A;
rmq2d(){A=nullptr;}
rmq2d &operator=(const rmq2d &other){
assert(sizeof(point)==8);
if(this == &other) return *this;
if(!A){
A = new array_t[ilog2(MAX_DIM)+1];
}
v=other.v;
h=other.h;
logh = other.logh;
w=other.w;
logw=other.logw;
memcpy(A, other.A, (ilog2(MAX_DIM)+1)*sizeof(array_t));
return *this;
}
rmq2d(const rmq2d &other){
A = nullptr;
*this = other;
}
~rmq2d(){
delete[] A;
}
T query(point pos){
return v[pos.y][pos.x];
}
rmq2d(vector<vector<T>> &v) : v(v){
A = new array_t[ilog2(MAX_DIM)+1];
h = (int)v.size();
logh = ilog2(h) + 1;
w = (int)v[0].size();
logw = ilog2(w) + 1;
for(int y=0; y<h; ++y){
for(int x=0;x<w;x++) A[0][y][0][x] = {x, y};
for(int jx=1; jx<logw; jx++){
int sz = 1<<(jx-1);
for(int x=0; x+sz < w; x++){
point i1 = A[0][y][jx-1][x];
point i2 = A[0][y][jx-1][x+sz];
if(query(i1) < query(i2)){
A[0][y][jx][x] = i1;
}else{
A[0][y][jx][x] = i2;
}
}
}
}
for(int jy=1; jy<logh; ++jy){
int sz = 1<<(jy-1);
for(int y=0; y+sz<h; ++y){
for(int jx=0; jx<logw; ++jx){
for(int x=0; x<w; ++x){
point i1 = A[jy-1][y][jx][x];
point i2 = A[jy-1][y+sz][jx][x];
if(query(i1) < query(i2)){
A[jy][y][jx][x] = i1;
}else{
A[jy][y][jx][x] = i2;
}
}
}
}
}
}
point pos_q(int x1, int x2, int y1, int y2){
assert(A);
int lenx = ilog2(x2 - x1);
int leny = ilog2(y2 - y1);
point idxs[] = {
A[leny][y1][lenx][x1],
A[leny][y2-(1<<leny)][lenx][x1],
A[leny][y1][lenx][x2-(1<<lenx)],
A[leny][y2-(1<<leny)][lenx][x2-(1<<lenx)]
};
point ret = idxs[0];
for(int i=1; i<4; ++i){
if(query(ret) > query(idxs[i])) ret = idxs[i];
}
return ret;
}
T val_q(int x1, int x2, int y1, int y2){
point pos = pos_q(x1,x2,y1,y2);
return v[pos.y][pos.x];
}
};
// Range-minimum structure over v; main() assigns it after filling v.
rmq2d<long long> rmq;
// Rectangles already visited by go(), stored as (x1, y1, x2, y2).
set<tuple<int, int, int ,int>> cac;
// The height map, indexed v[y][x]; (MAX_DIM-5) x (MAX_DIM-5), initially all 0.
vector<vector<long long>> v(MAX_DIM-5,vector<long long>(MAX_DIM-5,0));
// Largest volume found so far by go().
long long ret = 0;
// Number of distinct rectangles go() has evaluated.
int nq = 0;
// Memoised search over the half-open rectangle [x1, x2) x [y1, y2).
// The lowest cell p of the rectangle bounds the height of the cuboid covering
// the whole rectangle; any taller cuboid must avoid p, so the search recurses
// into the four sub-rectangles right of, left of, below and above p.
// Each improvement of the best volume (ret) is printed as it is found.
void go(int x1, int x2, int y1, int y2){
    const bool nonEmpty = x1 < x2 && y1 < y2;
    if(!nonEmpty) return;

    // insert() reports whether this rectangle is new; skip repeats.
    const bool firstVisit = cac.insert(make_tuple(x1,y1,x2,y2)).second;
    if(!firstVisit) return;
    ++nq;

    const auto lowest = rmq.pos_q(x1, x2, y1, y2);
    const long long cur = v[lowest.y][lowest.x]*(x2-x1)*(y2-y1);
    if(ret < cur){
        cout << x1 << "-" << x2 << ", " << y1 << "-" << y2 << " h=" << v[lowest.y][lowest.x] << " :" << cur << endl;
        ret = cur;
    }

    go(lowest.x+1, x2, y1, y2);
    go(x1, lowest.x, y1, y2);
    go(x1, x2, lowest.y+1, y2);
    go(x1, x2, y1, lowest.y);
}
// Demo driver: fills the grid with rand() values in [0, 10000), builds the
// range-minimum structure, runs the search over the whole grid and prints how
// many rectangles were evaluated.
int main(){
    const int H = (int)v.size();
    const int W = (int)v[0].size();

    // Row by row, left to right - the same order of rand() calls as a nested
    // y/x index loop.
    for(auto &row : v){
        for(auto &cell : row){
            cell = rand()%10000;
        }
    }

    rmq = rmq2d<long long>(v);
    go(0, W, 0, H);
    cout << "nq:" << nq << endl;
}

Connected component labeling with diagonal connections using union-find

I'm trying to develop a modification of the connected component algorithm I found as an answer to this question: Connected Component Labelling.
Basically, I have 2d- and 3d- matrices consisting of 0s and 1s. My problem is to find connected regions of 1s, labeling each region separately. The matrix sizes can be very large (consisting of 5e4-by-5e4 elements in 2-d and 1000^3 elements in 3d). So I need something which doesn't strain the stack memory, and which is fast enough to repeat several times over the course of a simulation.
The most upvoted answer to that question, using depth-first search, gives a stack overflow error (as noted in a comment). I have been trying to use the union-find algorithm suggested by another user.
The original code (by user Dukeling) works very well for large 2-d matrices, but I want to have diagonal connections between elements. Here's my code, with the example input I am trying to use:
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
// Grid dimensions; the grid is indexed input[x][y] with x < w and y < h.
const int w = 8, h = 8;
// Example grid: non-zero cells are the ones to be grouped into components.
int input[w][h] = {{1,0,0,0,1,0,0,1},
{1,1,0,1,1,1,1,0},
{0,1,0,0,0,0,0,1},
{1,1,1,1,0,1,0,1},
{0,0,0,0,0,0,1,0},
{0,0,1,0,0,1,0,0},
{0,1,0,0,1,1,1,0},
{1,0,1,1,0,1,0,1}};
// Union-find parent array: cell (x, y) is element x*h + y, and
// component[i] == i marks a root.
int component[w*h];
// Merges the sets containing a and b: follows parent links from each element
// to its root, then makes a's root the parent of b's root.
void doUnion(int a, int b)
{
    int rootA = a;
    for ( ; component[rootA] != rootA; rootA = component[rootA]) {}

    int rootB = b;
    for ( ; component[rootB] != rootB; rootB = component[rootB]) {}

    component[rootB] = rootA;
}
// Joins cell (x, y) with its neighbour (x2, y2) when both are set in input.
// (x, y) must be inside the grid; (x2, y2) may lie outside it, in which case
// nothing happens.
//
// The bounds test runs BEFORE input[x2][y2] is read, and it accepts index 0:
// the previous test (x2 > 0 && y2 > 0, evaluated after the array access) read
// out of bounds for -1 and never connected cells in row 0 or column 0.
void unionCoords(int x, int y, int x2, int y2)
{
    if (x2 < 0 || x2 >= w || y2 < 0 || y2 >= h)
        return;
    if (input[x][y] && input[x2][y2])
        doUnion(x*h + y, x2*h + y2);
}
// Labels the connected regions of input and prints the grid: 0 for an empty
// cell, otherwise the root id of the cell's component, each in a 4-wide column.
int main()
{
    // Every cell starts as its own root.
    for (int idx = 0; idx < w*h; ++idx)
        component[idx] = idx;

    // Try to join each cell with six of its neighbours (two straight, four diagonal).
    for (int x = 0; x < w; x++)
    {
        for (int y = 0; y < h; y++)
        {
            unionCoords(x, y, x+1, y);
            unionCoords(x, y, x, y+1);
            unionCoords(x, y, x+1, y+1);
            unionCoords(x, y, x-1, y+1);
            unionCoords(x, y, x+1, y-1);
            unionCoords(x, y, x-1, y-1);
        }
    }

    // print the array
    for (int x = 0; x < w; x++)
    {
        for (int y = 0; y < h; y++)
        {
            if (input[x][y] != 0)
            {
                // Walk up to the root, whose id serves as the label.
                int root = x*h + y;
                for ( ; component[root] != root; root = component[root]) {}
                printf("%4d ", component[root]);
            }
            else
            {
                printf("%4d ",input[x][y]);
            }
        }
        printf("\n");
    }
}
As you can see, I added 4 commands for doing diagonal connectivity between elements. Is this a valid modification of the union-find algorithm? I searched Google and stackoverflow in particular, but I can't find any example of diagonal connectivity. In addition, I want to extend this to 3 dimensions - so I would need to add 26 commands for checking. Will this way scale well? I mean the code seems to work for my case, but sometimes I randomly get an unlabeled isolated element. I don't want to integrate it with my code only to discover a bug months later.
Thanks.
There is nothing wrong with your approach using the union find algorithm. Union find runs on any graph. For each node it examines, it checks its connected nodes to determine whether they are in the same subset. Your approach appears to be doing just that, checking the 8 adjacent nodes of any observed node. The union find algorithm has nothing to do with the dimensions of your graph. You can extend that approach to 3d or any dimension, as long as your graph corresponds correctly to that dimension. If you are experiencing errors with this, you can post an example of that error, or check out code review: https://codereview.stackexchange.com/.

Polygon splitted by 4 parts

I have an arbitrary convex polygon, and it is split by 2 perpendicular lines (their direction vectors are (0,1) and (1,0)). Is there an algorithm that can calculate the areas of the smaller figures (S1, S2, S3, S4)? All I can do is calculate the points where the lines cross the polygon and then calculate the areas, but is there something better optimized?
I store all vertexes in array double **v;
Then I calculate all the points where my polygon crosses the X and Y axes:
// Fills buf with the vertices of v in order, inserting an extra point with
// x = 0 after every vertex whose edge to the next vertex changes the sign of x
// (i.e. crosses the Y axis). act ends up as the number of points written.
void cross() {
    act = 0;
    for (int i = 0; i < n; ++i) {
        const int j = (i + 1) % n;          // next vertex, wrapping around
        const double x0 = v[i][0];
        const double y0 = v[i][1];
        const double x1 = v[j][0];
        const double y1 = v[j][1];

        buf[act][0] = x0;
        buf[act][1] = y0;
        ++act;

        if (x0 * x1 < 0) {
            // Linear interpolation of y at x = 0 along the edge.
            buf[act][0] = 0;
            buf[act][1] = y0 + std::abs(x0) * (y1 - y0) / (std::abs(x0) + std::abs(x1));
            ++act;
        }
    }
}
// Fills buf2 with the points of buf in order, inserting an extra point with
// y = 0 after every point whose edge to the next point changes the sign of y
// (i.e. crosses the X axis). act2 ends up as the number of points written.
// (The original header comment began with a single '/', which does not compile.)
void vert() {
    act2 = 0;
    for (int i = 0; i < act; ++i) {
        const int next = (i + 1) % act;     // next point, wrapping around

        buf2[act2][0] = buf[i][0];
        buf2[act2][1] = buf[i][1];
        act2++;

        if (buf[i][1] * buf[next][1] < 0) {
            // Linear interpolation of x at y = 0 along the edge.
            buf2[act2][1] = 0;
            buf2[act2][0] = buf[i][0] + std::abs(buf[i][1]) * (buf[next][0] - buf[i][0]) / (std::abs(buf[i][1]) + std::abs(buf[next][1]));
            act2++;
        }
    }
}
After calling cross(); vert(); I get an array buf2 and number of elements there is act2;
After this I triangulate the polygon and detect which quadrant each triangle lies in.
// Area of the triangle with corners at the origin, (a, b) and (c, d):
// half the absolute value of the cross product a*d - c*b.
double s_trian (double a, double b, double c, double d) {
    const double crossProduct = (a)*(d)-(c)*(b);
    return 0.5*std::abs(crossProduct);
}
// Computes s1..s4 by fanning the polygon in buf2 into triangles that share the
// origin. Each triangle (origin, point i, point i+1) is credited to one
// accumulator according to the signs of the summed x and summed y of its two
// polygon points:
//   x > 0, y > 0 -> s1      x <= 0, y > 0 -> s2
//   x <= 0, y <= 0 -> s3    x > 0, y <= 0 -> s4
void triang() {
    s1 = 0;
    s2 = 0;
    s3 = 0;
    s4 = 0;
    for (int i = 0; i < act2; ++i) {
        const int a = i;
        const int b = (i + 1) % act2;       // next point, wrapping around
        const double area = s_trian(buf2[a][0], buf2[a][1], buf2[b][0], buf2[b][1]);

        const bool positiveX = (buf2[a][0] + buf2[b][0]) > 0;
        const bool positiveY = (buf2[a][1] + buf2[b][1]) > 0;

        if (positiveX && positiveY)
            s1 += area;
        else if (positiveX)
            s4 += area;
        else if (positiveY)
            s2 += area;
        else
            s3 += area;
    }
}
Can I optimize something here?
Given that your polygon is convex, just select an arbitrary point P inside the polygon and split it up in triangles with one corner in P.
Then calculate the area of each triangle: http://www.mathopenref.com/heronsformula.html and sum them up.
You can do slightly better.
Ignore X to begin with.
Project horizontally every vertex on Y. This way, you define trapezoids. The sum of the algebraic areas of these trapezoids gives the total surface. Add the positive and negative areas in separate accumulators, this will give you the areas on both sides of Y. But some trapezoids will cross Y and be skewed: compute the areas of the two triangles and accumulate where appropriate.
Now to deal with the horizontal axis, similarly you will add the contributions to a positive/negative accumulator, or to both.
In total there will be four accumulators, for all sign combinations, giving you the four requested areas.
This procedure will cost you a little more than one accumulation per side, instead of four. It can be done in a single loop, avoiding the need to compute and store the four subpolygons.
[Following up on my comment yesterday; has much in common with Dan Bystrom's answer.]
Loop over all sides, and compute the area of the triangle made of the side and the origin. Add to the appropriate quad area. Where a side crosses the axis, compute the intercept and split the triangle. Compute both triangle areas parts and add each to the appropriate quad.
Using the origin as a point for a triangle vertex makes the cross product based formula for a triangle area very fast and simple. You don't even need the call to fabs() if you take care to pass the parameters in the right order.
This code has not handled the problem where a vertex lies on an axis or cases where no point lies in a given quadrant.
// A point in the plane with double-precision coordinates.
struct Point
{
    double x, y;
};
// Area of the triangle whose corners are the origin, (ax, ay) and (bx, by).
double areaOfTriangle(double ax, double ay, double bx, double by)
{
    const double crossProduct = by*ax - bx *ay;
    return fabs(crossProduct)/2;
}
// Maps a point to a quadrant index in 0..3:
//   0: x > 0,  y > 0      1: x <= 0, y > 0
//   2: x <= 0, y <= 0     3: x > 0,  y <= 0
// Points lying on an axis fall on the "<= 0" side of that axis.
unsigned getQuad(double x, double y)
{
    const bool positiveX = x > 0;
    const bool positiveY = y > 0;
    if (positiveY)
        return positiveX ? 0u : 1u;
    return positiveX ? 3u : 2u;
}
// Returns the point where the segment a->b meets an axis. If the x
// coordinates have opposite signs the intercept with the Y axis (x = 0) is
// returned; otherwise the intercept with the X axis (y = 0) is returned.
// Only one intercept is produced per call.
Point getIntercept(const Point& a, const Point& b)
{
    Point intercept;
    const bool crossesYAxis = (a.x * b.x) < 0;
    if (crossesYAxis)
    {
        // Follow the line from a back to x = 0 using dy/dx.
        const double slope = (b.y - a.y) / (b.x - a.x);
        intercept.x = 0;
        intercept.y = a.y - slope*a.x;
    }
    else
    {
        // Follow the line from a back to y = 0 using dx/dy.
        const double inverseSlope = (b.x - a.x) / (b.y - a.y);
        intercept.y = 0;
        intercept.x = a.x - inverseSlope*a.y;
    }
    return intercept;
}
// Splits the area of a polygon among the four quadrants.
//
// retQuadArea: output array of 4 doubles, indexed by getQuad(); always zeroed first.
// points:      polygon vertices in order.
// numPts:      number of vertices.
//
// Every side a->b forms a triangle with the origin. If both ends are in the
// same quadrant the whole triangle is credited there; otherwise the side is
// cut at its axis intercept and each part is credited to its end's quadrant.
//
// An empty polygon (numPts == 0 or points == nullptr) yields all-zero areas;
// previously points[numPts - 1] wrapped around to an out-of-range index.
void getAreaOfQuads(double* retQuadArea, const Point* points, unsigned numPts)
{
    for (unsigned i = 0; i != 4; ++i)
        retQuadArea[i] = 0;

    if (numPts == 0 || points == nullptr)
        return;

    // Start with the closing side: last vertex -> first vertex.
    const Point* a = &points[numPts - 1];
    unsigned quadA = getQuad(a->x, a->y);
    for (unsigned i = 0; i != numPts; ++i)
    {
        const Point* b = &points[i];
        const unsigned quadB = getQuad(b->x, b->y);
        if (quadA == quadB)
        {
            retQuadArea[quadA] += areaOfTriangle(a->x, a->y, b->x, b->y);
        }
        else
        {
            // The side a->b crosses an axis: find out where and split there.
            const Point c = getIntercept(*a, *b);
            retQuadArea[quadA] += areaOfTriangle(a->x, a->y, c.x, c.y);
            retQuadArea[quadB] += areaOfTriangle(c.x, c.y, b->x, b->y);
        }
        a = b;
        quadA = quadB;
    }
}
// Computes the four quadrant areas of the given polygon and prints them on one
// line, each followed by ", ".
void test(Point* polygon, unsigned n)
{
    double areas[4] = {};
    getAreaOfQuads(areas, polygon, n);
    for (const double area : areas)
        std::cout << area << ", ";
    std::cout << std::endl;
}
// Sample input: an 8-vertex polygon around the origin.
Point polygon[]
{
{0.6, 0.2},
{ 0.2, 0.8 },
{ -0.2, 0.7 },
{ -0.6, 0.6 },
{ -1.0, 0.1 },
{ -0.6, -0.5 },
{ 0.1, -0.5 },
{ 0.9, -0.1 }
};
// Sample input: the square with corners (+-1, +-1), one corner per quadrant.
Point square[]
{
{1, 1},
{ -1, 1 },
{ -1, -1 },
{ 1, -1 }
};
// Runs the quadrant-area demo on the two sample polygons.
int main()
{
    // Vertex counts are derived from the arrays themselves (4 and 8).
    test(square, sizeof(square) / sizeof(square[0]));
    test(polygon, sizeof(polygon) / sizeof(polygon[0]));
    return 0;
}

Connected Component Labeling - Implementation

I have asked a similar question some days ago, but I have yet to find an efficient way of solving my problem.
I'm developing a simple console game, and I have a 2D array like this:
1,0,0,0,1
1,1,0,1,1
0,1,0,0,1
1,1,1,1,0
0,0,0,1,0
I am trying to find all the areas that consist of neighboring 1's (4-way connectivity). So, in this example the 2 areas are as following:
1
1,1
1
1,1,1,1
1
and :
1
1,1
1
The algorithm, that I've been working on, finds all the neighbors of the neighbors of a cell and works perfectly fine on this kind of matrices. However, when I use bigger arrays (like 90*90) the program is very slow and sometimes the huge arrays that are used cause stack overflows.
One guy on my other question told me about connected-component labelling as an efficient solution to my problem.
Can somebody show me any C++ code which uses this algorithm, because I'm kinda confused about how it actually works along with this disjoint-set data structure thing...
Thanks a lot for your help and time.
I'll first give you the code and then explain it a bit:
// direction vectors: entry k of dx/dy is the offset to the k-th 4-connected neighbour
const int dx[] = {+1, 0, -1, 0};
const int dy[] = {0, +1, 0, -1};
// matrix dimensions
int row_count;
int col_count;
// the input matrix
// NOTE(review): MAX is not defined in this snippet; it must be a compile-time
// constant at least as large as row_count and col_count.
int m[MAX][MAX];
// the labels, 0 means unlabeled
int label[MAX][MAX];
void dfs(int x, int y, int current_label) {
if (x < 0 || x == row_count) return; // out of bounds
if (y < 0 || y == col_count) return; // out of bounds
if (label[x][y] || !m[x][y]) return; // already labeled or not marked with 1 in m
// mark the current cell
label[x][y] = current_label;
// recursively mark the neighbors
for (int direction = 0; direction < 4; ++direction)
dfs(x + dx[direction], y + dy[direction], current_label);
}
// Scans the grid row by row and starts a flood fill, with a fresh label
// (1, 2, 3, ...), at every non-zero cell that has no label yet.
void find_components() {
    int component = 0;
    for (int i = 0; i < row_count; ++i) {
        for (int j = 0; j < col_count; ++j) {
            if (label[i][j]) continue;   // already part of a component
            if (!m[i][j]) continue;      // empty cell
            ++component;
            dfs(i, j, component);
        }
    }
}
This is a common way of solving this problem.
The direction vectors are just a nice way to find the neighboring cells (in each of the four directions).
The dfs function performs a depth-first-search of the grid. That simply means it will visit all the cells reachable from the starting cell. Each cell will be marked with current_label
The find_components function goes through all the cells of the grid and starts a component labeling if it finds an unlabeled cell (marked with 1).
This can also be done iteratively using a stack.
If you replace the stack with a queue, you obtain the bfs or breadth-first-search.
This can be solved with union find (although DFS, as shown in the other answer, is probably a bit simpler).
The basic idea behind this data structure is to repeatedly merge elements in the same component. This is done by representing each component as a tree (with nodes keeping track of their own parent, instead of the other way around), you can check whether 2 elements are in the same component by traversing to the root node and you can merge nodes by simply making the one root the parent of the other root.
A short code sample demonstrating this:
// Grid dimensions; the grid is indexed input[x][y] with x < w and y < h.
const int w = 5, h = 5;
// Example grid: non-zero cells are the ones to be grouped into components.
int input[w][h] = {{1,0,0,0,1},
{1,1,0,1,1},
{0,1,0,0,1},
{1,1,1,1,0},
{0,0,0,1,0}};
// Union-find parent array: cell (x, y) is element x*h + y, and
// component[i] == i marks a root.
int component[w*h];
// Merges the sets containing a and b: climbs from each element to its root,
// then hangs b's root under a's root.
void doUnion(int a, int b)
{
    int rootOfA = a;
    while (component[rootOfA] != rootOfA)
        rootOfA = component[rootOfA];

    int rootOfB = b;
    while (component[rootOfB] != rootOfB)
        rootOfB = component[rootOfB];

    component[rootOfB] = rootOfA;
}
// Joins cell (x, y) with cell (x2, y2) when (x2, y2) is below the upper grid
// bounds and both cells are non-zero in input. Only upper bounds are tested.
void unionCoords(int x, int y, int x2, int y2)
{
    if (y2 >= h || x2 >= w)
        return;                              // neighbour is past the grid edge
    if (!input[x][y] || !input[x2][y2])
        return;                              // at least one cell is empty
    doUnion(x*h + y, x2*h + y2);
}
// Labels the 4-connected regions of input and prints the grid: a space for an
// empty cell, otherwise the letter 'a' + (root id of the cell's component).
int main()
{
    // Every cell starts as its own root.
    for (int idx = 0; idx < w*h; ++idx)
        component[idx] = idx;

    // Joining each cell with the next cell along x and along y covers every
    // 4-connected adjacency once.
    for (int x = 0; x < w; x++)
    {
        for (int y = 0; y < h; y++)
        {
            unionCoords(x, y, x+1, y);
            unionCoords(x, y, x, y+1);
        }
    }

    // print the array
    for (int x = 0; x < w; x++)
    {
        for (int y = 0; y < h; y++)
        {
            if (input[x][y] != 0)
            {
                // Walk up to the root, whose id picks the letter.
                int root = x*h + y;
                for ( ; component[root] != root; root = component[root]) {}
                cout << (char)('a'+root);
            }
            else
            {
                cout << ' ';
            }
        }
        cout << "\n";
    }
}
Live demo.
The above will show each group of ones using a different letter of the alphabet.
p i
pp ii
p i
pppp
p
It should be easy to modify this to get the components separately or get a list of elements corresponding to each component. One idea is to replace cout << (char)('a'+c); above with componentMap[c].add(Point(x,y)) with componentMap being a map<int, list<Point>> - each entry in this map will then correspond to a component and give a list of points.
There are various optimisations to improve the efficiency of union find, the above is just a basic implementation.
You could also try this transitive closure approach, however the triple loop for the transitive closure slows things up when there are many separated objects in the image, suggested code changes welcome
Cheers
Dave
// Two-pass connected-component labelling.
//
// pBinImage: input image, width*height bytes, row-major; non-zero = foreground.
// pOutImage: output buffer of the same size; receives one label per foreground pixel.
// CON8:      when > 0 the two upper diagonal neighbours are also considered.
//
// Pass 1 scans the image row by row, gives each foreground pixel either a new
// label or the label of an already-visited neighbour, and records which labels
// touch each other. The equivalence table is then closed transitively and
// pass 2 rewrites every label to its chosen equivalent.
//
// NOTE(review): labels are stored in unsigned char pixels, so more than 255
// provisional labels cannot be represented - confirm the expected image content.
// NOTE(review): the equivalence table is maxEQ*maxEQ bytes and neither calloc
// result is checked - confirm this is acceptable for the caller.
void CC(unsigned char* pBinImage, unsigned char* pOutImage, int width, int height, int CON8)
{
// maxIndX, maxIndY, sum, ct, maxVal and sumVal are declared but not used below.
int i, j, x, y, k, maxIndX, maxIndY, sum, ct, newLabel=1, count, maxVal=0, sumVal=0, maxEQ=10000;
// list holds the distinct neighbour labels found for the current pixel (at most 4).
int *eq=NULL, list[4];
int bAdd;
memcpy(pOutImage, pBinImage, width*height*sizeof(unsigned char));
// equivalences[a + b*maxEQ] != 0 means labels a and b were seen touching.
unsigned char* equivalences=(unsigned char*) calloc(sizeof(unsigned char), maxEQ*maxEQ);
// modify labels this should be done with iterators to modify elements
// j indexes the current row
for(j=0; j<height; j++)
{
// i indexes the current column within row j
for(i=0; i<width; i++)
{
if(pOutImage[i+j*width]>0)
{
count=0;
// collect the labels of the already-visited neighbours
list[0]=0;
list[1]=0;
list[2]=0;
list[3]=0;
if(j>0)
{
if((i>0))
{
// upper-left neighbour (8-connectivity only); list is still empty, so no duplicate check
if((pOutImage[(i-1)+(j-1)*width]>0) && (CON8 > 0))
list[count++]=pOutImage[(i-1)+(j-1)*width];
}
// upper neighbour, added only if its label is not in list yet
if(pOutImage[i+(j-1)*width]>0)
{
for(x=0, bAdd=true; x<count; x++)
{
if(pOutImage[i+(j-1)*width]==list[x])
bAdd=false;
}
if(bAdd)
list[count++]=pOutImage[i+(j-1)*width];
}
if(i<width-1)
{
// upper-right neighbour (8-connectivity only)
if((pOutImage[(i+1)+(j-1)*width]>0) && (CON8 > 0))
{
for(x=0, bAdd=true; x<count; x++)
{
if(pOutImage[(i+1)+(j-1)*width]==list[x])
bAdd=false;
}
if(bAdd)
list[count++]=pOutImage[(i+1)+(j-1)*width];
}
}
}
if(i>0)
{
// left neighbour
if(pOutImage[(i-1)+j*width]>0)
{
for(x=0, bAdd=true; x<count; x++)
{
if(pOutImage[(i-1)+j*width]==list[x])
bAdd=false;
}
if(bAdd)
list[count++]=pOutImage[(i-1)+j*width];
}
}
// no labelled neighbour: start a new label; otherwise reuse the first one found
if(count==0)
pOutImage[i+j*width]=newLabel++;
else
{
pOutImage[i+j*width]=list[0];
if(count>1)
{
// store equivalences in table (every pair, in both directions, including a label with itself)
for(x=0; x<count; x++)
for(y=0; y<count; y++)
equivalences[list[x]+list[y]*maxEQ]=1;
}
}
}
}
}
// floyd-Warshall algorithm - transitive closure - slow though :-(
// whenever entry (i, j) is set, row i of the table is OR-ed into row j
for(i=0; i<newLabel; i++)
for(j=0; j<newLabel; j++)
{
if(equivalences[i+j*maxEQ]>0)
{
for(k=0; k<newLabel; k++)
{
equivalences[k+j*maxEQ]= equivalences[k+j*maxEQ] || equivalences[k+i*maxEQ];
}
}
}
// eq[i] = smallest label j recorded as equivalent to i; stays 0 when none was recorded
eq=(int*) calloc(sizeof(int), newLabel);
for(i=0; i<newLabel; i++)
for(j=0; j<newLabel; j++)
{
if(equivalences[i+j*maxEQ]>0)
{
eq[i]=j;
break;
}
}
free(equivalences);
// label image with equivalents (pixels whose label has no recorded equivalent keep it)
for(i=0; i<width*height; i++)
{
if(pOutImage[i]>0&&eq[pOutImage[i]]>0)
pOutImage[i]=eq[pOutImage[i]];
}
free(eq);
}
very useful Document => https://docs.google.com/file/d/0B8gQ5d6E54ZDM204VFVxMkNtYjg/edit
java application - open source - extract objects from image - connected component labeling => https://drive.google.com/file/d/0B8gQ5d6E54ZDTVdsWE1ic2lpaHM/edit?usp=sharing
import java.util.ArrayList;
/**
 * 8-connected component labelling on a flat, row-major boolean image.
 */
public class cclabeling
{
    // Index of the neighbour currently being examined, and the pixel list of
    // the component currently being grown.
    int neighbourindex;ArrayList<Integer> Temp;
    // Components found by the most recent call to cclabel(); each entry is the
    // list of flat pixel indices of one component.
    ArrayList<ArrayList<Integer>> cc=new ArrayList<>();

    /**
     * Finds all 8-connected components of {@code true} pixels.
     *
     * <p>The input array is consumed: every visited pixel is set to
     * {@code false}, so it is all {@code false} on return. Timing and the
     * number of components are printed to standard output.
     *
     * <p>Fixes over the earlier version: neighbours outside the image are
     * skipped (no exception on border pixels, no wrap-around between the end
     * of one row and the start of the next), results of earlier calls no
     * longer leak into the returned array, and the row is computed with plain
     * integer division instead of a lossy round trip through {@code float}.
     *
     * @param Main binary image, row-major, {@code true} = foreground
     * @param w    width of the image in pixels; must be positive
     * @return one array per component, each holding {x, y} pairs of its pixels
     * @throws IllegalArgumentException if {@code w} is not positive
     */
    public int[][][] cclabel(boolean[] Main,int w){
        if(w<=0){
            throw new IllegalArgumentException("w must be positive");
        }
        long start=System.nanoTime();
        int len=Main.length;
        cc=new ArrayList<>();

        for(int i=0;i<len;i+=1){
            if(!Main[i]){
                continue;
            }
            // Grow a new component breadth-first from seed pixel i; Temp is
            // both the result list and the work queue.
            Temp=new ArrayList<>();
            Temp.add(i);
            for(int k=0;k<Temp.size();k+=1){
                int id=Temp.get(k);
                int cx=id%w;
                int cy=id/w;
                // Same visiting order as the offsets
                // {-w-1,-w,-w+1,-1,+1,+w-1,+w,+w+1}.
                for(int dy=-1;dy<=1;dy+=1){
                    for(int dx=-1;dx<=1;dx+=1){
                        if(dx==0&&dy==0){
                            continue;
                        }
                        int nx=cx+dx;
                        int ny=cy+dy;
                        if(nx<0||nx>=w||ny<0){
                            continue; // left/right/top edge
                        }
                        neighbourindex=ny*w+nx;
                        if(neighbourindex>=len){
                            continue; // bottom edge
                        }
                        if(Main[neighbourindex]){
                            Temp.add(neighbourindex);
                            Main[neighbourindex]=false;
                        }
                    }
                }
                Main[id]=false;
            }
            cc.add(Temp);
        }

        // Convert flat indices to {x, y} pairs.
        int[][][] xycc=new int[cc.size()][][];
        for(int i=0;i<cc.size();i+=1){
            ArrayList<Integer> pixels=cc.get(i);
            xycc[i]=new int[pixels.size()][2];
            for(int p=0;p<pixels.size();p+=1){
                int index=pixels.get(p);
                int y=index/w;
                int x=index-y*w;
                xycc[i][p][0]=x;
                xycc[i][p][1]=y;
            }
        }

        long end=System.nanoTime();
        long time=end-start;
        System.out.println("Connected Component Labeling Time =>"+time/1000000+" milliseconds");
        System.out.println("Number Of Shapes => "+xycc.length);
        return xycc;
    }
}
Please find below the sample code for connected component labeling . The code is written in JAVA
package addressextraction;
/**
 * Recursive depth-first labelling of 4-connected regions of 1s in a grid.
 */
public class ConnectedComponentLabelling {
    // Offsets to the four neighbours; dx[k]/dy[k] belong together.
    int[] dx={+1, 0, -1, 0};
    int[] dy={0, +1, 0, -1};
    int row_count=0;
    int col_count=0;
    // Input grid: cells equal to 1 are the ones to label.
    int[][] m;
    // Output labels: 0 means unlabeled.
    int[][] label;

    /** Creates an all-zero grid and label matrix of the given size. */
    public ConnectedComponentLabelling(int row_count,int col_count) {
        this.row_count=row_count;
        this.col_count=col_count;
        this.m=new int[row_count][col_count];
        this.label=new int[row_count][col_count];
    }

    /**
     * Gives current_label to cell (x, y) and, recursively, to every cell that
     * holds 1, is still unlabeled, and is reachable through 4-connected steps.
     */
    void dfs(int x, int y, int current_label) {
        boolean outside = x < 0 || x == row_count || y < 0 || y == col_count;
        if (outside) {
            return;
        }
        if (m[x][y] != 1 || label[x][y] != 0) {
            return; // not marked with 1 in m, or already labeled
        }
        // mark the current cell
        label[x][y] = current_label;
        // recursively mark the neighbors
        for (int d = 0; d < 4; d++) {
            dfs(x + dx[d], y + dy[d], current_label);
        }
    }

    /** Labels every region, numbering them 1, 2, 3, ... in row-major scan order. */
    void find_components() {
        int component = 0;
        for (int i = 0; i < row_count; ++i) {
            for (int j = 0; j < col_count; ++j) {
                if (label[i][j] != 0 || m[i][j] != 1) {
                    continue;
                }
                component++;
                dfs(i, j, component);
            }
        }
    }

    /** Demo: a 4x4 grid with two isolated 1s; prints the label matrix row by row. */
    public static void main(String[] args) {
        ConnectedComponentLabelling l=new ConnectedComponentLabelling(4,4);
        // A freshly allocated grid is all zeros, so only the 1s need setting.
        l.m[1][1]=1;
        l.m[3][1]=1;
        l.find_components();
        for (int[] row : l.label) {
            StringBuilder line = new StringBuilder();
            for (int value : row) {
                line.append(value);
            }
            System.out.println(line);
        }
    }
}

Resources