I am trying to make blobs finding algorithm with 8 connectivity for binary image(monochrome) (coordinates of bounding boxes up-left and down-right dots)which use small amount of memory (needed because the large resolution of the image) on C++.
There are such tools like OpenCV, but it has a lot of filters and is too slow if you want to detect each blob in binary image, there is also CvBlobsLib but the support is outdated(last version is before 5 years) and I couldn't set it up for Visual Studio 2013 (it must be compiled with Cmake, and it is giving errors). In wikipedia there are two types of algorithms - "one component of a time" and "two-pass" connected-component , but they both use labels, which mean you will have another 2D array of integers, but this will take a lot of memory because of the size of int(4 bytes), and we need int because of the image size and possibility of more than 65535 labels(which is short). If it is even short it will take twice less memory, which is again a lot of it. I found a "quickblob" written in C quicblobsalgol but I couldn't run it from the source(but exe is working properly), tried to analyze the code, and I got something, but the whole idea behind it stayed vague for me, so I tried also something like floodFill algorithm and something like "disjoined-set data structure" link which to hold the blobs, and this means the used memory theoretically is defined of the number of blobs(single black pixels are not recognize as blobs). Here is the C++ code:
#include <cstdlib>
#include <iostream>
#include <ctime>
#include <math.h>
#define ROWS 4000
#define COLS 4000
#define BLOBS 1000000
using namespace std;
void floodFillAlgorithm(short(&arr)[ROWS][COLS]);
int recurciveMarkBlob(short(&arr)[ROWS][COLS], int **ptr_labels, int i, int j, int group);
int main(){
short arr[ROWS][COLS];
srand((unsigned int)time(0)); // use current time as seed for random generator
for (int i = 0; i < ROWS; i++)
{
for (int j = 0; j < COLS; j++)
{
arr[i][j] = rand() % 2;
}
}
/*for (int i = 0; i < ROWS; i++)
{
for (int j = 0; j < COLS; j++)
{
cout << arr[i][j] << '\t';
}
cout << '\n';
}*/
floodFillAlgorithm(arr);
cout << '\n';
cout << '\n';
/*for (int i = 0; i < ROWS; i++)
{
for (int j = 0; j < COLS; j++)
{
cout << arr[i][j] << '\t';
}
cout << '\n';
}*/
system("PAUSE");
return 0;}
void floodFillAlgorithm(short(&arr)[ROWS][COLS])
{
int group = 0;
int **ptr_labels;
ptr_labels = (int **)malloc(BLOBS * sizeof(int*));
if (ptr_labels == 0)
{
printf("ERROR: Out of memory\n");
}
for (int i = 0; i < BLOBS; i++)
{
ptr_labels[i] = NULL;
}
for (int i = 0; i < ROWS; i++)
{
for (int j = 0; j < COLS; j++)
{
if (arr[i][j] == 1)
{
recurciveMarkBlob(arr, ptr_labels,i, j, ++group);
arr[i][j] = 1;
}
}
}
int count = 0;
for (int i = 0; i < BLOBS; i++)
{
if (ptr_labels[i] != NULL)
{
count++;
//cout << "Label: " << i << " ; X1: " << ptr_labels[i][0] << " ; Y1: " << ptr_labels[i][1] << " ; X2: " << ptr_labels[i][2] << " ; Y2: " << ptr_labels[i][3] << " ; X3: " << ptr_labels[i][4] << " ; Y3: " << ptr_labels[i][5] << " ; POINTS: " << ptr_labels[i][6] << endl;
}
}
cout << "Count: " << count << endl;
system("PAUSE");
for (int i = 0; i < BLOBS; i++)
{
if (ptr_labels[i] != NULL)
{
free(ptr_labels[i]);
}
}
free(ptr_labels);
}
int recurciveMarkBlob(short(&arr)[ROWS][COLS], int **ptr_labels, int i, int j, int group)
{
//cout << " i : " << i << " j: " << j << endl;
if (j != 0)
{
if ((arr[i][j] == arr[i][j - 1]) && (arr[i][j - 1] == 1))
{
if (ptr_labels[group] == NULL)
{
ptr_labels[group] = (int *)malloc(7 * sizeof(int*));
ptr_labels[group][0] = j - 1;
ptr_labels[group][1] = i;
ptr_labels[group][2] = j;
ptr_labels[group][3] = i;
ptr_labels[group][4] = j;
ptr_labels[group][5] = i;
ptr_labels[group][6] = 2; // taken points (area) for current shape
}
else
{
if (ptr_labels[group][0] > j - 1)
{
ptr_labels[group][0] = j - 1;
}
ptr_labels[group][6]++;
}
arr[i][j] = 0;
recurciveMarkBlob(arr, ptr_labels, i, j - 1, group);
arr[i][j] = 1;
}
}
if (j != COLS - 1)
{
if ((arr[i][j] == arr[i][j + 1]) && (arr[i][j + 1] == 1))
{
if (ptr_labels[group] == NULL)
{
ptr_labels[group] = (int *)malloc(7 * sizeof(int*));
ptr_labels[group][0] = j;
ptr_labels[group][1] = i;
ptr_labels[group][2] = j + 1;
ptr_labels[group][3] = i;
ptr_labels[group][4] = j;
ptr_labels[group][5] = i;
ptr_labels[group][6] = 2; // taken points (area) for current shape
}
else
{
if (ptr_labels[group][2] < j + 1)
{
ptr_labels[group][2] = j + 1;
}
ptr_labels[group][6]++;
}
arr[i][j] = 0;
recurciveMarkBlob(arr, ptr_labels, i, j + 1, group);
arr[i][j] = 1;
}
}
if (i != 0)
{
if ((arr[i][j] == arr[i - 1][j]) && (arr[i - 1][j] == 1))
{
if (ptr_labels[group] == NULL)
{
ptr_labels[group] = (int *)malloc(7 * sizeof(int*));
ptr_labels[group][0] = j;
ptr_labels[group][1] = i - 1;
ptr_labels[group][2] = j;
ptr_labels[group][3] = i;
ptr_labels[group][4] = j;
ptr_labels[group][5] = i;
ptr_labels[group][6] = 2; // taken points (area) for current shape
}
else
{
if (ptr_labels[group][1] > i - 1)
{
ptr_labels[group][1] = i - 1;
}
ptr_labels[group][6]++;
}
arr[i][j] = 0;
recurciveMarkBlob(arr, ptr_labels, i - 1, j, group);
arr[i][j] = 1;
}
}
if (i != ROWS - 1)
{
if ((arr[i][j] == arr[i + 1][j]) && (arr[i + 1][j] == 1))
{
if (ptr_labels[group] == NULL)
{
ptr_labels[group] = (int *)malloc(7 * sizeof(int*));
ptr_labels[group][0] = j;
ptr_labels[group][1] = i;
ptr_labels[group][2] = j;
ptr_labels[group][3] = i + 1;
ptr_labels[group][4] = j;
ptr_labels[group][5] = i;
ptr_labels[group][6] = 2; // taken points (area) for current shape
}
else
{
if (ptr_labels[group][3] < i + 1)
{
ptr_labels[group][3] = i + 1;
}
ptr_labels[group][6]++;
}
arr[i][j] = 0;
recurciveMarkBlob(arr, ptr_labels, i + 1, j, group);
arr[i][j] = 1;
}
}
if ((i != 0) && (j != 0))
{
if ((arr[i][j] == arr[i - 1][j - 1]) && (arr[i - 1][j - 1] == 1))
{
if (ptr_labels[group] == NULL)
{
ptr_labels[group] = (int *)malloc(7 * sizeof(int*));
ptr_labels[group][0] = j - 1;
ptr_labels[group][1] = i - 1;
ptr_labels[group][2] = j;
ptr_labels[group][3] = i;
ptr_labels[group][4] = j;
ptr_labels[group][5] = i;
ptr_labels[group][6] = 2; // taken points (area) for current shape
}
else
{
if (ptr_labels[group][0] > j - 1)
{
ptr_labels[group][0] = j - 1;
}
if (ptr_labels[group][1] > i - 1)
{
ptr_labels[group][1] = i - 1;
}
ptr_labels[group][6]++;
}
arr[i][j] = 0;
recurciveMarkBlob(arr, ptr_labels, i - 1, j - 1, group);
arr[i][j] = 1;
}
}
if ((i != 0) && (j != COLS - 1))
{
//cout << "i: " << i << " ; j: " << j << endl;
if ((arr[i][j] == arr[i - 1][j + 1]) && (arr[i - 1][j + 1] == 1))
{
//cout << "i: " << i << " ; j: " << j << endl;
if (ptr_labels[group] == NULL)
{
ptr_labels[group] = (int *)malloc(7 * sizeof(int*));
ptr_labels[group][0] = j;
ptr_labels[group][1] = i - 1;
ptr_labels[group][2] = j + 1;
ptr_labels[group][3] = i;
ptr_labels[group][4] = j;
ptr_labels[group][5] = i;
ptr_labels[group][6] = 2; // taken points (area) for current shape
//cout << "Label: " << group << " ; X1: " << ptr_labels[group][0] << " ; Y1: " << ptr_labels[group][1] << " ; X2: " << ptr_labels[group][2] << " ; Y2: " << ptr_labels[group][3] << endl;
}
else
{
if (ptr_labels[group][2] < j + 1)
{
ptr_labels[group][2] = j + 1;
}
if (ptr_labels[group][1] > i - 1)
{
ptr_labels[group][1] = i - 1;
}
ptr_labels[group][6]++;
}
arr[i][j] = 0;
recurciveMarkBlob(arr, ptr_labels, i - 1, j + 1, group);
arr[i][j] = 1;
}
}
if ((i != ROWS - 1) && (j != 0))
{
if ((arr[i][j] == arr[i + 1][j - 1]) && (arr[i + 1][j - 1] == 1))
{
if (ptr_labels[group] == NULL)
{
ptr_labels[group] = (int *)malloc(7 * sizeof(int*));
ptr_labels[group][0] = j - 1;
ptr_labels[group][1] = i;
ptr_labels[group][2] = j;
ptr_labels[group][3] = i + 1;
ptr_labels[group][4] = j;
ptr_labels[group][5] = i;
ptr_labels[group][6] = 2; // taken points (area) for current shape
}
else
{
if (ptr_labels[group][0] > j - 1)
{
ptr_labels[group][0] = j - 1;
}
if (ptr_labels[group][3] < i + 1)
{
ptr_labels[group][3] = i + 1;
}
ptr_labels[group][6]++;
}
arr[i][j] = 0;
recurciveMarkBlob(arr, ptr_labels, i + 1, j - 1, group);
arr[i][j] = 1;
}
}
if ((i != ROWS - 1) && (j != COLS - 1))
{
if ((arr[i][j] == arr[i + 1][j + 1]) && (arr[i + 1][j + 1] == 1))
{
if (ptr_labels[group] == NULL)
{
ptr_labels[group] = (int *)malloc(7 * sizeof(int*));
ptr_labels[group][0] = j;
ptr_labels[group][1] = i;
ptr_labels[group][2] = j + 1;
ptr_labels[group][3] = i + 1;
ptr_labels[group][4] = j; // x of pixel in black
ptr_labels[group][5] = i; // y of pixel in black
ptr_labels[group][6] = 2; // taken points (area) for current shape
}
else
{
if (ptr_labels[group][2] < j + 1)
{
ptr_labels[group][2] = j + 1;
}
if (ptr_labels[group][3] < i + 1)
{
ptr_labels[group][3] = i + 1;
}
ptr_labels[group][6]++;
}
arr[i][j] = 0;
recurciveMarkBlob(arr, ptr_labels, i + 1, j + 1, group);
arr[i][j] = 1;
}
}
/**/
arr[i][j] = 0;
return 0;
}
The main question is why before end of the main function so much RAM is still in use(147 MB). The tail recursion "recurciveMarkBlob()" is using parameters by value i,j, group, and dynamic allocation of memory and that is why the memory temporary jumps to 600 MB(mostly from the parameters), after freeing the dynamically allocated memory it still takes 148 MB, the image is 4 000 x 4 000 x 2 bytes = 16 000 000 bytes = 16 MB. I have read about "function taken memory" here but I still cant understand why. If someone can explain it with assembler code what is happening and is this occurrence normal. I am using Release mode release vs debug
system("PAUSE") in main()
In process of recursion
Also everyone can give idea for fast and low memory taking algorithm for blob detection of large binary images.
The elementary recursive solution requires a lot of stack space, on the order of the size of the blobs. Multiply that by the size of the stack frame, and you get horrible bytes/pixel requirements.
The scanline filling principle can reduce that requirement by orders of magnitude. Anyway, blob detection in textures ("porous" blobs) remains problematic.
You may also consider implementing this gem: "A Linear-Time Component-Labeling Algorithm Using Contour Tracing Technique, Fu Chang, Chun-Jen Chen, and Chi-Jen Lu."
Related
what is the time complexity of below-written code-
i think the complexity should be O(rcn)
where n= maximum size island
but on gfg they had written the complexity is O(V+E)
void bfs(int r, int c, vector < vector < char >> & grid) {
queue < pair < int, int >> q;
int row[4] = {-1,0,1,0};
int col[4] = {0,1,0,-1};
q.push(make_pair(r, c));
while (!q.empty()) {
pair < int, int > p = q.front();
int x = p.first;
int y = p.second;
q.pop();
for (int i = 0; i < 4; i++) {
if (x + row[i] < grid.size() && y + col[i] < grid[0].size() && x + row[i] >= 0 && y + col[i] >= 0 && grid[x + row[i]][y + col[i]] == '1') {
q.push(make_pair(x + row[i], y + col[i]));
grid[x + row[i]][y + col[i]] = '0';
}
}
}
}
int numIslands(vector < vector < char >> & grid) {
int count = 0;
int r = grid.size();
if (r == 0)
return count;
int c = grid[0].size();
for (int i = 0; i < r; i++) {
for (int j = 0; j < c; j++) {
if (grid[i][j] == '1') {
bfs(i, j, grid);
count++;
}
}
}
return count;
}
I'm trying to program a code decoder. But I get the following error for all the comparisons in the if statements:
'error: ISO C++ forbids comparison between pointer and integer
[-fpermissive]'
The examples for the input string are ".-.--" and "-..-.--".
#include <iostream>
#include <string>
using namespace std;
int main() {
string s;
int c[100], t = 0, l, i = 0;
l = s.length();
cin >> s;
if (s[0] == '.') {
c[0] = 0;
t += 1;
while (i < l) {
if (s[i] == '-' && s[i + 1] == '.') {
c[t] = 1;
t += 1;
i += 2;
}
if (s[i] == '.') {
c[t] = 0;
t += 1;
i++;
}
if (s[i] == '-' && s[i + 1] == '-') {
c[t] = 2;
t += 1;
i += 2;
}
}
}
if (s[0] == '-' && s[1] == '.') {
c[0] = 1;
t += 1;
while (i < l) {
if (s[i] == '-' && s[i + 1] == '.'
'){
c[t] = 1; t += 1; i += 2;
}
if (s[i] == '.') {
c[t] = 0;
t += 1;
i++;
}
if (s[i] == '-' && s[i + 1] == '-') {
c[t] = 2;
t += 1;
i += 2;
}
}
}
if (s[0] == '-' && s[1] == '-') {
c[0] = 2;
t += 1;
while (i < l) {
if (s[i] == '-' && s[i + 1] == '.') {
c[t] = 1;
t += 1;
i += 2;
}
if (s[i] == ".") {
c[t] = 0;
t += 1;
i++;
}
if (s[i] == "-" && s[i + 1] == "-") {
c[t] = 2;
t += 1;
i += 2;
}
}
}
for (i = 0; i < t; i++) {
cout << s[t];
}
return 0;
}
How do I resolve this issue?
You were using single quotes until you got here:
if(s[i]=="-"&&s[i+1]=="-"){
You need to change it to single quotes so you have an int to int comparison.
if(s[i]=='-'&&s[i+1]=='-'){
When you say
"-"
you are creating a pointer.
When you say
'='
you are creating an int.
(" ") is a string literal which is char const * which is a pointer and (' ') is char which get promoted to int, so you can't compare them. They must be type compatible.
I have an array which is constituted of only 0s and 1s. Task is to find index of a 0, replacing which with a 1 results in the longest possible sequence of ones for the given array.
Solution has to work within O(n) time and O(1) space.
Eg:
Array - 011101101001
Answer - 4 ( that produces 011111101001)
My Approach gives me a result better than O(n2) but times out on long string inputs.
int findIndex(int[] a){
int maxlength = 0; int maxIndex= -1;
int n=a.length;
int i=0;
while(true){
if( a[i] == 0 ){
int leftLenght=0;
int j=i-1;
//finding count of 1s to left of this zero
while(j>=0){
if(a[j]!=1){
break;
}
leftLenght++;
j--;
}
int rightLenght=0;
j=i+1;
// finding count of 1s to right of this zero
while(j<n){
if(a[j]!=1){
break;
}
rightLenght++;
j++;
}
if(maxlength < leftLenght+rightLenght + 1){
maxlength = leftLenght+rightLenght + 1;
maxIndex = i;
}
}
if(i == n-1){
break;
}
i++;
}
return maxIndex;
}
The approach is simple, you just need to maintain two numbers while iterating through the array, the current count of the continuous block of one, and the last continuous block of one, which separated by zero.
Note: this solution assumes that there will be at least one zero in the array, otherwise, it will return -1
int cal(int[]data){
int last = 0;
int cur = 0;
int max = 0;
int start = -1;
int index = -1;
for(int i = 0; i < data.length; i++){
if(data[i] == 0){
if(max < 1 + last + cur){
max = 1 + last + cur;
if(start != -1){
index = start;
}else{
index = i;
}
}
last = cur;
start = i;
cur = 0;
}else{
cur++;
}
}
if(cur != 0 && start != -1){
if(max < 1 + last + cur){
return start;
}
}
return index;
}
O(n) time, O(1) space
Live demo: https://ideone.com/1hjS25
I believe the problem can we solved by just maintaining a variable which stores the last trails of 1's that we saw before reaching a '0'.
int last_trail = 0;
int cur_trail = 0;
int last_seen = -1;
int ans = 0, maxVal = 0;
for(int i = 0; i < a.size(); i++) {
if(a[i] == '0') {
if(cur_trail + last_trail + 1 > maxVal) {
maxVal = cur_trail + last_trail + 1;
ans = last_seen;
}
last_trail = cur_trail;
cur_trail = 0;
last_seen = i;
} else {
cur_trail++;
}
}
if(cur_trail + last_trail + 1 > maxVal && last_seen > -1) {
maxVal = cur_trail + last_trail + 1;
ans = last_seen;
}
This can be solved by a technique that is known as two pointers. Most two-pointers use O(1) space and O(n) time.
Code : https://www.ideone.com/N8bznU
#include <iostream>
#include <string>
using namespace std;
int findOptimal(string &s) {
s += '0'; // add a sentinel 0
int best_zero = -1;
int prev_zero = -1;
int zeros_in_interval = 0;
int start = 0;
int best_answer = -1;
for(int i = 0; i < (int)s.length(); ++i) {
if(s[i] == '1') continue;
else if(s[i] == '0' and zeros_in_interval == 0) {
zeros_in_interval++;
prev_zero = i;
}
else if(s[i] == '0' and zeros_in_interval == 1) {
int curr_answer = i - start; // [start, i) only contains one 0
cout << "tried this : [" << s.substr(start, i - start) << "]\n";
if(curr_answer > best_answer) {
best_answer = curr_answer;
best_zero = prev_zero;
}
start = prev_zero + 1;
prev_zero = i;
}
}
cout << "Answer = " << best_zero << endl;
return best_zero;
}
int main() {
string input = "011101101001";
findOptimal(input);
return 0;
}
This is an implementation in C++. The output looks like this:
tried this : [0111]
tried this : [111011]
tried this : [1101]
tried this : [10]
tried this : [01]
Answer = 4
pgcc is showing "Invalid accelerator data region: branching into or out of region is not allowed" for the lines that I put my acc pragmas, but I don't understand why.
I'm using copy, copyin, and create for all arrays that the loops would utilize.
What am I missing? Thanks!
#pragma acc data copy(graph->pagerank), copyin(graph->indegree, graph->outdegree), create(pagerankNew)
while (1) {
#pragma acc kernels
{
for (i = 0; i < n; ++i) {
double sum = 0;
for (k = 0; k < graph->indegree[i]; ++k) {
//int j = graph->inlinks[i][k];
int j = 0;
sum += (1.0 / graph->outdegree[j]) * graph->pagerank[j];
}
pagerankNew[i] = firstterm + damping * sum;
double diff = fabs(graph->pagerank[i] - pagerankNew[i]);
// if(iterations > 50) {
if (diff != 0.000000 && diff < epsilon) {
return iterations;
}
}
for (k = 0; k < n; ++k) {
graph->pagerank[k] = pagerankNew[k];
}
}
++iterations;
}
In the draft section 7.2.1.3 of The art of computer programming, generating all combinations, Knuth introduced Algorithm C for generating Chase's sequence.
He also mentioned a similar algorithm (based on the following equation) working with index-list without source code (exercise 45 of the draft).
I finally worked out a c++ version which I think is quite ugly. To generate all C_n^m combination, the memory complexity is about 3 (m+1) and the time complexity is bounded by O(m n^m)
class chase_generator_t{
public:
using size_type = ptrdiff_t;
enum class GET : char{ VALUE, INDEX };
chase_generator_t(size_type _n) : n(_n){}
void choose(size_type _m){
m = _m;
++_m;
index.resize(_m);
threshold.resize(_m + 1);
tag.resize(_m);
for (size_type i = 0, j = n - m; i != _m; ++i){
index[i] = j + i;
tag[i] = tag_t::DECREASE;
using std::max;
threshold[i] = max(i - 1, (index[i] - 3) | 1);
}
threshold[_m] = n;
}
bool get(size_type &x, size_type &y, GET const which){
if (which == GET::VALUE) return __get<false>(x, y);
return __get<true>(x, y);
}
size_type get_n() const{
return n;
}
size_type get_m() const{
return m;
}
size_type operator[](size_t const i) const{
return index[i];
}
private:
enum class tag_t : char{ DECREASE, INCREASE };
size_type n, m;
std::vector<size_type> index, threshold;
std::vector<tag_t> tag;
template<bool GetIndex>
bool __get(size_type &x, size_type &y){
using std::max;
size_type p = 0, i, q;
find:
q = p + 1;
if (index[p] == threshold[q]){
if (q >= m) return false;
p = q;
goto find;
}
x = GetIndex ? p : index[p];
if (tag[p] == tag_t::INCREASE){
using std::min;
increase:
index[p] = min(index[p] + 2, threshold[q]);
threshold[p] = index[p] - 1;
}
else if (index[p] && (i = (index[p] - 1) & ~1) >= p){
index[p] = i;
threshold[p] = max(p - 1, (index[p] - 3) | 1);
}
else{
tag[p] = tag_t::INCREASE;
i = p | 1;
if (index[p] == i) goto increase;
index[p] = i;
threshold[p] = index[p] - 1;
}
y = index[p];
for (q = 0; q != p; ++q){
tag[q] = tag_t::DECREASE;
threshold[q] = max(q - 1, (index[q] - 3) | 1);
}
return true;
}
};
Does any one has a better implementation, i.e. run faster with the same memory or use less memory with the same speed?
I think that the C code below is closer to what Knuth had in mind. Undoubtedly there are ways to make it more elegant (in particular, I'm leaving some scaffolding in case it helps with experimentation), though I'm skeptical that the array w can be disposed of. If storage is really important for some reason, then steal the sign bit from the a array.
#include <stdbool.h>
#include <stdio.h>
enum {
N = 10,
T = 5
};
static void next(int a[], bool w[], int *r) {
bool found_r = false;
int j;
for (j = *r; !w[j]; j++) {
int b = a[j] + 1;
int n = a[j + 1];
if (b < (w[j + 1] ? n - (2 - (n & 1)) : n)) {
if ((b & 1) == 0 && b + 1 < n) b++;
a[j] = b;
if (!found_r) *r = j > 1 ? j - 1 : 0;
return;
}
w[j] = a[j] - 1 >= j;
if (w[j] && !found_r) {
*r = j;
found_r = true;
}
}
int b = a[j] - 1;
if ((b & 1) != 0 && b - 1 >= j) b--;
a[j] = b;
w[j] = b - 1 >= j;
if (!found_r) *r = j;
}
int main(void) {
typedef char t_less_than_n[T < N ? 1 : -1];
int a[T + 1];
bool w[T + 1];
for (int j = 0; j < T + 1; j++) {
a[j] = N - (T - j);
w[j] = true;
}
int r = 0;
do {
for (int j = T - 1; j > -1; j--) printf("%x", a[j]);
putchar('\n');
if (false) {
for (int j = T - 1; j > -1; j--) printf("%d", w[j]);
putchar('\n');
}
next(a, w, &r);
} while (a[T] == N);
}