Related
I am searching for a non-iterative, closed-form algorithm to find the least-squares solution for the point closest to a set of 3D lines. It is similar to 3D point triangulation (minimizing re-projection error) but seems to be simpler and faster?
Lines can be described in any form, 2 points, point and unit direction or similar.
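Let the i-th line be given by point a_i and unit direction vector d_i. We need to find the single point p that minimizes the sum of squared point-to-line distances. This is where the gradient is the zero vector:
$$\nabla_p \sum_i \left[ (d_i \cdot d_i)\,\lVert p - a_i \rVert^2 - \bigl((p - a_i)\cdot d_i\bigr)^2 \right] = 0$$
(for unit directions, $d_i \cdot d_i = 1$).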
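Expanding the gradient (and dropping the common factor of 2),
$$\sum_i \left[ (d_i \cdot d_i)\,(p - a_i) - \bigl((p - a_i)\cdot d_i\bigr)\, d_i \right] = 0$$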
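Algebra yields a canonical 3x3 linear system,
$$M\,p = b$$
where the k'th row (a 3-element row vector) of matrix M is
$$M_k = \sum_i \left[ (d_i \cdot e_k)\, d_i - (d_i \cdot d_i)\, e_k \right]^{T}$$
with vector e_k the respective unit basis vector, and
$$b = \sum_i \left[ (a_i \cdot d_i)\, d_i - (d_i \cdot d_i)\, a_i \right]$$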
It's not hard to turn this into code. I borrowed (and fixed a small bug in) a Gaussian elimination function from Rosettacode to solve the system. Thanks to the author!
#include <stdio.h>
#include <math.h>
typedef double VEC[3];
typedef VEC MAT[3];
void solve(double *a, double *b, double *x, int n); // linear solver
// Inner product of two 3-component vectors.
double dot(VEC a, VEC b) {
  const double xx = a[0] * b[0];
  const double yy = a[1] * b[1];
  const double zz = a[2] * b[2];
  return xx + yy + zz;
}
// Find the point p that minimises the sum of squared distances to n lines.
// Line i passes through a[i] with direction d[i]. The 3x3 normal equations
//   sum_i (d_i d_i^T - (d_i.d_i) I) p = sum_i (d_i (d_i.a_i) - (d_i.d_i) a_i)
// are accumulated and handed to solve(); the result is written into p.
void find_nearest_point(VEC p, VEC a[], VEC d[], int n) {
  MAT m = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
  VEC b = {0, 0, 0};
  for (int line = 0; line < n; ++line) {
    double *dir = d[line];
    double *pt = a[line];
    const double dir_sq = dot(dir, dir);
    const double dir_dot_pt = dot(dir, pt);
    for (int r = 0; r < 3; ++r) {
      // Outer product d d^T, row r.
      for (int c = 0; c < 3; ++c) {
        m[r][c] += dir[r] * dir[c];
      }
      // Subtract (d.d) on the diagonal.
      m[r][r] -= dir_sq;
      b[r] += dir[r] * dir_dot_pt - pt[r] * dir_sq;
    }
  }
  solve(&m[0][0], b, p, 3);
}
// Debug printing.
// Print the three components of v with 3 decimals, between the strings l and r.
void pp(VEC v, char *l, char *r) {
  const double x = v[0];
  const double y = v[1];
  const double z = v[2];
  printf("%s%.3lf, %.3lf, %.3lf%s", l, x, y, z, r);
}
// Print a vector as "(x, y, z)".
void pv(VEC v)
{
  pp(v, "(", ")");
}
// Print a 3x3 matrix, one bracketed row per line.
void pm(MAT m)
{
  int row = 0;
  while (row < 3) {
    pp(m[row], "\n[", "]");
    ++row;
  }
}
// A simple verifier.
// Returns |d|^2 |a-p|^2 - (d.(a-p))^2, i.e. the squared distance from p to
// the line through a with direction d, scaled by |d|^2.
double dist2(VEC p, VEC a, VEC d) {
  VEC offset;
  for (int k = 0; k < 3; ++k) {
    offset[k] = a[k] - p[k];
  }
  const double along = dot(d, offset);
  const double dir_sq = dot(d, d);
  const double offset_sq = dot(offset, offset);
  return dir_sq * offset_sq - along * along;
}
// Total of dist2(p, a[i], d[i]) over all n lines.
double sum_dist2(VEC p, VEC a[], VEC d[], int n) {
  double total = 0;
  int line = 0;
  while (line < n) {
    total += dist2(p, a[line], d[line]);
    ++line;
  }
  return total;
}
// Check 26 nearby points and verify the provided one is nearest.
// Evaluate sum_dist2 on the 3x3x3 grid of points spaced D around p and
// return 1 when p itself (the centre cell) has the smallest value.
int is_nearest(VEC p, VEC a[], VEC d[], int n) {
#define D 0.01
  double best_d2 = 1e100;
  int best_cell = -1;  // winning cell index 0..26; 13 is the centre (0,0,0)
  for (int cell = 0; cell < 27; ++cell) {
    // Decode the cell index into offsets -1..1, k varying fastest.
    const int i = cell / 9 - 1;
    const int j = (cell / 3) % 3 - 1;
    const int k = cell % 3 - 1;
    VEC probe = { p[0] + D * i, p[1] + D * j, p[2] + D * k };
    const double d2 = sum_dist2(probe, a, d, n);
    // Prefer provided point among equals.
    const int is_centre = (cell == 13);
    if (d2 < best_d2 || (is_centre && d2 == best_d2)) {
      best_d2 = d2;
      best_cell = cell;
    }
  }
  return best_cell == 13;
}
// Scale v in place to unit Euclidean length.
void normalize(VEC v) {
  const double len = sqrt(dot(v, v));
  for (int k = 0; k < 3; ++k) {
    v[k] /= len;
  }
}
// Demo: four sample lines, solve for the nearest point, print it and
// cross-check it with is_nearest().
int main(void) {
  VEC a[] = {{-14.2, 17, -1}, {1, 1, 1}, {2.3, 4.1, 9.8}, {1,2,3}};
  VEC d[] = {{1.3, 1.3, -10}, {12.1, -17.2, 1.1}, {19.2, 31.8, 3.5}, {4,5,6}};
  int n = (int)(sizeof a / sizeof a[0]);
  VEC p;
  int line = 0;

  while (line < n) {
    normalize(d[line]);
    ++line;
  }

  find_nearest_point(p, a, d, n);
  pv(p);
  printf("\n");

  int ok = is_nearest(p, a, d, n);
  if (!ok) {
    printf("Woops. Not nearest.\n");
  }
  return 0;
}
// A linear solver from rosettacode (with bug fix: added a missing fabs())
// Address of element (y, x) in a row-major matrix with n columns.
#define mat_elem(a, y, x, n) (a + ((y) * (n) + (x)))

// Exchange rows r1 and r2 of the n-column matrix a, together with the
// matching entries of the right-hand side b. A no-op when r1 == r2.
void swap_row(double *a, double *b, int r1, int r2, int n)
{
  if (r1 == r2) return;

  double *first = mat_elem(a, r1, 0, n);
  double *second = mat_elem(a, r2, 0, n);
  for (int c = 0; c < n; c++) {
    const double held = first[c];
    first[c] = second[c];
    second[c] = held;
  }

  const double held_b = b[r1];
  b[r1] = b[r2];
  b[r2] = held_b;
}
// Solve the n x n system a * x = b by Gaussian elimination with partial
// pivoting followed by back substitution. a (row-major) and b are
// overwritten; the solution is stored in x.
void solve(double *a, double *b, double *x, int n)
{
  // Forward elimination.
  for (int dia = 0; dia < n; dia++) {
    // Pick the row at or below the diagonal with the largest |entry| in
    // this column.
    int pivot = dia;
    double best = fabs(*mat_elem(a, dia, dia, n));
    for (int r = dia + 1; r < n; r++) {
      const double cand = fabs(*mat_elem(a, r, dia, n));
      if (cand > best) {
        pivot = r;
        best = cand;
      }
    }
    swap_row(a, b, dia, pivot, n);

    double *pivot_row = mat_elem(a, dia, 0, n);
    for (int r = dia + 1; r < n; r++) {
      double *row = mat_elem(a, r, 0, n);
      const double factor = row[dia] / pivot_row[dia];
      for (int c = dia + 1; c < n; c++) {
        row[c] -= factor * pivot_row[c];
      }
      row[dia] = 0;
      b[r] -= factor * b[dia];
    }
  }

  // Back substitution, last row first.
  for (int r = n - 1; r >= 0; r--) {
    const double *row = mat_elem(a, r, 0, n);
    double acc = b[r];
    for (int c = n - 1; c > r; c--) {
      acc -= x[c] * row[c];
    }
    x[r] = acc / row[r];
  }
}
This isn't extensively tested, but seems to be working fine.
Let base point of line is p and unit direction vector is d.
Then distance from point v to this line might be calculated using cross product
SquaredDist = ((v - p) x d)^2
Using Maple packet symbolic calculation, we can get
d := <dx, dy, dz>;
v := <vx, vy, vz>;
p := <px, py, pz>;
w := v - p;
cp := CrossProduct(d, w);
nrm := BilinearForm(cp, cp, conjugate=false); //squared dist
nr := expand(nrm);
//now partial derivatives
nrx := diff(nr, vx);
//results:
nrx := -2*dz^2*px-2*dy^2*px+2*dz^2*vx+2*dy^2*vx
+2*dx*py*dy-2*dx*vy*dy+2*dz*dx*pz-2*dz*dx*vz
nry := -2*dx^2*py-2*dz^2*py-2*dy*vz*dz+2*dx^2*vy
+2*dz^2*vy+2*dy*pz*dz+2*dx*dy*px-2*dx*dy*vx
nrz := -2*dy^2*pz+2*dy^2*vz-2*dy*dz*vy+2*dx^2*vz
-2*dx^2*pz-2*dz*vx*dx+2*dy*dz*py+2*dz*px*dx
To minimize sum of squared distances, we have to make system of linear equations for zero partial derivatives like this:
vx*2*(Sum(dz^2)+Sum(dy^2)) + vy * (-2*Sum(dx*dy)) + vz *(-2*Sum(dz*dx)) =
2*Sum(dz^2*px)+2*Sum(dy^2*px) -2*Sum(dx*py*dy)-2*Sum(dz*dx*pz)
where
Sum(dz^2) = Sum{over all i in line indexes} {dz[i] * dz[i]}
and solve it for unknowns vx, vy, vz
Edit: Old erroneous answer for planes instead of lines, left for reference
If we use general equation of line
A * x + B * y + C * z + D = 0
then distance from point (x, y, z) to this line is
Dist = Abs(A * x + B * y + C * z + D) / Sqrt(A^2 + B^2 + C^2)
To simplify - just normalize all line equations dividing by Norm's
Norm = Sqrt(A^2 + B^2 + C^2)
a = A / Norm
b = B / Norm
c = C / Norm
d = D / Norm
now equation is
a * x + b * y + c * z + d = 0
and distance
Dist = Abs(a * x + b * y + c * z + d)
and we can use squared distances like LS method (ai, bi, ci, di are coefficients for i-th line)
F = Sum(ai*x + bi*y + ci*z + di)^2 =
Sum(ai^2*x^2 + bi^2*y^2 + ci^2*z^2 + di^2 +
2 * (ai*bi*x*y + ai*ci*x*z + bi*y*ci*z + ai*x*di + bi*y*di + ci*z*di))
partial derivatives
dF/dx = 2*Sum(ai^2*x + ai*bi*y + ai*ci*z + ai*di) = 0
dF/dy = 2*Sum(bi^2*y + ai*bi*x + bi*ci*z + bi*di) = 0
dF/dz = 2*Sum(ci^2*z + ai*ci*x + bi*ci*y + ci*di) = 0
so we have system of linear equation
x * Sum(ai^2) + y * Sum(ai*bi) + z * Sum(ai*ci)= - Sum(ai*di)
y * Sum(bi^2) + x * Sum(ai*bi) + z * Sum(bi*ci)= - Sum(bi*di)
z * Sum(ci^2) + x * Sum(ai*ci) + y * Sum(bi*ci)= - Sum(ci*di)
x * Saa + y * Sab + z * Sac = - Sad
x * Sab + y * Sbb + z * Sbc = - Sbd
x * Sac + y * Sbc + z * Scc = - Scd
where S** are corresponding sums
and can solve it for unknowns x, y, z
I needed this for a sketch in Processing, so I ported Gene's answer. Works great and thought it might save someone else a little time. Unfortunately PVector/PMatrix don't have array accessors for vectors or matrices so I had to add these as local functions.
// Component i of v: 0 -> x, 1 -> y, anything else -> z.
float getv(PVector v, int i) {
  switch (i) {
    case 0:
      return v.x;
    case 1:
      return v.y;
    default:
      return v.z;
  }
}
// Store value into component i of v: 0 -> x, 1 -> y, anything else -> z.
void setv(PVector v, int i, float value) {
  switch (i) {
    case 0:
      v.x = value;
      break;
    case 1:
      v.y = value;
      break;
    default:
      v.z = value;
      break;
  }
}
// Add value to component i of v.
void incv(PVector v, int i, float value) {
  float updated = getv(v, i) + value;
  setv(v, i, updated);
}
// Read element (r, c) of a row-major matrix stored with a row stride of 4.
float getm(float[] mm, int r, int c) {
  int index = r * 4 + c;
  return mm[index];
}
// Write element (r, c) of a row-major matrix stored with a row stride of 4.
void setm(float[] mm, int r, int c, float value) {
  int index = r * 4 + c;
  mm[index] = value;
}
// Add value to element (r, c) of a row-major matrix stored with a row stride of 4.
void incm(float[] mm, int r, int c, float value) {
  int index = r * 4 + c;
  mm[index] += value;
}
// Find the point minimising the sum of squared distances to the lines
// (a[i], d[i]). Accumulates the 3x3 system
//   sum_i (d_i d_i^T - (d_i.d_i) I) p = sum_i (d_i (d_i.a_i) - (d_i.d_i) a_i)
// and returns the solution produced by solve().
PVector findNearestPoint(PVector a[], PVector d[]) {
  float[] mm = new float[16];
  float[] rhs = new float[3];
  for (int line = 0; line < a.length; ++line) {
    PVector dir = d[line];
    PVector pt = a[line];
    float dirSq = dir.dot(dir);
    float dirDotPt = dir.dot(pt);
    for (int r = 0; r < 3; ++r) {
      float dr = getv(dir, r);
      for (int c = 0; c < 3; ++c) {
        incm(mm, r, c, dr * getv(dir, c));
      }
      incm(mm, r, r, -dirSq);
      rhs[r] += dr * dirDotPt - getv(pt, r) * dirSq;
    }
  }
  float[] sol = solve(mm, rhs);
  return new PVector(sol[0], sol[1], sol[2]);
}
// Verifier
// Returns |d|^2 |a-p|^2 - (d.(a-p))^2: the squared distance from p to the
// line through a with direction d, scaled by |d|^2.
float dist2(PVector p, PVector a, PVector d) {
  float ox = a.x - p.x;
  float oy = a.y - p.y;
  float oz = a.z - p.z;
  PVector offset = new PVector(ox, oy, oz);
  float along = d.dot(offset);
  float dirSq = d.dot(d);
  float offsetSq = offset.dot(offset);
  return dirSq * offsetSq - along * along;
}
//double sum_dist2(VEC p, VEC a[], VEC d[], int n) {
// Total of dist2(p, a[i], d[i]) over every line.
float sum_dist2(PVector p, PVector a[], PVector d[]) {
  float total = 0;
  int line = 0;
  while (line < a.length) {
    total += dist2(p, a[line], d[line]);
    ++line;
  }
  return total;
}
// Check 26 nearby points and verify the provided one is nearest.
// Evaluate sum_dist2 on the 3x3x3 grid of points spaced D around p and
// report whether p itself (the centre cell) has the smallest value.
boolean isNearest(PVector p, PVector a[], PVector d[]) {
  final float D = 0.1f;
  float bestD2 = 3.4028235E38;
  int bestCell = -1;  // winning cell index 0..26; 13 is the centre (0,0,0)
  for (int cell = 0; cell < 27; ++cell) {
    // Decode the cell index into offsets -1..1, k varying fastest.
    int i = cell / 9 - 1;
    int j = (cell / 3) % 3 - 1;
    int k = cell % 3 - 1;
    PVector probe = new PVector( p.x + D * i, p.y + D * j, p.z + D * k );
    float d2 = sum_dist2(probe, a, d);
    // Prefer provided point among equals.
    boolean isCentre = (cell == 13);
    if (d2 < bestD2 || (isCentre && d2 == bestD2)) {
      bestD2 = d2;
      bestCell = cell;
    }
  }
  return bestCell == 13;
}
// Sketch entry point: build four sample lines, normalise their directions,
// solve for the nearest point, print it and cross-check it.
void setup() {
  PVector a[] = {
    new PVector(-14.2, 17, -1),
    new PVector(1, 1, 1),
    new PVector(2.3, 4.1, 9.8),
    new PVector(1,2,3)
  };
  PVector d[] = {
    new PVector(1.3, 1.3, -10),
    new PVector(12.1, -17.2, 1.1),
    new PVector(19.2, 31.8, 3.5),
    new PVector(4,5,6)
  };

  for (PVector dir : d) {
    dir.normalize();
  }

  PVector p = findNearestPoint(a, d);
  println(p);

  boolean ok = isNearest(p, a, d);
  if (!ok) {
    println("Woops. Not nearest.\n");
  }
}
// From rosettacode (with bug fix: added a missing fabs())
// Flat index of element (y, x) in a row-major matrix with a row stride of 4.
int mat_elem(int y, int x) {
  int rowStart = y * 4;
  return rowStart + x;
}
// Exchange rows r1 and r2 (first n columns) of matrix a, together with the
// matching entries of b. A no-op when r1 == r2.
void swap_row(float[] a, float[] b, int r1, int r2, int n)
{
  if (r1 == r2) {
    return;
  }
  for (int c = 0; c < n; c++) {
    int first = mat_elem(r1, c);
    int second = mat_elem(r2, c);
    float held = a[first];
    a[first] = a[second];
    a[second] = held;
  }
  float heldB = b[r1];
  b[r1] = b[r2];
  b[r2] = heldB;
}
// Solve the 3x3 system a * x = b by Gaussian elimination with partial
// pivoting followed by back substitution. a and b are overwritten; the
// solution is returned as a new 3-element array.
float[] solve(float[] a, float[] b)
{
  float[] x = new float[] {0,0,0};
  int n = x.length;

  // Forward elimination.
  for (int dia = 0; dia < n; dia++) {
    // Pick the row at or below the diagonal with the largest |entry|.
    int pivot = dia;
    float best = abs(getm(a, dia, dia));
    for (int r = dia + 1; r < n; r++) {
      float cand = abs(getm(a, r, dia));
      if (cand > best) {
        pivot = r;
        best = cand;
      }
    }
    swap_row(a, b, dia, pivot, n);

    for (int r = dia + 1; r < n; r++) {
      float factor = getm(a, r, dia) / getm(a, dia, dia);
      for (int c = dia + 1; c < n; c++) {
        incm(a, r, c, -factor * getm(a, dia, c));
      }
      setm(a, r, dia, 0);
      b[r] -= factor * b[dia];
    }
  }

  // Back substitution, last row first.
  for (int r = n - 1; r >= 0; r--) {
    float acc = b[r];
    for (int c = n - 1; c > r; c--) {
      acc -= x[c] * getm(a, r, c);
    }
    x[r] = acc / getm(a, r, r);
  }
  return x;
}
I am using the Sutherland-Hodgman algorithm in order to clip the polygon overlay for a clipping spatial query, and I want to implement an erase overlay function as well. Can anyone suggest an algorithm similar to Sutherland-Hodgman for this, or show how to modify this Sutherland-Hodgman implementation?
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
typedef struct { double x, y; } vec_t, *vec;
/* 2D dot product of *a and *b.
 * Declared static: a plain C99 `inline` definition provides no external
 * definition, so a call the compiler chooses not to inline fails to link. */
static inline double dot(vec a, vec b)
{
	return a->x * b->x + a->y * b->y;
}
/* z component of the 2D cross product a x b.
 * Declared static: a plain C99 `inline` definition provides no external
 * definition, so a call the compiler chooses not to inline fails to link. */
static inline double cross(vec a, vec b)
{
	return a->x * b->y - a->y * b->x;
}
/* Store a - b in *res and return res.
 * Declared static: a plain C99 `inline` definition provides no external
 * definition, so a call the compiler chooses not to inline fails to link. */
static inline vec vsub(vec a, vec b, vec res)
{
	res->x = a->x - b->x;
	res->y = a->y - b->y;
	return res;
}
/* tells if vec c lies on the left side of directed edge a->b
* 1 if left, -1 if right, 0 if colinear
*/
int left_of(vec a, vec b, vec c)
{
	vec_t edge, to_c;
	double turn;

	/* sign of (b - a) x (c - b) */
	vsub(b, a, &edge);
	vsub(c, b, &to_c);
	turn = cross(&edge, &to_c);

	if (turn < 0) return -1;
	return turn > 0;
}
/* Intersect the line through x0,x1 with the segment y0->y1.
 * Returns 1 and stores the point in *res when the line crosses strictly
 * inside the segment (parameter in the open interval (0,1)); returns 0
 * when the two are parallel or the crossing lies at or beyond an endpoint. */
int line_sect(vec x0, vec x1, vec y0, vec y1, vec res)
{
	vec_t dx, dy, d;
	double denom, t;

	vsub(x1, x0, &dx);
	vsub(y1, y0, &dy);
	vsub(x0, y0, &d);
	/* x0 + a dx = y0 + b dy ->
	   x0 X dx = y0 X dx + b dy X dx ->
	   b = (x0 - y0) X dx / (dy X dx) */
	denom = cross(&dy, &dx);
	if (!denom) return 0;

	t = cross(&d, &dx) / denom;
	if (t <= 0 || t >= 1) return 0;

	res->x = y0->x + t * dy.x;
	res->y = y0->y + t * dy.y;
	return 1;
}
/* === polygon stuff === */
typedef struct { int len, alloc; vec v; } poly_t, *poly;
/* Allocate an empty polygon: no vertices and no vertex storage yet. */
poly poly_new()
{
	poly fresh = (poly)malloc(sizeof(poly_t));
	fresh->v = 0;
	fresh->alloc = 0;
	fresh->len = 0;
	return fresh;
}
/* Release a polygon obtained from poly_new(), including its vertex array.
 * The previous version freed nothing when alloc was 0, leaking the poly_t
 * of every polygon that never received a vertex. free(NULL) is a no-op,
 * so the vertex array can be released unconditionally.
 * Must only be called on heap polygons created by poly_new(); a NULL
 * argument is ignored. */
void poly_free(poly p)
{
	if (!p) return;
	free(p->v);
	free(p);
}
/* Append a copy of *v to polygon p, doubling the vertex storage when full
 * (the first allocation holds 4 vertices).
 * The realloc() result is checked before it replaces p->v: assigning it
 * directly would lose the old array and then dereference NULL on failure.
 * Out of memory is reported on stderr and the program aborts. */
void poly_append(poly p, vec v)
{
	if (p->len >= p->alloc) {
		int grown = p->alloc ? p->alloc * 2 : 4;
		vec bigger = (vec)realloc(p->v, sizeof(vec_t) * grown);
		if (!bigger) {
			fprintf(stderr, "poly_append: out of memory\n");
			abort();
		}
		p->v = bigger;
		p->alloc = grown;
	}
	p->v[p->len++] = *v;
}
/* this works only if all of the following are true:
* 1. poly has no colinear edges;
* 2. poly has no duplicate vertices;
* 3. poly has at least three vertices;
* 4. poly is convex (implying 3).
*/
/* Winding of p, taken from the turn direction at its first three vertices
 * (the result of left_of on v[0], v[1], v[2]). */
int poly_winding(poly p)
{
	vec first = &p->v[0];
	vec second = &p->v[1];
	vec third = &p->v[2];
	return left_of(first, second, third);
}
/* Clip polygon sub against the line through x0->x1 and write the surviving
 * outline into res (res is emptied first). `left` is the side value, as
 * returned by left_of(), that counts as inside; vertices whose side is
 * -left are dropped, and an intersection point is inserted wherever two
 * consecutive vertices lie strictly on opposite sides of the line.
 *
 * An empty sub yields an empty res. The previous version read sub->v[-1]
 * in that case, which is a NULL dereference when an earlier clipping
 * stage removed every vertex. */
void poly_edge_clip(poly sub, vec x0, vec x1, int left, poly res)
{
	int i, side0, side1;
	vec_t tmp;
	vec v0, v1;

	res->len = 0;
	if (sub->len <= 0) return;

	/* Start from the closing edge: last vertex -> first vertex. */
	v0 = &sub->v[sub->len - 1];
	side0 = left_of(x0, x1, v0);
	if (side0 != -left) poly_append(res, v0);

	for (i = 0; i < sub->len; i++) {
		v1 = &sub->v[i];
		side1 = left_of(x0, x1, v1);
		if (side0 + side1 == 0 && side0)
			/* last point and current straddle the edge */
			if (line_sect(x0, x1, v0, v1, &tmp))
				poly_append(res, &tmp);
		/* the last vertex was already handled before the loop */
		if (i == sub->len - 1) break;
		if (side1 != -left) poly_append(res, v1);
		v0 = v1;
		side0 = side1;
	}
}
/* Clip sub against every edge of clip in turn (closing edge first) and
 * return the result as a newly allocated polygon owned by the caller. */
poly poly_clip(poly sub, poly clip)
{
	poly in = poly_new(), out = poly_new();
	int dir = poly_winding(clip);
	int last = clip->len - 1;
	int edge;

	/* closing edge: last vertex -> first vertex */
	poly_edge_clip(sub, &clip->v[last], &clip->v[0], dir, out);

	for (edge = 0; edge < last; edge++) {
		/* previous output becomes the next input */
		poly held = out;
		out = in;
		in = held;
		poly_edge_clip(in, &clip->v[edge], &clip->v[edge + 1], dir, out);
	}

	poly_free(in);
	return out;
}
/* Demo: clip polygon s against clipper c, print the resulting vertices and
 * write an EPS picture of clipper, subject and result to test.eps.
 * Returns 1 when test.eps cannot be opened. */
int main()
{
	int i;
	vec_t c[] = {{200,200}, {400,200}, {400,400}, {200,400}};
	//vec_t c[] = {{100,300}, {300,300}, {300,100}, {100,100}};
	vec_t s[] = {	{50,150}, {200,50}, {350,150},
			{350,300},{250,300},{200,250},
			{150,350},{100,250},{100,200}};
#define clen (sizeof(c)/sizeof(vec_t))
#define slen (sizeof(s)/sizeof(vec_t))
	poly_t clipper = {clen, 0, c};
	poly_t subject = {slen, 0, s};

	poly res = poly_clip(&subject, &clipper);

	for (i = 0; i < res->len; i++)
		printf("%g %g\n", res->v[i].x, res->v[i].y);

	/* long and arduous EPS printout */
	FILE * eps = fopen("test.eps", "w");
	if (!eps) {
		/* previously an unchecked NULL was handed to fprintf */
		perror("test.eps");
		poly_free(res);
		return 1;
	}
	fprintf(eps, "%%!PS-Adobe-3.0\n%%%%BoundingBox: 40 40 360 360\n"
		"/l {lineto} def /m{moveto} def /s{setrgbcolor} def"
		"/c {closepath} def /gs {fill grestore stroke} def\n");
	fprintf(eps, "0 setlinewidth %g %g m ", c[0].x, c[0].y);
	for (i = 1; i < (int)clen; i++)
		fprintf(eps, "%g %g l ", c[i].x, c[i].y);
	fprintf(eps, "c .5 0 0 s gsave 1 .7 .7 s gs\n");

	fprintf(eps, "%g %g m ", s[0].x, s[0].y);
	for (i = 1; i < (int)slen; i++)
		fprintf(eps, "%g %g l ", s[i].x, s[i].y);
	fprintf(eps, "c 0 .2 .5 s gsave .4 .7 1 s gs\n");

	/* An empty result has no first vertex to move to. */
	if (res->len > 0) {
		fprintf(eps, "2 setlinewidth [10 8] 0 setdash %g %g m ",
			res->v[0].x, res->v[0].y);
		for (i = 1; i < res->len; i++)
			fprintf(eps, "%g %g l ", res->v[i].x, res->v[i].y);
		fprintf(eps, "c .5 0 .5 s gsave .7 .3 .8 s gs\n");
	}

	fprintf(eps, "%%%%EOF");
	fclose(eps);
	printf("test.eps written\n");

	poly_free(res);
	return 0;
}
I was trying to parallelize the gaussian blur function using OpenMP,
but I am new at OpenMP, and when I tried to parallelize the two for loops (I don't think there are any variables that need to be private for each thread), it ended up
running even slower than before, and the output was different. So did I do anything wrong? What should I do to make it run faster?
// Separable Gaussian blur: each row of src is filtered into dst, then each
// column of dst is filtered in place. src and dst are indexed as [y][x]
// with x < w and y < h. The kernel has ksize taps (forced odd by the `| 1`)
// and is normalised to sum to 1. convolve() is defined elsewhere.
//
// NOTE(review): this is the version that misbehaves under OpenMP. The single
// ringbuf allocated below is declared outside the parallel loops, so every
// thread reads and writes the same buffer concurrently (a data race).
void gaussian_blur(float **src, float **dst, int w, int h, float sigma)
{
int x, y, i;
int ksize = (int)(sigma * 2.f * 4.f + 1) | 1;
int halfk = ksize / 2;
float scale = -0.5f/(sigma*sigma);
float sum = 0.f;
float *kernel, *ringbuf;
// first x (resp. y) for which x+halfk (resp. y+halfk) falls outside the image
int xmax = w - halfk;
int ymax = h - halfk;
// if sigma too small, just copy src to dst
if (ksize <= 1)
{
for (y = 0; y < h; y++)
for (x = 0; x < w; x++)
dst[y][x] = src[y][x];
return;
}
// create Gaussian kernel
kernel = malloc(ksize * sizeof(float));
ringbuf = malloc(ksize * sizeof(float));
// unnormalised weights exp(scale * x^2); their total is accumulated in sum
#pragma omp parallel for reduction(+ : sum)
for (i = 0; i < ksize; i++)
{
float x = (float)(i - halfk);
float t = expf(scale * x * x);
kernel[i] = t;
sum += t;
}
// normalise so the weights sum to 1
scale = 1.f / sum;
#pragma omp parallel for
for (i = 0; i < ksize; i++)
kernel[i] *= scale;
// blur each row
#pragma omp parallel for // this is the for loop I parallelized but ended up with wrong output and running slower
for (y = 0; y < h; y++)
{
int x1;
int bufi0 = ksize-1;
// pre-fill the ring buffer: the left edge sample repeated halfk times,
// then the first samples of the row
float tmp = src[y][0];
for (x1 = 0; x1 < halfk ; x1++) ringbuf[x1] = tmp;
for (; x1 < ksize-1; x1++) ringbuf[x1] = src[y][x1-halfk];
for (x1 = 0; x1 < w; x1++)
{
// push the next sample, repeating the right edge sample past the end
if(x1 < xmax)
ringbuf[bufi0++] = src[y][x1+halfk];
else
ringbuf[bufi0++] = src[y][w-1];
// wrap the write index
if (bufi0 == ksize) bufi0 = 0;
dst[y][x1] = convolve(kernel, ringbuf, ksize, bufi0);
}
}
// blur each column
#pragma omp parallel for // this is the for loop I parallelized but ended up with wrong output and running slower
for (x = 0; x < w; x++)
{
int y1;
int bufi0 = ksize-1;
// pre-fill the ring buffer: the top edge sample repeated halfk times,
// then the first samples of the column
float tmp = dst[0][x];
for (y1 = 0; y1 < halfk ; y1++) ringbuf[y1] = tmp;
for ( ; y1 < ksize-1; y1++) ringbuf[y1] = dst[y1-halfk][x];
for (y1 = 0; y1 < h; y1++)
{
// push the next sample, repeating the bottom edge sample past the end
if(y1 < ymax)
ringbuf[bufi0++] = dst[y1+halfk][x];
else
ringbuf[bufi0++] = dst[h-1][x];
// wrap the write index
if (bufi0 == ksize) bufi0 = 0;
dst[y1][x] = convolve(kernel, ringbuf, ksize, bufi0);
}
}
// clean up
free(kernel);
free(ringbuf);
}
Besides the need to properly identify private and shared data, there are several things that you could do in order to speed up your program.
As a first step you should remove any unnecessary concurrency. For example, how big ksize happens to be on average? If it is less than several hundred elements, it makes absolutely no sense to employ OpenMP for such simple operations as computing the kernel and then normalising it:
#pragma omp parallel for reduction(+ : sum)
for (i = 0; i < ksize; i++)
{
float x = (float)(i - halfk);
float t = expf(scale * x * x);
kernel[i] = t;
sum += t;
}
scale = 1.f / sum;
#pragma omp parallel for
for (i = 0; i < ksize; i++)
kernel[i] *= scale;
On a typical modern CPU it would take more cycles to bootstrap the parallel regions than to compute this on a single core. Also on modern CPUs these loops can be unrolled and vectorised and you can get up to 8x boost on a single core. If the kernel is too small, then besides OpenMP overhead you will also get slowdown from excessive false sharing. You have to make sure that each thread gets an exact multiple of 16 elements (64 bytes of cache line size / sizeof(float)) to work on in order to prevent false sharing.
You also have to make sure that threads do not share cache lines in the column blur section.
// blur each column
#pragma omp parallel for
for (x = 0; x < w; x++)
{
...
for (y1 = 0; y1 < h; y1++)
{
...
dst[y1][x] = convolve(kernel, ringbuf, ksize, bufi0);
}
}
Because of the access pattern here, you have to make sure that each thread gets a chunk of columns that is a multiple of 16 or else there will be a border overlap area of 16*y1 pixels shared by every two consecutive threads where excessive false sharing will occur. If you cannot guarantee that w is divisible by 16, then you can give each thread a starting offset in the y direction, e.g. the innermost loop becomes:
int tid = omp_get_thread_num();
for (y1 = 2*tid; y1 < h; y1++)
{
...
}
for (y1 = 0; y1 < 2*tid; y1++)
{
...
}
The multiplier 2 is arbitrary. The idea is to give the next thread several rows of advance in comparison to the current one so that both threads will not be processing the same line at once at any moment in time. You could also use addition and modulo arithmetic to compute y1, i.e.
for (y2 = 0; y2 < h; y2++)
{
y1 = (y2 + 2*tid) % h;
...
}
but this is generally slower than just separating the loop in two parts.
Also mind your data size. The last level cache (LLC) has very high but still limited bandwidth. If data cannot fit in the private cache of each core then compiler optimisations such as loop vectorisations can put very high pressure on the LLC. Things get more ugly if data doesn't fit in the LLC and therefore the main memory has to be accessed.
If you don't know what false sharing is, there is an article in Dr.Dobb's that kind of explains it here.
I may have fixed your code. You did not post your convolve function so it's difficult to say for sure but I'm not sure it matters. There are at least two bugs. There is a race condition in the ringbuf array. To fix this I extend the array times the number of threads.
ringbuf = (float*)malloc(nthreads*ksize * sizeof(float));
To access the array do something like this
int ithread = omp_get_thread_num();
ringbuf[ksize*ithread + x1]
Edit: I added some code which defines ringbuf inside the parallel block. That way you don't have to access ringbuf based on the thread number.
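The second error is the bufi0 variable: it is incremented from one iteration to the next, so each position depends on the previous one. I replaced it with an index computed directly from the loop counter, like this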
const int ibufi0_fix = (x1+ksize-1)%ksize;
Below is the code I used to check it. Replace with your convolve function. Note, this may still be quite inefficient. There are probably cache issues such as cache misses and false sharing (particularly when you convolve vertically). Hopefully, though, the image will be correct now.
Edit: here is a paper by Intel that shows how to do this best with AVX. It's optimized to minimize the cache misses. I'm not sure it's optimized for threading though.
http://software.intel.com/en-us/articles/iir-gaussian-blur-filter-implementation-using-intel-advanced-vector-extensions
I'm writing my own function on this (it's actually the reason I started learning OpenMP) which uses SSE/AVX as well. There are a lot of similarities with matrix multiplication and image filtering so I learned how to optimized matrix multiplication first and will do Gaussian Blur shortly...
#include "math.h"
#include "omp.h"
#include "stdio.h"
#include <nmmintrin.h>
/* Placeholder convolution: the plain dot product of the first ksize entries
 * of kernel and ringbuf. bufi0 is accepted but not used. */
float convolve(const float *kernel, const float *ringbuf, const int ksize, const int bufi0) {
    (void)bufi0;
    float acc = 0.0f;
    const float *k = kernel;
    const float *r = ringbuf;
    const float *const k_end = kernel + ksize;
    while (k < k_end) {
        acc += (*k) * (*r);
        ++k;
        ++r;
    }
    return acc;
}
/*
 * Separable Gaussian blur of a w*h row-major float image: each row of src
 * is filtered into dst, then each column of dst is filtered in place.
 * nthreads > 1 enables OpenMP with a team of at most nthreads threads.
 *
 * Fixes relative to the previous version:
 *  - the column pass wrote to ringbuf[ibufi0_fix] without the per-thread
 *    offset, so all threads shared (and raced on) the first slice;
 *  - the team size is now capped with num_threads(), because ringbuf only
 *    has room for `team` slices and omp_get_thread_num() could otherwise
 *    index past its end;
 *  - convolve() now receives the index of the oldest sample instead of a
 *    stale constant (the placeholder convolve ignores it either way);
 *  - the tiny kernel loops run serially, so the kernel is bit-identical
 *    for every thread count (a parallel float reduction is not).
 */
void gaussian_blur(float *src, float *dst, int w, int h, float sigma, int nthreads)
{
    int x, y, i;
    int ksize = (int)(sigma * 2.f * 4.f + 1) | 1;
    int halfk = ksize / 2;
    printf("ksize %d\n", ksize);
    float scale = -0.5f/(sigma*sigma);
    float sum = 0.f;
    float *kernel, *ringbuf;
    /* first x (resp. y) whose right (resp. lower) neighbour at distance
       halfk falls outside the image */
    int xmax = w - halfk;
    int ymax = h - halfk;
    /* number of ring-buffer slices == maximum team size */
    const int team = nthreads > 1 ? nthreads : 1;

    // if sigma too small, just copy src to dst
    if (ksize <= 1)
    {
        for (y = 0; y < h; y++)
            for (x = 0; x < w; x++)
                dst[y*w + x] = src[y*w + x];
        return;
    }

    // create Gaussian kernel
    kernel = (float*)_mm_malloc(ksize * sizeof(float),16);
    // one ksize-long slice per thread
    ringbuf = (float*)_mm_malloc(team*ksize * sizeof(float),16);

    for (i = 0; i < ksize; i++)
    {
        float dx = (float)(i - halfk);
        float t = expf(scale * dx * dx);
        kernel[i] = t;
        sum += t;
    }

    // normalise so the weights sum to 1
    scale = 1.f / sum;
    for (i = 0; i < ksize; i++)
        kernel[i] *= scale;

    // blur each row
    #pragma omp parallel for if(nthreads>1) num_threads(team)
    for (y = 0; y < h; y++)
    {
        float *ring = &ringbuf[ksize * omp_get_thread_num()];
        const float *row = &src[y*w];
        int x1;
        // pre-fill: left edge sample repeated halfk times, then the row start
        float tmp = row[0];
        for (x1 = 0; x1 < halfk ; x1++) ring[x1] = tmp;
        for (; x1 < ksize-1; x1++) ring[x1] = row[x1-halfk];

        for (x1 = 0; x1 < w; x1++)
        {
            // slot that receives the newest sample; the oldest follows it
            const int newest = (x1+ksize-1)%ksize;
            ring[newest] = (x1 < xmax) ? row[x1+halfk] : row[w-1];
            dst[y*w + x1] = convolve(kernel, ring, ksize, (newest+1)%ksize);
        }
    }

    // blur each column
    #pragma omp parallel for if(nthreads>1) num_threads(team)
    for (x = 0; x < w; x++)
    {
        float *ring = &ringbuf[ksize * omp_get_thread_num()];
        int y1;
        // pre-fill: top edge sample repeated halfk times, then the column start
        float tmp = dst[0*w + x];
        for (y1 = 0; y1 < halfk ; y1++) ring[y1] = tmp;
        for ( ; y1 < ksize-1; y1++) ring[y1] = dst[(y1-halfk)*w + x];

        for (y1 = 0; y1 < h; y1++)
        {
            const int newest = (y1+ksize-1)%ksize;
            ring[newest] = (y1 < ymax) ? dst[(y1+halfk)*w + x] : dst[(h-1)*w + x];
            dst[y1*w + x] = convolve(kernel, ring, ksize, (newest+1)%ksize);
        }
    }

    // clean up
    _mm_free(kernel);
    _mm_free(ringbuf);
}
/* Count the positions i in [0, n) at which dst1[i] and dst2[i] differ.
 * The previous version dereferenced the unmodified pointers on every
 * iteration, so it only ever compared element 0 (n times). */
int compare(float *dst1, float *dst2, const int n) {
    int error = 0;
    for(int i=0; i<n; i++) {
        if(dst1[i] != dst2[i]) error++;
    }
    return error;
}
/* Blur the same 20x20 ramp image with 1 and with 4 threads and print how
 * many output values compare() reports as different. */
int main() {
    const int w = 20;
    const int h = 20;
    const int count = w*h;
    const size_t bytes = w*h*sizeof(float);

    float *src = (float*)_mm_malloc(bytes,16);
    float *dst1 = (float*)_mm_malloc(bytes,16);
    float *dst2 = (float*)_mm_malloc(bytes,16);

    int i = 0;
    while (i < count) {
        src[i] = i;
        ++i;
    }

    gaussian_blur(src, dst1, w, h, 1.0f, 1);
    gaussian_blur(src, dst2, w, h, 1.0f, 4);

    int error = compare(dst1, dst2, count);
    printf("error %d\n", error);

    _mm_free(src);
    _mm_free(dst1);
    _mm_free(dst2);
}
Edit: here is code which defines ringbuf inside the parallel block based on the comment by Hristo. It should be equivalent.
#include "math.h"
#include "omp.h"
#include "stdio.h"
#include <nmmintrin.h>
/* Placeholder convolution: the plain dot product of the first ksize entries
 * of kernel and ringbuf. bufi0 is accepted but not used. */
float convolve(const float *kernel, const float *ringbuf, const int ksize, const int bufi0) {
    (void)bufi0;
    float acc = 0.0f;
    int tap = 0;
    while (tap < ksize) {
        acc += kernel[tap] * ringbuf[tap];
        ++tap;
    }
    return acc;
}
/*
 * Separable Gaussian blur of a w*h row-major float image: each row of src
 * is filtered into dst, then each column of dst is filtered in place.
 * nthreads > 1 enables OpenMP; every thread allocates its own ring buffer
 * inside the parallel region, so no buffer is shared between threads.
 *
 * Fixes relative to the previous version:
 *  - the column pass was guarded by `#pragma omp parralel if(ntheads>1)`,
 *    a misspelt directive that compilers ignore, so that pass never ran in
 *    parallel;
 *  - convolve() now receives the index of the oldest sample instead of a
 *    stale constant (the placeholder convolve ignores it either way);
 *  - the tiny kernel loops run serially, so the kernel is bit-identical
 *    for every thread count (a parallel float reduction is not).
 */
void gaussian_blur(float *src, float *dst, int w, int h, float sigma, int nthreads)
{
    int x, y, i;
    int ksize = (int)(sigma * 2.f * 4.f + 1) | 1;
    int halfk = ksize / 2;
    printf("ksize %d\n", ksize);
    float scale = -0.5f/(sigma*sigma);
    float sum = 0.f;
    float *kernel;
    /* first x (resp. y) whose right (resp. lower) neighbour at distance
       halfk falls outside the image */
    int xmax = w - halfk;
    int ymax = h - halfk;

    // if sigma too small, just copy src to dst
    if (ksize <= 1)
    {
        for (y = 0; y < h; y++)
            for (x = 0; x < w; x++)
                dst[y*w + x] = src[y*w + x];
        return;
    }

    // create Gaussian kernel
    kernel = (float*)_mm_malloc(ksize * sizeof(float),16);

    for (i = 0; i < ksize; i++)
    {
        float dx = (float)(i - halfk);
        float t = expf(scale * dx * dx);
        kernel[i] = t;
        sum += t;
    }

    // normalise so the weights sum to 1
    scale = 1.f / sum;
    for (i = 0; i < ksize; i++)
        kernel[i] *= scale;

    // blur each row
    #pragma omp parallel if(nthreads>1)
    {
        // private to the executing thread
        float *ringbuf = (float*)_mm_malloc(ksize * sizeof(float),16);
        #pragma omp for
        for (y = 0; y < h; y++)
        {
            const float *row = &src[y*w];
            int x1;
            // pre-fill: left edge sample repeated halfk times, then the row start
            float tmp = row[0];
            for (x1 = 0; x1 < halfk ; x1++) ringbuf[x1] = tmp;
            for (; x1 < ksize-1; x1++) ringbuf[x1] = row[x1-halfk];

            for (x1 = 0; x1 < w; x1++)
            {
                // slot that receives the newest sample; the oldest follows it
                const int newest = (x1+ksize-1)%ksize;
                ringbuf[newest] = (x1 < xmax) ? row[x1+halfk] : row[w-1];
                dst[y*w + x1] = convolve(kernel, ringbuf, ksize, (newest+1)%ksize);
            }
        }
        _mm_free(ringbuf);
    }

    // blur each column
    #pragma omp parallel if(nthreads>1)
    {
        // private to the executing thread
        float *ringbuf = (float*)_mm_malloc(ksize * sizeof(float),16);
        #pragma omp for
        for (x = 0; x < w; x++)
        {
            int y1;
            // pre-fill: top edge sample repeated halfk times, then the column start
            float tmp = dst[0*w + x];
            for (y1 = 0; y1 < halfk ; y1++) ringbuf[y1] = tmp;
            for ( ; y1 < ksize-1; y1++) ringbuf[y1] = dst[(y1-halfk)*w + x];

            for (y1 = 0; y1 < h; y1++)
            {
                const int newest = (y1+ksize-1)%ksize;
                ringbuf[newest] = (y1 < ymax) ? dst[(y1+halfk)*w + x] : dst[(h-1)*w + x];
                dst[y1*w + x] = convolve(kernel, ringbuf, ksize, (newest+1)%ksize);
            }
        }
        _mm_free(ringbuf);
    }

    // clean up
    _mm_free(kernel);
}
/* Count the positions i in [0, n) at which dst1[i] and dst2[i] differ.
 * The previous version dereferenced the unmodified pointers on every
 * iteration, so it only ever compared element 0 (n times). */
int compare(float *dst1, float *dst2, const int n) {
    int error = 0;
    for(int i=0; i<n; i++) {
        if(dst1[i] != dst2[i]) error++;
    }
    return error;
}
/* Blur the same 20x20 ramp image with 1 and with 4 threads and print how
 * many output values compare() reports as different. */
int main() {
    const int w = 20;
    const int h = 20;
    const int count = w*h;
    const size_t bytes = w*h*sizeof(float);

    float *src = (float*)_mm_malloc(bytes,16);
    float *dst1 = (float*)_mm_malloc(bytes,16);
    float *dst2 = (float*)_mm_malloc(bytes,16);

    int i = 0;
    while (i < count) {
        src[i] = i;
        ++i;
    }

    gaussian_blur(src, dst1, w, h, 1.0f, 1);
    gaussian_blur(src, dst2, w, h, 1.0f, 4);

    int error = compare(dst1, dst2, count);
    printf("error %d\n", error);

    _mm_free(src);
    _mm_free(dst1);
    _mm_free(dst2);
}
I want to split a bezier curve into a polygonal chain with n straight lines. The number of lines being dependent on a maximum allowed angle between 2 connecting lines.
I'm looking for an algorithm to find the most optimal solution (ie to reduce as much as possible the number of straight lines).
I know how to split a Bezier curve using de Casteljau's algorithm or Bernstein polynomials. I tried dividing the Bezier curve in half, calculating the angle between the straight lines, and splitting again if the angle between the connecting lines is within a certain threshold range, but I may run into shortcuts.
Is there a known algorithm or pseudo code available to do this conversion?
Use de Casteljau algorithm recursively until the control points are approximately collinear. See for instance http://www.antigrain.com/research/adaptive_bezier/index.html.
This was a fascinating topic. The only thing I'm adding is tested C# code, to perhaps save somebody the trouble. And I tried to write for clarity as opposed to speed, so it mostly follows the AGG web site's PDF doc (see above) on the Casteljau algorithm. The Notation follows the diagram in that PDF.
/// <summary>
/// A cubic Bezier segment that can flatten itself into straight lines by
/// recursive midpoint (de Casteljau) subdivision.
/// </summary>
public class Bezier
{
    public PointF P1;   // Begin Point
    public PointF P2;   // Control Point
    public PointF P3;   // Control Point
    public PointF P4;   // End Point

    // Made these global so I could diagram the top solution
    public Line L12;
    public Line L23;
    public Line L34;

    public PointF P12;
    public PointF P23;
    public PointF P34;

    public Line L1223;
    public Line L2334;

    public PointF P123;
    public PointF P234;

    public Line L123234;
    public PointF P1234;

    // Upper bound on each cross product tested by CurveIsFlat().
    private const float FlatnessTolerance = 0.1f;

    public Bezier(PointF p1, PointF p2, PointF p3, PointF p4)
    {
        P1 = p1; P2 = p2; P3 = p3; P4 = p4;
    }

    /// <summary>
    /// Consider the classic Casteljau diagram
    /// with the bezier points p1, p2, p3, p4 and lines l12, l23, l34
    /// and their midpoint of line l12 being p12 ...
    /// and the line between p12 p23 being L1223
    /// and the midpoint of line L1223 being P123 ...
    /// When the segment is flat enough a single line P1-P4 is appended;
    /// otherwise both halves, split at P1234, are flattened recursively.
    /// </summary>
    /// <param name="lines">List that receives the approximating lines, in curve order.</param>
    public void SplitBezier( List<Line> lines)
    {
        L12 = new Line(this.P1, this.P2);
        L23 = new Line(this.P2, this.P3);
        L34 = new Line(this.P3, this.P4);

        P12 = L12.MidPoint();
        P23 = L23.MidPoint();
        P34 = L34.MidPoint();

        L1223 = new Line(P12, P23);
        L2334 = new Line(P23, P34);

        P123 = L1223.MidPoint();
        P234 = L2334.MidPoint();

        L123234 = new Line(P123, P234);

        P1234 = L123234.MidPoint();

        if (CurveIsFlat())
        {
            lines.Add(new Line(this.P1, this.P4));
            return;
        }

        Bezier bz1 = new Bezier(this.P1, P12, P123, P1234);
        bz1.SplitBezier(lines);

        Bezier bz2 = new Bezier(P1234, P234, P34, this.P4);
        bz2.SplitBezier(lines);
    }

    /// <summary>
    /// Check if the control polygon P1, P2, P3, P4 is colinear (enough).
    /// The three legs P1-P2, P2-P3 and P3-P4 are compared pairwise through
    /// their cross products; all three must stay below the tolerance.
    /// The previous test looked only at P1, P2, P3, so a segment whose end
    /// point P4 bends away (or whose two control points coincide) was
    /// accepted as flat.
    /// This is very simple-minded algo... there are better...
    /// </summary>
    /// <returns>true when every pair of legs is (nearly) parallel.</returns>
    public bool CurveIsFlat()
    {
        float ax = P2.X - P1.X, ay = P2.Y - P1.Y;   // leg P1 -> P2
        float bx = P3.X - P2.X, by = P3.Y - P2.Y;   // leg P2 -> P3
        float cx = P4.X - P3.X, cy = P4.Y - P3.Y;   // leg P3 -> P4

        return Math.Abs(ax * by - ay * bx) < FlatnessTolerance
            && Math.Abs(bx * cy - by * cx) < FlatnessTolerance
            && Math.Abs(ax * cy - ay * cx) < FlatnessTolerance;
    }
}
The PointF is from System.Drawing, and the Line class follows:
/// <summary>
/// A straight segment between two points.
/// </summary>
public class Line
{
    PointF P1;
    PointF P2;

    public Line(PointF pt1, PointF pt2)
    {
        this.P1 = pt1;
        this.P2 = pt2;
    }

    /// <summary>
    /// Point halfway between the two end points.
    /// </summary>
    public PointF MidPoint()
    {
        float midX = (P1.X + P2.X) / 2f;
        float midY = (P1.Y + P2.Y) / 2f;
        return new PointF(midX, midY);
    }
}
A sample call creates the Bezier object with 4 points (begin, 2 control, and end), and returns a list of lines that approximate the Bezier:
// Build the curve from its begin point, two control points and end point.
TopBezier = new Bezier(Point1, Point2, Point3, Point4 );
// SplitBezier appends the approximating segments to this list.
List<Line> lines = new List<Line>();
TopBezier.SplitBezier(lines);
Thanks to Dr Jerry, AGG, and all the other contributors.
There are some alternatives for RSA flattening that are reported to be faster:
RSA vs PAA:
http://www.cis.usouthal.edu/~hain/general/Theses/Ahmad_thesis.pdf
RSA vs CAA vs PAA:
http://www.cis.usouthal.edu/~hain/general/Theses/Racherla_thesis.pdf
RSA = Recursive Subdivision Algorithm
PAA = Parabolic Approximation Algorithm
CAA = Circular Approximation Algorithm
According to Racherla, CAA is slower than PAA by a factor of 1.5–2. CAA is as slow as RSA, but achieves the required flatness better for offset curves.
It seems that PAA is best choice for actual curve and CAA is best for offset's of curve (when stroking curves).
I have tested the PAA from both theses, but they fail in some cases. Ahmad's PAA fails in collinear cases (all points on the same line), and Racherla's PAA fails in collinear cases and in cases where both control points are equal. With some fixes, it may be possible to get them to work as expected.
A visual example on my website -> DXF -> polybezier.
it is basically a recursive split with casteljau.
/**
 * Recursively flatten the Bezier control points in `array` into
 * this.vertices and return that list.
 *
 * When `init` is truthy the vertex list is reset and the curve is always
 * split once. Otherwise array[2] is emitted as a vertex as soon as either
 * angle difference reported by controlPointsDiff is below this.threshold;
 * failing that, the curve is split and both halves are converted.
 *
 * Fix: the second test built its vector with `array[2]-array[1].y`, which
 * subtracts a number from an object and yields NaN, so that test could
 * never succeed. It now uses `array[2].y - array[1].y`.
 */
Bezier2Poly.prototype.convert = function(array,init) {
    if (init) {
        this.vertices = [];
    }

    var flatEnough = false;
    if (!init) {
        // vector from array[1] to array[2]
        var tail = {x: array[2].x - array[1].x, y: array[2].y - array[1].y};
        flatEnough =
            Math.abs(this.controlPointsDiff(array[0], array[2])) < this.threshold
            || Math.abs(this.controlPointsDiff(tail, array[2])) < this.threshold;
    }

    if (flatEnough) {
        this.vertices.push(array[2]);
    } else {
        var split = this.splitBezier(array);
        this.convert(split.b1);
        this.convert(split.b2);
    }
    return this.vertices;
};
The flatness judgement is made by calculating the angle between the control points and the line through the endpoint.
/**
 * Returns the difference between the atan2 angles of two vectors.
 *
 * @param {{x: number, y: number}} vector1
 * @param {{x: number, y: number}} vector2
 * @returns {number} angle(vector1) - angle(vector2), in radians, not normalised
 */
Bezier2Poly.prototype.controlPointsDiff = function (vector1, vector2) {
    var firstAngle = Math.atan2(vector1.y, vector1.x);
    var secondAngle = Math.atan2(vector2.y, vector2.x);
    var difference = firstAngle - secondAngle;
    return difference;
}
I solved it with Qt for any SVG path, including Bezier curves. In the SVG module I found a static function in qsvghandler.cpp, parsePathDataFast, which converts an SVG path string to a QPainterPath. And the cherry on the cake: QPainterPath has three native functions to convert a path to polygons (the big one, toFillPolygon, and the others, toSubpathPolygons and toFillPolygons, which split the path into a list of polygons), along with nice extras such as bounding box, intersection, translation, and so on. The result is ready to use with Boost::Geometry. Not so bad!
the header parsepathdatafast.h
#ifndef PARSEPATHDATAFAST_H
#define PARSEPATHDATAFAST_H
#include <QPainterPath>
#include <QString>
// Parses the SVG path data in dataStr and appends the resulting segments to path.
// Returns false when a command character outside the handled set is met with
// pending arguments, true otherwise.
bool parsePathDataFast(const QStringRef &dataStr, QPainterPath &path);
#endif // PARSEPATHDATAFAST_H
the code parsepathdatafast.cpp
#include <QtCore/qmath.h>
#include <QtMath>
#include <QChar>
#include <QByteArray>
#include <QMatrix>
#include <parsepathdatafast.h>
Q_CORE_EXPORT double qstrtod(const char *s00, char const **se, bool *ok);
// '0' is 0x30 and '9' is 0x39
// Returns true when ch lies in 0x30..0x39, i.e. is an ASCII decimal digit.
static inline bool isDigit(ushort ch)
{
    // Low ten bits set: shifting by a low nibble above 9 leaves zero.
    static quint16 magic = 0x3ff;
    if ((ch >> 4) != 3)
        return false;
    return (magic >> (ch & 15)) != 0;
}
// Reads a decimal number (optional sign, digits, optional fraction, optional
// exponent) starting at str, advances str past the characters consumed and
// returns the parsed value. The text is first copied into a bounded Latin-1
// buffer; short numbers without exponent are converted with integer
// arithmetic, everything else is handed to qstrtod.
static qreal toDouble(const QChar *&str)
{
    const int maxLen = 255;//technically doubles can go til 308+ but whatever
    char temp[maxLen+1];
    int pos = 0;

    // Sign: '-' is kept, '+' is consumed but not stored.
    if (*str == QLatin1Char('-')) {
        temp[pos++] = '-';
        ++str;
    } else if (*str == QLatin1Char('+')) {
        ++str;
    }

    // Integer part.
    for (; isDigit(str->unicode()) && pos < maxLen; ++str)
        temp[pos++] = str->toLatin1();

    // Decimal point.
    if (*str == QLatin1Char('.') && pos < maxLen) {
        temp[pos++] = '.';
        ++str;
    }

    // Fractional part.
    for (; isDigit(str->unicode()) && pos < maxLen; ++str)
        temp[pos++] = str->toLatin1();

    // Exponent: marker, optional sign, digits.
    bool exponent = false;
    if ((*str == QLatin1Char('e') || *str == QLatin1Char('E')) && pos < maxLen) {
        exponent = true;
        temp[pos++] = 'e';
        ++str;
        if ((*str == QLatin1Char('-') || *str == QLatin1Char('+')) && pos < maxLen) {
            temp[pos++] = str->toLatin1();
            ++str;
        }
        for (; isDigit(str->unicode()) && pos < maxLen; ++str)
            temp[pos++] = str->toLatin1();
    }
    temp[pos] = '\0';

    // Long text or an exponent: use the general converter.
    if (exponent || pos >= 10) {
        bool ok = false;
        return qstrtod(temp, 0, &ok);
    }

    // Short text: accumulate all digits as one integer and divide by the
    // power of ten given by the number of fractional digits.
    const char *cursor = temp;
    const bool negative = (*cursor == '-');
    if (negative)
        ++cursor;

    int mantissa = 0;
    for (; *cursor && *cursor != '.'; ++cursor)
        mantissa = mantissa * 10 + (*cursor - '0');

    qreal result;
    if (*cursor == '.') {
        int scale = 1;
        for (++cursor; *cursor; ++cursor) {
            mantissa = mantissa * 10 + (*cursor - '0');
            scale *= 10;
        }
        result = qreal(mantissa) / qreal(scale);
    } else {
        result = mantissa;
    }
    return negative ? -result : result;
}
// Appends to points every number found at str, advancing str past them.
// Numbers may be separated by whitespace and at most one comma each; parsing
// stops at the first character that cannot start a number.
static inline void parseNumbersArray(const QChar *&str, QVarLengthArray<qreal, 8> &points)
{
    while (str->isSpace())
        ++str;

    for (;;) {
        const bool startsNumber = isDigit(str->unicode())
                                  || *str == QLatin1Char('-')
                                  || *str == QLatin1Char('+')
                                  || *str == QLatin1Char('.');
        if (!startsNumber)
            break;

        points.append(toDouble(str));

        // Separator: whitespace, an optional comma, then whitespace again.
        while (str->isSpace())
            ++str;
        if (*str == QLatin1Char(','))
            ++str;
        while (str->isSpace())
            ++str;
    }
}
/**
static QVector<qreal> parsePercentageList(const QChar *&str)
{
QVector<qreal> points;
if (!str)
return points;
while (str->isSpace())
++str;
while ((*str >= QLatin1Char('0') && *str <= QLatin1Char('9')) ||
*str == QLatin1Char('-') || *str == QLatin1Char('+') ||
*str == QLatin1Char('.')) {
points.append(toDouble(str));
while (str->isSpace())
++str;
if (*str == QLatin1Char('%'))
++str;
while (str->isSpace())
++str;
if (*str == QLatin1Char(','))
++str;
//eat the rest of space
while (str->isSpace())
++str;
}
return points;
}
**/
// Appends one cubic Bezier to path that approximates the arc from angle th0
// to angle th1 (radians) of the unit circle centred at (xc, yc), mapped by
// the matrix built from rx, ry and xAxisRotation (degrees).
static void pathArcSegment(QPainterPath &path,
                           qreal xc, qreal yc,
                           qreal th0, qreal th1,
                           qreal rx, qreal ry, qreal xAxisRotation)
{
    // Rotation-and-scale matrix taking unit-circle coordinates to the path.
    const qreal sinTh = qSin(xAxisRotation * (M_PI / 180.0));
    const qreal cosTh = qCos(xAxisRotation * (M_PI / 180.0));
    const qreal a00 = cosTh * rx;
    const qreal a01 = -sinTh * ry;
    const qreal a10 = sinTh * rx;
    const qreal a11 = cosTh * ry;

    // Length factor of the control-point offsets along the tangents.
    const qreal thHalf = 0.5 * (th1 - th0);
    const qreal sinQuarter = qSin(thHalf * 0.5);
    const qreal t = (8.0 / 3.0) * sinQuarter * sinQuarter / qSin(thHalf);

    // Control points and end point on the unit circle.
    const qreal cos0 = qCos(th0);
    const qreal sin0 = qSin(th0);
    const qreal cos1 = qCos(th1);
    const qreal sin1 = qSin(th1);
    const qreal x1 = xc + cos0 - t * sin0;
    const qreal y1 = yc + sin0 + t * cos0;
    const qreal x3 = xc + cos1;
    const qreal y3 = yc + sin1;
    const qreal x2 = x3 + t * sin1;
    const qreal y2 = y3 - t * cos1;

    path.cubicTo(a00 * x1 + a01 * y1, a10 * x1 + a11 * y1,
                 a00 * x2 + a01 * y2, a10 * x2 + a11 * y2,
                 a00 * x3 + a01 * y3, a10 * x3 + a11 * y3);
}
// the arc handling code underneath is from XSVG (BSD license)
/*
* Copyright 2002 USC/Information Sciences Institute
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without
* fee, provided that the above copyright notice appear in all copies
* and that both that copyright notice and this permission notice
* appear in supporting documentation, and that the name of
* Information Sciences Institute not be used in advertising or
* publicity pertaining to distribution of the software without
* specific, written prior permission. Information Sciences Institute
* makes no representations about the suitability of this software for
* any purpose. It is provided "as is" without express or implied
* warranty.
*
* INFORMATION SCIENCES INSTITUTE DISCLAIMS ALL WARRANTIES WITH REGARD
* TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL INFORMATION SCIENCES
* INSTITUTE BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
* OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*
*/
// Appends an SVG elliptical arc to path as a sequence of cubic Beziers.
//
//   rx, ry            ellipse radii (the sign is ignored)
//   x_axis_rotation   rotation of the ellipse x axis, in degrees
//   large_arc_flag    SVG large-arc flag
//   sweep_flag        SVG sweep flag
//   x, y              end point of the arc
//   curx, cury        current point, i.e. start of the arc
//
// Degenerate input follows the SVG implementation notes: a zero radius is
// drawn as a straight line to (x, y); coincident start and end points add
// nothing. Without these guards the computation below divides by zero and
// feeds NaN/inf coordinates into the path.
static void pathArc(QPainterPath &path,
                    qreal rx,
                    qreal ry,
                    qreal x_axis_rotation,
                    int large_arc_flag,
                    int sweep_flag,
                    qreal x,
                    qreal y,
                    qreal curx, qreal cury)
{
    qreal sin_th, cos_th;
    qreal a00, a01, a10, a11;
    qreal x0, y0, x1, y1, xc, yc;
    qreal d, sfactor, sfactor_sq;
    qreal th0, th1, th_arc;
    int i, n_segs;
    qreal dx, dy, dx1, dy1, Pr1, Pr2, Px, Py, check;

    rx = qAbs(rx);
    ry = qAbs(ry);

    // Zero radius: treat as a straight line (avoids dividing by rx / ry).
    if (rx == 0 || ry == 0) {
        path.lineTo(x, y);
        return;
    }

    sin_th = qSin(x_axis_rotation * (M_PI / 180.0));
    cos_th = qCos(x_axis_rotation * (M_PI / 180.0));

    dx = (curx - x) / 2.0;
    dy = (cury - y) / 2.0;
    dx1 = cos_th * dx + sin_th * dy;
    dy1 = -sin_th * dx + cos_th * dy;
    Pr1 = rx * rx;
    Pr2 = ry * ry;
    Px = dx1 * dx1;
    Py = dy1 * dy1;
    /* Spec : check if radii are large enough */
    check = Px / Pr1 + Py / Pr2;
    if (check > 1) {
        rx = rx * qSqrt(check);
        ry = ry * qSqrt(check);
    }

    a00 = cos_th / rx;
    a01 = sin_th / rx;
    a10 = -sin_th / ry;
    a11 = cos_th / ry;
    x0 = a00 * curx + a01 * cury;
    y0 = a10 * curx + a11 * cury;
    x1 = a00 * x + a01 * y;
    y1 = a10 * x + a11 * y;
    /* (x0, y0) is current point in transformed coordinate space.
       (x1, y1) is new point in transformed coordinate space.
       The arc fits a unit-radius circle in this space.
    */
    d = (x1 - x0) * (x1 - x0) + (y1 - y0) * (y1 - y0);
    // Coincident end points: the arc is omitted (avoids 1.0 / 0).
    if (d == 0)
        return;
    sfactor_sq = 1.0 / d - 0.25;
    if (sfactor_sq < 0) sfactor_sq = 0;
    sfactor = qSqrt(sfactor_sq);
    if (sweep_flag == large_arc_flag) sfactor = -sfactor;
    xc = 0.5 * (x0 + x1) - sfactor * (y1 - y0);
    yc = 0.5 * (y0 + y1) + sfactor * (x1 - x0);
    /* (xc, yc) is center of the circle. */

    th0 = qAtan2(y0 - yc, x0 - xc);
    th1 = qAtan2(y1 - yc, x1 - xc);

    // Bring the swept angle into the direction requested by sweep_flag.
    th_arc = th1 - th0;
    if (th_arc < 0 && sweep_flag)
        th_arc += 2 * M_PI;
    else if (th_arc > 0 && !sweep_flag)
        th_arc -= 2 * M_PI;

    // One Bezier per slice of at most (just over) a quarter turn.
    n_segs = qCeil(qAbs(th_arc / (M_PI * 0.5 + 0.001)));

    for (i = 0; i < n_segs; i++) {
        pathArcSegment(path, xc, yc,
                       th0 + i * th_arc / n_segs,
                       th0 + (i + 1) * th_arc / n_segs,
                       rx, ry, x_axis_rotation);
    }
}
// Parses SVG path data from dataStr and appends the segments to path.
//
// Each pass of the outer loop reads one command character followed by its
// list of numbers; the inner loop then consumes those numbers in groups of
// the size the command needs, so a command letter followed by several
// groups repeats the command. Incomplete trailing groups are dropped.
//
// Returns false when a command character outside the handled set is met
// with at least one pending argument, true otherwise.
//
// NOTE: the character at the end of dataStr is temporarily overwritten with
// 0 while numbers are parsed and restored afterwards.
bool parsePathDataFast(const QStringRef &dataStr, QPainterPath &path)
{
    qreal x0 = 0, y0 = 0;              // starting point
    qreal x = 0, y = 0;                // current point
    char lastMode = 0;
    QPointF ctrlPt;
    const QChar *str = dataStr.constData();
    const QChar *end = str + dataStr.size();

    while (str != end) {
        // Skip leading whitespace but stop on the last character: otherwise
        // trailing whitespace moves str past end and the loop condition
        // (str != end) can never become false again.
        while (str->isSpace() && (str + 1) != end)
            ++str;
        QChar pathElem = *str;
        ++str;
        QChar endc = *end;
        *const_cast<QChar *>(end) = 0; // parseNumbersArray requires 0-termination that QStringRef cannot guarantee
        QVarLengthArray<qreal, 8> arg;
        parseNumbersArray(str, arg);
        *const_cast<QChar *>(end) = endc;
        if (pathElem == QLatin1Char('z') || pathElem == QLatin1Char('Z'))
            arg.append(0);//dummy
        const qreal *num = arg.constData();
        int count = arg.count();
        while (count > 0) {
            qreal offsetX = x;        // correction offsets
            qreal offsetY = y;        // for relative commands
            switch (pathElem.unicode()) {
            case 'm': {
                if (count < 2) {
                    num++;
                    count--;
                    break;
                }
                x = x0 = num[0] + offsetX;
                y = y0 = num[1] + offsetY;
                num += 2;
                count -= 2;
                path.moveTo(x0, y0);
                // As per 1.2 spec 8.3.2 The "moveto" commands
                // If a 'moveto' is followed by multiple pairs of coordinates without explicit commands,
                // the subsequent pairs shall be treated as implicit 'lineto' commands.
                pathElem = QLatin1Char('l');
            }
                break;
            case 'M': {
                if (count < 2) {
                    num++;
                    count--;
                    break;
                }
                x = x0 = num[0];
                y = y0 = num[1];
                num += 2;
                count -= 2;
                path.moveTo(x0, y0);
                // As per 1.2 spec 8.3.2 The "moveto" commands
                // If a 'moveto' is followed by multiple pairs of coordinates without explicit commands,
                // the subsequent pairs shall be treated as implicit 'lineto' commands.
                pathElem = QLatin1Char('L');
            }
                break;
            case 'z':
            case 'Z': {
                x = x0;
                y = y0;
                count--; // skip dummy
                num++;
                path.closeSubpath();
            }
                break;
            case 'l': {
                if (count < 2) {
                    num++;
                    count--;
                    break;
                }
                x = num[0] + offsetX;
                y = num[1] + offsetY;
                num += 2;
                count -= 2;
                path.lineTo(x, y);
            }
                break;
            case 'L': {
                if (count < 2) {
                    num++;
                    count--;
                    break;
                }
                x = num[0];
                y = num[1];
                num += 2;
                count -= 2;
                path.lineTo(x, y);
            }
                break;
            case 'h': {
                x = num[0] + offsetX;
                num++;
                count--;
                path.lineTo(x, y);
            }
                break;
            case 'H': {
                x = num[0];
                num++;
                count--;
                path.lineTo(x, y);
            }
                break;
            case 'v': {
                y = num[0] + offsetY;
                num++;
                count--;
                path.lineTo(x, y);
            }
                break;
            case 'V': {
                y = num[0];
                num++;
                count--;
                path.lineTo(x, y);
            }
                break;
            case 'c': {
                if (count < 6) {
                    num += count;
                    count = 0;
                    break;
                }
                QPointF c1(num[0] + offsetX, num[1] + offsetY);
                QPointF c2(num[2] + offsetX, num[3] + offsetY);
                QPointF e(num[4] + offsetX, num[5] + offsetY);
                num += 6;
                count -= 6;
                path.cubicTo(c1, c2, e);
                ctrlPt = c2;
                x = e.x();
                y = e.y();
                break;
            }
            case 'C': {
                if (count < 6) {
                    num += count;
                    count = 0;
                    break;
                }
                QPointF c1(num[0], num[1]);
                QPointF c2(num[2], num[3]);
                QPointF e(num[4], num[5]);
                num += 6;
                count -= 6;
                path.cubicTo(c1, c2, e);
                ctrlPt = c2;
                x = e.x();
                y = e.y();
                break;
            }
            case 's': {
                if (count < 4) {
                    num += count;
                    count = 0;
                    break;
                }
                // First control point: reflection of the previous control
                // point after a cubic command, else the current point.
                QPointF c1;
                if (lastMode == 'c' || lastMode == 'C' ||
                    lastMode == 's' || lastMode == 'S')
                    c1 = QPointF(2*x-ctrlPt.x(), 2*y-ctrlPt.y());
                else
                    c1 = QPointF(x, y);
                QPointF c2(num[0] + offsetX, num[1] + offsetY);
                QPointF e(num[2] + offsetX, num[3] + offsetY);
                num += 4;
                count -= 4;
                path.cubicTo(c1, c2, e);
                ctrlPt = c2;
                x = e.x();
                y = e.y();
                break;
            }
            case 'S': {
                if (count < 4) {
                    num += count;
                    count = 0;
                    break;
                }
                QPointF c1;
                if (lastMode == 'c' || lastMode == 'C' ||
                    lastMode == 's' || lastMode == 'S')
                    c1 = QPointF(2*x-ctrlPt.x(), 2*y-ctrlPt.y());
                else
                    c1 = QPointF(x, y);
                QPointF c2(num[0], num[1]);
                QPointF e(num[2], num[3]);
                num += 4;
                count -= 4;
                path.cubicTo(c1, c2, e);
                ctrlPt = c2;
                x = e.x();
                y = e.y();
                break;
            }
            case 'q': {
                if (count < 4) {
                    num += count;
                    count = 0;
                    break;
                }
                QPointF c(num[0] + offsetX, num[1] + offsetY);
                QPointF e(num[2] + offsetX, num[3] + offsetY);
                num += 4;
                count -= 4;
                path.quadTo(c, e);
                ctrlPt = c;
                x = e.x();
                y = e.y();
                break;
            }
            case 'Q': {
                if (count < 4) {
                    num += count;
                    count = 0;
                    break;
                }
                QPointF c(num[0], num[1]);
                QPointF e(num[2], num[3]);
                num += 4;
                count -= 4;
                path.quadTo(c, e);
                ctrlPt = c;
                x = e.x();
                y = e.y();
                break;
            }
            case 't': {
                if (count < 2) {
                    num += count;
                    count = 0;
                    break;
                }
                QPointF e(num[0] + offsetX, num[1] + offsetY);
                num += 2;
                count -= 2;
                // Control point: reflection of the previous control point
                // after a quadratic command, else the current point.
                QPointF c;
                if (lastMode == 'q' || lastMode == 'Q' ||
                    lastMode == 't' || lastMode == 'T')
                    c = QPointF(2*x-ctrlPt.x(), 2*y-ctrlPt.y());
                else
                    c = QPointF(x, y);
                path.quadTo(c, e);
                ctrlPt = c;
                x = e.x();
                y = e.y();
                break;
            }
            case 'T': {
                if (count < 2) {
                    num += count;
                    count = 0;
                    break;
                }
                QPointF e(num[0], num[1]);
                num += 2;
                count -= 2;
                QPointF c;
                if (lastMode == 'q' || lastMode == 'Q' ||
                    lastMode == 't' || lastMode == 'T')
                    c = QPointF(2*x-ctrlPt.x(), 2*y-ctrlPt.y());
                else
                    c = QPointF(x, y);
                path.quadTo(c, e);
                ctrlPt = c;
                x = e.x();
                y = e.y();
                break;
            }
            case 'a': {
                if (count < 7) {
                    num += count;
                    count = 0;
                    break;
                }
                qreal rx = (*num++);
                qreal ry = (*num++);
                qreal xAxisRotation = (*num++);
                qreal largeArcFlag = (*num++);
                qreal sweepFlag = (*num++);
                qreal ex = (*num++) + offsetX;
                qreal ey = (*num++) + offsetY;
                count -= 7;
                qreal curx = x;
                qreal cury = y;
                pathArc(path, rx, ry, xAxisRotation, int(largeArcFlag),
                        int(sweepFlag), ex, ey, curx, cury);
                x = ex;
                y = ey;
            }
                break;
            case 'A': {
                if (count < 7) {
                    num += count;
                    count = 0;
                    break;
                }
                qreal rx = (*num++);
                qreal ry = (*num++);
                qreal xAxisRotation = (*num++);
                qreal largeArcFlag = (*num++);
                qreal sweepFlag = (*num++);
                qreal ex = (*num++);
                qreal ey = (*num++);
                count -= 7;
                qreal curx = x;
                qreal cury = y;
                pathArc(path, rx, ry, xAxisRotation, int(largeArcFlag),
                        int(sweepFlag), ex, ey, curx, cury);
                x = ex;
                y = ey;
            }
                break;
            default:
                return false;
            }
            // Remembered so that s/S and t/T can tell whether a reflected
            // control point is available.
            lastMode = pathElem.toLatin1();
        }
    }
    return true;
}
One question: I couldn't find the Q_PI constant in the standard Qt headers, so I replaced it with M_PI. I hope that is OK!
I'm looking for an algorithm to find bounding box (max/min points) of a closed quadratic bezier curve in Cartesian axis:
input: C (a closed bezier curve)
output: A B C D points
Image http://www.imagechicken.com/uploads/1270586513022388700.jpg
Note: above image shows a smooth curve. it could be not smooth. (have corners)
Ivan Kuckir's De Casteljau approach is brute force, but works in many cases. The problem with it is the iteration count. The actual shape and the distance between coordinates affect the precision of the result. To find a precise enough answer you have to iterate tens of times, maybe more. And it may fail if there are sharp turns in the curve.
Better solution is to find first derivative roots, as is described on the excellent site http://processingjs.nihongoresources.com/bezierinfo/. Please read the section Finding the extremities of the curves.
The link above has the algorithm for both quadratic and cubic curves.
The asker of question is interested in quadratic curves, so the rest of this answer may be irrelevant, because I provide codes for calculating extremities of Cubic curves.
Below are three Javascript codes of which the first (CODE 1) is the one I suggest to use.
** CODE 1 **
After testing the processingjs and Raphael solutions I found that they had some restrictions and/or bugs. After more searching I found Bonsai and its bounding box function, which is based on NISHIO Hirokazu's Python script. Both have a downside: floating-point equality is tested using ==. When I changed these to numerically robust comparisons, the script gave the right result in all cases. I tested the script with thousands of random paths and also with all collinear cases, and all succeeded:
Various cubic curves
Random cubic curves
Collinear cubic curves
The code is as follows. Usually the left, right, top and bottom values are all that is needed, but in some cases it is useful to know the coordinates of the local extreme points and the corresponding t values, so I added two variables: tvalues and points. Remove the code regarding them and you have a fast and stable bounding box calculation function.
// Source: http://blog.hackers-cafe.net/2009/06/how-to-calculate-bezier-curves-bounding.html
// Original version: NISHIO Hirokazu
// Modifications: Timo
// Local aliases for the Math functions used by getBoundsOfCurve.
// All five are part of one var statement; previously a stray ';' after
// "max" turned "abs" into an implicit global (a ReferenceError in strict mode).
var pow = Math.pow,
    sqrt = Math.sqrt,
    min = Math.min,
    max = Math.max,
    abs = Math.abs;
/**
 * Bounding box of a cubic Bezier curve, found from the roots of the first
 * derivative of each coordinate.
 *
 * (x0, y0) is the start point, (x1, y1) and (x2, y2) are the control points
 * and (x3, y3) is the end point. Relies on the file-level aliases abs, sqrt,
 * min and max.
 *
 * @returns {{left, top, right, bottom, points, tvalues}} box limits, the
 *   interior extreme points followed by the two end points, and the
 *   matching t values (the end points are reported as t = 0 and t = 1)
 */
function getBoundsOfCurve(x0, y0, x1, y1, x2, y2, x3, y3)
{
    var tvalues = [];
    var points = [];
    var bounds = [[], []];
    var axes = [[x0, x1, x2, x3], [y0, y1, y2, y3]];

    // Collect the interior roots of a*t^2 + b*t + c for x, then for y.
    for (var axis = 0; axis < axes.length; ++axis)
    {
        var p = axes[axis];
        var b = 6 * p[0] - 12 * p[1] + 6 * p[2];
        var a = -3 * p[0] + 9 * p[1] - 9 * p[2] + 3 * p[3];
        var c = 3 * p[1] - 3 * p[0];

        if (abs(a) < 1e-12) // Numerical robustness
        {
            // Derivative is (at most) linear.
            if (!(abs(b) < 1e-12)) // Numerical robustness
            {
                var linearRoot = -c / b;
                if (0 < linearRoot && linearRoot < 1)
                {
                    tvalues.push(linearRoot);
                }
            }
            continue;
        }

        var b2ac = b * b - 4 * c * a;
        var sqrtb2ac = sqrt(b2ac);
        if (b2ac < 0)
        {
            continue;
        }
        var roots = [(-b + sqrtb2ac) / (2 * a), (-b - sqrtb2ac) / (2 * a)];
        for (var r = 0; r < roots.length; ++r)
        {
            if (0 < roots[r] && roots[r] < 1)
            {
                tvalues.push(roots[r]);
            }
        }
    }

    // Evaluate the curve at every interior root.
    var interior = tvalues.length;
    for (var j = 0; j < interior; ++j)
    {
        var t = tvalues[j];
        var mt = 1 - t;
        var x = (mt * mt * mt * x0) + (3 * mt * mt * t * x1) + (3 * mt * t * t * x2) + (t * t * t * x3);
        var y = (mt * mt * mt * y0) + (3 * mt * mt * t * y1) + (3 * mt * t * t * y2) + (t * t * t * y3);
        bounds[0][j] = x;
        bounds[1][j] = y;
        points[j] = { X: x, Y: y };
    }

    // The end points always take part in the box.
    tvalues.push(0, 1);
    points.push({ X: x0, Y: y0 }, { X: x3, Y: y3 });
    bounds[0].push(x0, x3);
    bounds[1].push(y0, y3);

    return {
        left: min.apply(null, bounds[0]),
        top: min.apply(null, bounds[1]),
        right: max.apply(null, bounds[0]),
        bottom: max.apply(null, bounds[1]),
        points: points, // local extremes
        tvalues: tvalues // t values of local extremes
    };
}
// Usage:
// Arguments are x0, y0, x1, y1, x2, y2, x3, y3 in that order.
var bounds = getBoundsOfCurve(532,333,117,305,28,93,265,42);
console.log(JSON.stringify(bounds));
// Prints: {"left":135.77684049079755,"top":42,"right":532,"bottom":333,"points":[{"X":135.77684049079755,"Y":144.86387466397255},{"X":532,"Y":333},{"X":265,"Y":42}],"tvalues":[0.6365030674846626,0,1]}
CODE 2 (which fails in collinear cases):
I translated the code from http://processingjs.nihongoresources.com/bezierinfo/sketchsource.php?sketch=tightBoundsCubicBezier to Javascript. The code works fine in normal cases, but not in collinear cases where all points lie on the same line.
For reference, here is the Javascript code.
/**
 * Evaluates one coordinate of a cubic Bezier at parameter t.
 *
 * @param {number} a start value
 * @param {number} b first control value
 * @param {number} c second control value
 * @param {number} d end value
 * @param {number} t curve parameter
 * @returns {number} the Bernstein-weighted sum of a, b, c and d
 */
function computeCubicBaseValue(a,b,c,d,t) {
    var mt = 1-t;
    var w0 = mt*mt*mt;
    var w1 = 3*mt*mt*t;
    var w2 = 3*mt*t*t;
    var w3 = t*t*t;
    return w0*a + w1*b + w2*c + w3*d;
}
/**
 * Roots of the first derivative of one coordinate of a cubic Bezier with
 * values a, b, c, d (start, control 1, control 2, end).
 *
 * The derivative is proportional to dn*t^2 - 2*tl*t + (b - a). When dn
 * vanishes (for example when the control values are evenly spread) the
 * derivative is linear and still has one root; that case used to be
 * reported as "no roots", which lost the extremum.
 *
 * @returns {number[]} always two entries; unused entries hold -1, which
 *   callers reject with their 0 <= t <= 1 test
 */
function computeCubicFirstDerivativeRoots(a,b,c,d) {
    var EPS = 1e-12;
    var ret = [-1,-1];
    var tl = -a+2*b-c;
    var dn = -a+3*b-3*c+d;
    if (Math.abs(dn) > EPS) {
        var disc = -a*(c-d) + b*b - b*(c+d) + c*c;
        // A negative discriminant means no real root: keep the -1 markers
        // instead of returning NaN.
        if (disc >= 0) {
            var tr = -Math.sqrt(disc);
            ret[0] = (tl+tr)/dn;
            ret[1] = (tl-tr)/dn;
        }
    } else if (Math.abs(tl) > EPS) {
        // Linear derivative: -2*tl*t + (b - a) = 0.
        ret[0] = (b-a)/(2*tl);
    }
    return ret;
}
/**
 * Axis-aligned bounding box of the cubic Bezier with start (xa, ya),
 * control points (xb, yb) and (xc, yc) and end (xd, yd).
 *
 * The box starts from the two end points and is widened by the curve value
 * at every first-derivative root that lies in [0, 1].
 *
 * Fixes over the earlier version: computeCubicBaseValue is now called with t
 * as its last argument (it was passed first), the limits start from the end
 * points rather than from +/-9999, and each axis only evaluates its own
 * coordinate.
 *
 * @returns {number[]} corner coordinates
 *   [minx,miny, maxx,miny, maxx,maxy, minx,maxy]
 */
function computeCubicBoundingBox(xa,ya,xb,yb,xc,yc,xd,yd)
{
    var i, t, value, ts;

    // x range
    var minx = Math.min(xa, xd);
    var maxx = Math.max(xa, xd);
    ts = computeCubicFirstDerivativeRoots(xa, xb, xc, xd);
    for (i = 0; i < ts.length; i++) {
        t = ts[i];
        if (t >= 0 && t <= 1) {
            value = computeCubicBaseValue(xa, xb, xc, xd, t);
            if (value < minx) { minx = value; }
            if (value > maxx) { maxx = value; }
        }
    }

    // y range
    var miny = Math.min(ya, yd);
    var maxy = Math.max(ya, yd);
    ts = computeCubicFirstDerivativeRoots(ya, yb, yc, yd);
    for (i = 0; i < ts.length; i++) {
        t = ts[i];
        if (t >= 0 && t <= 1) {
            value = computeCubicBaseValue(ya, yb, yc, yd, t);
            if (value < miny) { miny = value; }
            if (value > maxy) { maxy = value; }
        }
    }

    // bounding box corner coordinates
    var bbox = [minx,miny, maxx,miny, maxx,maxy, minx,maxy ];
    return bbox;
}
CODE 3 (works in most cases):
To handle collinear cases as well, I found Raphael's solution, which is based on the same first-derivative method as CODE 2. I also added a return value, dots, which holds the extrema points, because it is not always enough to know the bounding box's min and max coordinates; sometimes we want to know the exact extrema coordinates.
EDIT: found another bug. Fails eg. in 532,333,117,305,28,93,265,42 and also many other cases.
The code is here:
// Adds Array.max(array) and Array.min(array): the largest / smallest element
// of the given array, obtained by applying the matching Math function to it.
['max', 'min'].forEach(function (name) {
    Array[name] = function (array) {
        return Math[name].apply(Math, array);
    };
});
/**
 * Point on a cubic Bezier at parameter t.
 *
 * (p1x, p1y) is the start point, (c1x, c1y) and (c2x, c2y) are the control
 * points and (p2x, p2y) is the end point.
 *
 * @returns {{x: number, y: number}}
 */
var findDotAtSegment = function (p1x, p1y, c1x, c1y, c2x, c2y, p2x, p2y, t) {
    var t1 = 1 - t;
    // Bernstein weights, shared by both coordinates.
    var w0 = t1*t1*t1;
    var w1 = t1*t1*3*t;
    var w2 = t1*3*t*t;
    var w3 = t*t*t;
    var px = w0*p1x + w1*c1x + w2 * c2x + w3 * p2x;
    var py = w0*p1y + w1*c1y + w2 * c2y + w3 * p2y;
    return { x: px, y: py };
};
/**
 * Bounding box and extrema points of a cubic Bezier.
 *
 * (p1x, p1y) is the start point, (c1x, c1y) and (c2x, c2y) are the control
 * points and (p2x, p2y) is the end point.
 *
 * For each coordinate the roots of a*t^2 + b*t + c (a multiple of the first
 * derivative) inside [0, 1] are located and the curve is evaluated there.
 *
 * Fixes over the earlier version:
 *  - when a is (numerically) zero the derivative is linear and its single
 *    root -c/b is used; previously the division by a produced an infinite
 *    value that was replaced by t = 0.5, i.e. by a point that is not an
 *    extremum (this is why 532,333,117,305,28,93,265,42 failed);
 *  - a negative discriminant simply yields no root;
 *  - duplicate extrema are removed with a plain "already seen" check.
 *
 * @returns {{min: {x, y}, max: {x, y}, dots: Array<{X, Y}>}}
 */
var cubicBBox = function (p1x, p1y, c1x, c1y, c2x, c2y, p2x, p2y) {
    var EPS = 1e-12;

    // Derivative roots of one coordinate that fall inside [0, 1].
    var extremaParams = function (p1, c1, c2, p2) {
        var a = (c2 - 2 * c1 + p1) - (p2 - 2 * c2 + c1),
            b = 2 * (c1 - p1) - 2 * (c2 - c1),
            c = p1 - c1,
            roots = [];
        if (Math.abs(a) < EPS) {
            if (Math.abs(b) >= EPS) {
                roots.push(-c / b);
            }
        } else {
            var disc = b * b - 4 * a * c;
            if (disc >= 0) {
                var s = Math.sqrt(disc);
                roots.push((-b + s) / (2 * a), (-b - s) / (2 * a));
            }
        }
        return roots.filter(function (t) {
            return t >= 0 && t <= 1;
        });
    };

    var x = [p1x, p2x],
        y = [p1y, p2y],
        dots = [];

    var params = extremaParams(p1x, c1x, c2x, p2x)
        .concat(extremaParams(p1y, c1y, c2y, p2y));

    params.forEach(function (t) {
        var dot = findDotAtSegment(p1x, p1y, c1x, c1y, c2x, c2y, p2x, p2y, t);
        x.push(dot.x);
        y.push(dot.y);
        // Keep each extremum once.
        var seen = dots.some(function (d) {
            return d.X === dot.x && d.Y === dot.y;
        });
        if (!seen) {
            dots.push({X: dot.x, Y: dot.y});
        }
    });

    return {
        min: {x: Math.min.apply(Math, x), y: Math.min.apply(Math, y)},
        max: {x: Math.max.apply(Math, x), y: Math.max.apply(Math, y)},
        dots: dots // these are the extrema points
    };
};
Well, I would say you start by adding all endpoints to your bounding box. Then, you go through all the bezier elements. I assume the formula in question is this one:
From this, extract two formulas for X and Y, respectively. Test both for extrema by taking the derivative (zero crossings). Then add the corresponding points to your bounding box as well.
Use De Casteljau algorithm to approximate the curve of higher orders. Here is how it works for cubic curve
http://jsfiddle.net/4VCVX/25/
/**
 * Approximate bounding box of the cubic Bezier a, b, c, d, obtained by
 * sampling the curve with De Casteljau's construction at steps of 1/40.
 *
 * @returns {{x: number, y: number, width: number, height: number}}
 *   smallest sampled x and y plus the sampled extent
 */
function getCurveBounds(ax, ay, bx, by, cx, cy, dx, dy)
{
    // Point at fraction d on the way from "from" to "to".
    var mix = function (from, to, d) {
        return from + d * (to - from);
    };

    var minx = Number.POSITIVE_INFINITY, miny = Number.POSITIVE_INFINITY;
    var maxx = Number.NEGATIVE_INFINITY, maxy = Number.NEGATIVE_INFINITY;

    var step = 1/40; // precision
    for (var d = 0; d < 1.001; d += step)
    {
        // First level: points on the three control-polygon edges.
        var px = mix(ax, bx, d), py = mix(ay, by, d);
        var qx = mix(bx, cx, d), qy = mix(by, cy, d);
        var rx = mix(cx, dx, d), ry = mix(cy, dy, d);

        // Second level.
        var sx = mix(px, qx, d), sy = mix(py, qy, d);
        var tx = mix(qx, rx, d), ty = mix(qy, ry, d);

        // Third level: the point on the curve.
        var x = mix(sx, tx, d), y = mix(sy, ty, d);

        minx = Math.min(minx, x); miny = Math.min(miny, y);
        maxx = Math.max(maxx, x); maxy = Math.max(maxy, y);
    }
    return {x:minx, y:miny, width:maxx-minx, height:maxy-miny};
}
I believe that the control points of a Bezier curve form a convex hull that encloses the curve. If you just want a axis-aligned bounding box, I think you need to find the min and max of each (x, y) for each control point of all the segments.
I suppose that might not be a tight box. That is, the box might be slightly larger than it needs to be, but it's simple and fast to compute. I guess it depends on your requirements.
I think the accepted answer is fine, but just wanted to offer a little more explanation for anyone else trying to do this.
Consider a quadratic Bezier with starting point p1, ending point p2 and "control point" pc. This curve has three parametric equations:
pa(t) = p1 + t(pc-p1)
pb(t) = pc + t(p2-pc)
p(t) = pa(t) + t*(pb(t) - pa(t))
In all cases, t runs from 0 to 1, inclusive.
The first two are linear, defining line segments from p1 to pc and from pc to p2, respectively. The third is quadratic once you substitute in the expressions for pa(t) and pb(t); this is the one that actually defines points on the curve.
Actually, each of these equations is a pair of equations, one for the horizontal dimension, and one for the vertical. The nice thing about parametric curves is that the x and y can be handled independently of one another. The equations are exactly the same, just substitute x or y for p in the above equations.
The important point is that the line segment defined in equation 3, that runs from pa(t) to pb(t) for a specific value of t is tangent to the curve at the corresponding point p(t). To find the local extrema of the curve, you need to find the parameter value where the tangent is flat (i.e., a critical point). For the vertical dimension, you want to find the value of t such that ya(t) = yb(t), which gives the tangent a slope of 0. For the horizontal dimension, find t such that xa(t) = xb(t), which gives the tangent an infinite slope (i.e., a vertical line). In each case, you can just plug the value of t back into equation 1 (or 2, or even 3) to get the location of that extrema.
In other words, to find the vertical extrema of the curve, take just the y-component of equations 1 and 2, set them equal to each other and solve for t; plug this back into the y-component of equation 1, to get the y-value of that extrema. To get the complete y-range of the curve, find the minimum of this extreme y value and the y-components of the two end points, and likewise find the maximum of all three. Repeat for x to get the horizontal limits.
Remember that t only runs in [0, 1], so if you get a value outside of this range, it means there is no local extrema on the curve (at least not between your two endpoints). This includes the case where you end up dividing by zero when solving for t, which you will probably need to check for before you do it.
The same idea can be applied to higher-order Beziers, there are just more equations of higher degree, which also means there are potentially more local extrema per curve. For instance, on a cubic Bezier (two control points), solving for t to find the local extrema is a quadratic equation, so you could get 0, 1, or 2 values (remember to check for 0-denominators, and for negative square-roots, both of which indicate that there are no local extrema for that dimension). To find the range, you just need to find the min/max of all the local extrema, and the two end points.
I answered this question in Calculating the bounding box of cubic bezier curve
this article explain the details and also has a live html5 demo:
Calculating / Computing the Bounding Box of Cubic Bezier
I found a javascript in Snap.svg to calculate that: here
see the bezierBBox and curveDim functions.
I rewrite a javascript function.
//(x0,y0) is start point; (x1,y1),(x2,y2) is control points; (x3,y3) is end point.
/**
 * Bounding box of a cubic Bezier curve.
 *
 * (x0,y0) is start point; (x1,y1),(x2,y2) is control points; (x3,y3) is end point.
 * The curve is evaluated at the interior roots of each coordinate's first
 * derivative and the results are combined with the two end points.
 *
 * @returns {{min: {x, y}, max: {x, y}}}
 */
function bezierMinMax(x0, y0, x1, y1, x2, y2, x3, y3) {
    var tvalues = [], xvalues = [], yvalues = [];
    var axes = [[x0, x1, x2, x3], [y0, y1, y2, y3]];

    var keepIfInterior = function (t) {
        if (0 < t && t < 1) {
            tvalues.push(t);
        }
    };

    for (var axis = 0; axis < 2; ++axis) {
        var p = axes[axis];
        var b = 6 * p[0] - 12 * p[1] + 6 * p[2];
        var a = -3 * p[0] + 9 * p[1] - 9 * p[2] + 3 * p[3];
        var c = 3 * p[1] - 3 * p[0];

        if (Math.abs(a) < 1e-12) {
            // Linear derivative (or constant, which has no root).
            if (!(Math.abs(b) < 1e-12)) {
                keepIfInterior(-c / b);
            }
            continue;
        }

        var b2ac = b * b - 4 * c * a;
        if (b2ac < 0) {
            continue;
        }
        var sqrtb2ac = Math.sqrt(b2ac);
        keepIfInterior((-b + sqrtb2ac) / (2 * a));
        keepIfInterior((-b - sqrtb2ac) / (2 * a));
    }

    for (var j = 0; j < tvalues.length; ++j) {
        var t = tvalues[j];
        var mt = 1 - t;
        xvalues[j] = (mt * mt * mt * x0) + (3 * mt * mt * t * x1) + (3 * mt * t * t * x2) + (t * t * t * x3);
        yvalues[j] = (mt * mt * mt * y0) + (3 * mt * mt * t * y1) + (3 * mt * t * t * y2) + (t * t * t * y3);
    }

    // End points always belong to the box.
    xvalues.push(x0, x3);
    yvalues.push(y0, y3);

    var lowest = {x: Math.min.apply(0, xvalues), y: Math.min.apply(0, yvalues)};
    var highest = {x: Math.max.apply(0, xvalues), y: Math.max.apply(0, yvalues)};
    return {
        min: lowest,
        max: highest
    };
}
Timo's first variant adapted to Objective-C
// Returns the point at parameter t on the cubic Bezier p1..p4 by passing the
// x and the y components separately through CubicBezier.
CGPoint CubicBezierPointAt(CGPoint p1, CGPoint p2, CGPoint p3, CGPoint p4, CGFloat t) {
    CGPoint point;
    point.x = CubicBezier(p1.x, p2.x, p3.x, p4.x, t);
    point.y = CubicBezier(p1.y, p2.y, p3.y, p4.y, t);
    return point;
}
// array containing TopLeft and BottomRight points for curve`s enclosing bounds
// Returns the enclosing bounds of the cubic Bezier p1..p4 (p1 and p4 are the
// end points, p2 and p3 the control points) as an array of two NSValue
// objects: the (minX, minY) point followed by the (maxX, maxY) point.
//
// Fixes over the earlier version:
//  - the key paths are written as @"@min.self" / @"@max.self"; they had been
//    garbled to #"#min.self", which does not compile;
//  - the near-zero tests use a 1e-12 tolerance; CGFLOAT_MIN is the smallest
//    positive CGFloat, so comparing against it was in effect an exact-zero
//    test.
NSArray* CubicBezierExtremums(CGPoint p1, CGPoint p2, CGPoint p3, CGPoint p4) {
    static const CGFloat kEpsilon = 1e-12; // Numerical robustness

    // Interior parameters at which the x or the y derivative vanishes.
    NSMutableArray *tValues = [NSMutableArray new];
    for (int i = 0; i < 2; i++) {
        CGFloat a, b, c;
        if (i == 0) {
            a = 3 * (-p1.x + 3 * p2.x - 3 * p3.x + p4.x);
            b = 6 * (p1.x - 2 * p2.x + p3.x);
            c = 3 * (p2.x - p1.x);
        }
        else {
            a = 3 * (-p1.y + 3 * p2.y - 3 * p3.y + p4.y);
            b = 6 * (p1.y - 2 * p2.y + p3.y);
            c = 3 * (p2.y - p1.y);
        }

        if (ABS(a) < kEpsilon) {
            // Linear derivative (or constant, which has no root).
            if (ABS(b) >= kEpsilon) {
                CGFloat t = -c / b;
                if (t > 0 && t < 1) {
                    [tValues addObject:[NSNumber numberWithDouble:t]];
                }
            }
            continue;
        }

        CGFloat b2ac = b * b - 4 * c * a;
        if (b2ac < 0) {
            continue;
        }
        CGFloat sqrtb2ac = sqrt(b2ac);
        CGFloat t1 = (-b + sqrtb2ac) / (2 * a);
        if (t1 > 0.0 && t1 < 1.0) {
            [tValues addObject:[NSNumber numberWithDouble:t1]];
        }
        CGFloat t2 = (-b - sqrtb2ac) / (2 * a);
        if (t2 > 0.0 && t2 < 1.0) {
            [tValues addObject:[NSNumber numberWithDouble:t2]];
        }
    }

    // Curve coordinates at those parameters, plus the two end points.
    NSMutableArray *xValues = [NSMutableArray new];
    NSMutableArray *yValues = [NSMutableArray new];
    for (NSNumber *tValue in tValues) {
        CGFloat t = [tValue doubleValue];
        CGFloat x = CubicBezier(p1.x, p2.x, p3.x, p4.x, t);
        CGFloat y = CubicBezier(p1.y, p2.y, p3.y, p4.y, t);
        [xValues addObject:[NSNumber numberWithDouble:x]];
        [yValues addObject:[NSNumber numberWithDouble:y]];
    }
    [xValues addObject:[NSNumber numberWithDouble:p1.x]];
    [xValues addObject:[NSNumber numberWithDouble:p4.x]];
    [yValues addObject:[NSNumber numberWithDouble:p1.y]];
    [yValues addObject:[NSNumber numberWithDouble:p4.y]];

    //find minX, minY, maxX, maxY
    CGFloat minX = [[xValues valueForKeyPath:@"@min.self"] doubleValue];
    CGFloat minY = [[yValues valueForKeyPath:@"@min.self"] doubleValue];
    CGFloat maxX = [[xValues valueForKeyPath:@"@max.self"] doubleValue];
    CGFloat maxY = [[yValues valueForKeyPath:@"@max.self"] doubleValue];

    CGPoint origin = CGPointMake(minX, minY);
    CGPoint bottomRight = CGPointMake(maxX, maxY);
    NSArray *toReturn = [NSArray arrayWithObjects:
                         [NSValue valueWithCGPoint:origin],
                         [NSValue valueWithCGPoint:bottomRight],
                         nil];
    return toReturn;
}
Timo's CODE 2 answer has a small bug: the t parameter in computeCubicBaseValue function should be last. Nevertheless good job, works like a charm ;)
Solution in C# :
/// <summary>
/// Evaluates one coordinate of a cubic Bezier at parameter t, where a and d
/// are the end values and b and c the control values.
/// </summary>
double computeCubicBaseValue(double a, double b, double c, double d, double t)
{
    double mt = 1 - t;
    double w0 = mt * mt * mt;
    double w1 = 3 * mt * mt * t;
    double w2 = 3 * mt * t * t;
    double w3 = t * t * t;
    return w0 * a + w1 * b + w2 * c + w3 * d;
}
/// <summary>
/// Roots of the first derivative of one coordinate of a cubic Bezier with
/// values a, b, c, d (start, control 1, control 2, end).
/// </summary>
/// <remarks>
/// The derivative is proportional to dn*t^2 - 2*tl*t + (b - a). When dn
/// vanishes the derivative is linear and still has one root; that case used
/// to be reported as "no roots", which lost the extremum.
/// </remarks>
/// <returns>
/// Always two entries; unused entries hold -1, which callers reject with
/// their 0 &lt;= t &lt;= 1 test.
/// </returns>
double[] computeCubicFirstDerivativeRoots(double a, double b, double c, double d)
{
    const double eps = 1e-12;
    var ret = new double[2] { -1, -1 };
    var tl = -a + 2 * b - c;
    var dn = -a + 3 * b - 3 * c + d;
    if (Math.Abs(dn) > eps)
    {
        var disc = -a * (c - d) + b * b - b * (c + d) + c * c;
        // A negative discriminant means no real root: keep the -1 markers
        // instead of returning NaN.
        if (disc >= 0)
        {
            var tr = -Math.Sqrt(disc);
            ret[0] = (tl + tr) / dn;
            ret[1] = (tl - tr) / dn;
        }
    }
    else if (Math.Abs(tl) > eps)
    {
        // Linear derivative: -2*tl*t + (b - a) = 0.
        ret[0] = (b - a) / (2 * tl);
    }
    return ret;
}
/// <summary>
/// Axis-aligned bounding box of the cubic Bezier defined by its start point,
/// two control points and end point.
/// </summary>
/// <remarks>
/// The box starts from the two end points and is widened by the curve value
/// at every first-derivative root that lies in [0, 1]. Each axis evaluates
/// only its own coordinate; the earlier version also computed the other
/// coordinate at every root and discarded it.
/// </remarks>
/// <returns>The array { minx, miny, maxx, maxy }.</returns>
public double[] ComputeCubicBoundingBox(Point start, Point firstControl, Point secondControl, Point end)
{
    double xa = start.X, ya = start.Y;
    double xb = firstControl.X, yb = firstControl.Y;
    double xc = secondControl.X, yc = secondControl.Y;
    double xd = end.X, yd = end.Y;

    // x range: end points, then the interior extrema
    double minx = Math.Min(xa, xd);
    double maxx = Math.Max(xa, xd);
    foreach (var t in computeCubicFirstDerivativeRoots(xa, xb, xc, xd))
    {
        if (t >= 0 && t <= 1)
        {
            var x = computeCubicBaseValue(xa, xb, xc, xd, t);
            if (x < minx) { minx = x; }
            if (x > maxx) { maxx = x; }
        }
    }

    // y range: end points, then the interior extrema
    double miny = Math.Min(ya, yd);
    double maxy = Math.Max(ya, yd);
    foreach (var t in computeCubicFirstDerivativeRoots(ya, yb, yc, yd))
    {
        if (t >= 0 && t <= 1)
        {
            var y = computeCubicBaseValue(ya, yb, yc, yd, t);
            if (y < miny) { miny = y; }
            if (y > maxy) { maxy = y; }
        }
    }

    // bounding box corner coordinates
    var bbox = new double[] { minx, miny, maxx, maxy };
    return bbox;
}