I'm looking for a way to estimate the distance to the boundary of the Mandelbrot set from a point inside of it for use in a GLSL shader.
This page links to various resources online touching on the subject of interior distance estimation such as the underlying mathematical formula, a Haskell implementation, some other blogs, forum posts and a C99 implementation, but I got the impression that they are all either very complex to implement or very computationally heavy to run.
After many hours of trying, I managed to make this code that runs in Shadertoy:
void mainImage( out vec4 fragColor, in vec2 fragCoord ) {
    float zoom = 1.;
    vec2 c = vec2(-0.75, 0.0) + zoom * (2.*fragCoord-iResolution.xy)/iResolution.y;
    vec2 z = c;
    float ar = 0.; // average of reciprocals
    float i;
    for (i = 0.; i < 1000.; i++) {
        ar += 1./length(z);
        z = vec2(z.x * z.x - z.y * z.y, 2.0 * z.x * z.y) + c;
    }
    ar = ar / i;
    fragColor = vec4(vec3(2. / ar), 1.0);
}
It does produce a gradient in every bulb, but it is clear that it's not usable as a distance estimator by itself because values in smaller bulbs have inconsistent magnitude (brightness) compared to bigger bulbs. So it's clear that a parameter is missing but I don't know what it is.
I don't require a perfect solution nor one that converges into a perfect solution like in this image.
Something that at least guarantees a lower bound is plenty.
My bet is that 1./length(z) is hitting the precision limits of float. Try using double and dvec2 instead of float and vec2 to see if it makes any difference. If it does, then I would ignore values of length(z) that are too small.
Alternatively, you can render just the boundary into a texture in one pass and then scan the neighbors in all directions until the boundary is found, returning the ray length (this may require some morphology operators before it is safe to use).
This can be sped up with another pass where you "flood" fill an incrementing distance into the texture until it is filled (better done on the CPU side, as you need R/W access to the same texture). It is similar to A* filling; however, your precision will be limited by the texture resolution.
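To make that two-pass idea concrete, here is a minimal CPU-side sketch of the "flood" fill (hedged: the boundary bitmap, the container choices and the plain 4-neighbor BFS are illustrative assumptions, not the only way to do it):
#include <vector>
#include <queue>

// Breadth-first "flood" fill: every texel receives the approximate distance
// (in texels) to the nearest boundary texel. As noted above, the precision
// is limited by the texture resolution.
std::vector<float> distanceFill(const std::vector<bool> &boundary, int w, int h)
{
    std::vector<float> dist(w * h, -1.0f);          // -1 = not visited yet
    std::queue<int> q;
    for (int i = 0; i < w * h; i++)                 // seed with the boundary texels
        if (boundary[i]) { dist[i] = 0.0f; q.push(i); }
    const int dx[4] = { +1, -1, 0, 0 };
    const int dy[4] = { 0, 0, +1, -1 };
    while (!q.empty())                              // grow outward layer by layer
    {
        int i = q.front(); q.pop();
        int x = i % w, y = i / w;
        for (int k = 0; k < 4; k++)
        {
            int nx = x + dx[k], ny = y + dy[k];
            if ((nx < 0) || (ny < 0) || (nx >= w) || (ny >= h)) continue;
            int j = (ny * w) + nx;
            if (dist[j] < 0.0f) { dist[j] = dist[i] + 1.0f; q.push(j); }
        }
    }
    return dist;                                    // upload as a texture afterwards
}
The 4-neighbor fill gives a Manhattan-style distance that never underestimates the Euclidean one, so dividing the result by sqrt(2) (and by the texel size) yields the kind of conservative lower bound the question asks for.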
Here is my Mandelbrot from the link above, with your computation ported to doubles and the threshold added:
// Fragment
#version 450 core
uniform dvec2 p0=vec2(0.0,0.0); // mouse position <-1,+1>
uniform double zoom=1.000; // zoom [-]
uniform int n=100; // iterations [-]
in smooth vec2 p32;
out vec4 col;
vec3 spectral_color(float l) // RGB <0,1> <- lambda l <400,700> [nm]
{
float t; vec3 c=vec3(0.0,0.0,0.0);
if ((l>=400.0)&&(l<410.0)) { t=(l-400.0)/(410.0-400.0); c.r= +(0.33*t)-(0.20*t*t); }
else if ((l>=410.0)&&(l<475.0)) { t=(l-410.0)/(475.0-410.0); c.r=0.14 -(0.13*t*t); }
else if ((l>=545.0)&&(l<595.0)) { t=(l-545.0)/(595.0-545.0); c.r= +(1.98*t)-( t*t); }
else if ((l>=595.0)&&(l<650.0)) { t=(l-595.0)/(650.0-595.0); c.r=0.98+(0.06*t)-(0.40*t*t); }
else if ((l>=650.0)&&(l<700.0)) { t=(l-650.0)/(700.0-650.0); c.r=0.65-(0.84*t)+(0.20*t*t); }
if ((l>=415.0)&&(l<475.0)) { t=(l-415.0)/(475.0-415.0); c.g= +(0.80*t*t); }
else if ((l>=475.0)&&(l<590.0)) { t=(l-475.0)/(590.0-475.0); c.g=0.8 +(0.76*t)-(0.80*t*t); }
else if ((l>=585.0)&&(l<639.0)) { t=(l-585.0)/(639.0-585.0); c.g=0.84-(0.84*t) ; }
if ((l>=400.0)&&(l<475.0)) { t=(l-400.0)/(475.0-400.0); c.b= +(2.20*t)-(1.50*t*t); }
else if ((l>=475.0)&&(l<560.0)) { t=(l-475.0)/(560.0-475.0); c.b=0.7 -( t)+(0.30*t*t); }
return c;
}
void main()
{
int i,j;
dvec2 pp,p;
double x,y,q,xx,yy,mu,cx,cy;
p=dvec2(p32);
pp=(p/zoom)-p0; // y (-1.0, 1.0)
pp.x-=0.5; // x (-1.5, 0.5)
cx=pp.x; // normal
cy=pp.y;
/*
// single pass mandelbrot integer escape
for (x=0.0,y=0.0,xx=0.0,yy=0.0,i=0;(i<n)&&(xx+yy<4.0);i++)
{
q=xx-yy+cx;
y=(2.0*x*y)+cy;
x=q;
xx=x*x;
yy=y*y;
}
float f=float(i)/float(n);
f=pow(f,0.2);
col=vec4(spectral_color(400.0+(300.0*f)),1.0);
*/
// distance to boundary
double ar=0.0,aa,nn=0.0; // *** this is what I added
for (x=0.0,y=0.0,xx=0.0,yy=0.0,i=0;(i<n)&&(xx+yy<4.0);i++)
{
aa=length(dvec2(x,y)); // *** this is what I added
if (aa>1e-3){ ar+=1.0/aa; nn++; } // *** this is what I added
q=xx-yy+cx;
y=(2.0*x*y)+cy;
x=q;
xx=x*x;
yy=y*y;
}
ar=ar/nn; // *** this is what I added
col=vec4(vec3(1.0-(2.0/ar)),1.0); // *** this is what I added
}
I got these outputs:
Just look for the // *** this is what I added comments in the code; that is what was added to the standard Mandelbrot rendering in order to render distance instead. PS: my (x,y) is your z and (cx,cy) is your c.
Anyway, the distance is still highly nonlinear and depends on the position.
[Edit1] non-isotropic scale
The black dot is the threshold's size; you can lower it to 1e-20 ... Now I added level lines to show the distribution of the distance scale (as I did not know how non-isotropic and nonlinear it is...). Here is the output:
And here is the coloring part of the fragment shader (after the for loop):
ar=1.0-(2.0*nn/ar);
aa=10.0*ar; // 10 level lines per unit
aa-=floor(aa);
if (abs(aa)<0.05) col=vec4(0.0,1.0,0.0,1.0); // width and color of level line
else col=vec4(ar,ar,ar,1.0);
As you can see, it is not very parallel to the border, but it is still locally "constant" (the level lines are equidistant to each other within a local feature of the fractal), so if the gradient (derivative) is used, the result will be just a very rough estimate (but it should work). If that is enough, what you should do is:
compute the nonlinear distance for the queried position and for a few points a distance d away from it in "all" directions,
pick the neighbor that has the biggest change in distance relative to the original point,
rescale the estimated distances so that their subtraction gives you d, then use the first distance, rescaled, as the output.
When put into fragment code (using 8 neighbors):
// Fragment
#version 450 core
uniform dvec2 p0=vec2(0.0,0.0); // mouse position <-1,+1>
uniform double zoom=1.000; // zoom [-]
uniform int n=100; // iterations [-]
in smooth vec2 p32;
out vec4 col;
double mandelbrot_distance(double cx,double cy)
{
// distance to boundary
int i,j;
double x,y,q,xx,yy,ar=0.0,aa,nn=0.0;
for (x=0.0,y=0.0,xx=0.0,yy=0.0,i=0;(i<n)&&(xx+yy<4.0);i++)
{
aa=length(dvec2(x,y));
if (aa>1e-20){ ar+=1.0/aa; nn++; }
q=xx-yy+cx;
y=(2.0*x*y)+cy;
x=q;
xx=x*x;
yy=y*y;
}
return 1.0-(2.0*nn/ar);
}
void main()
{
dvec2 pp,p;
double cx,cy,d,dd,d0,d1,e;
p=dvec2(p32);
pp=(p/zoom)-p0; // y (-1.0, 1.0)
pp.x-=0.5; // x (-1.5, 0.5)
cx=pp.x; // normal
cy=pp.y;
d =0.01/zoom; // normalization distance
e =sqrt(0.5)*d;
dd=mandelbrot_distance(cx,cy);
if (dd>0.0)
{
d0=mandelbrot_distance(cx-d,cy ); if (d0>0.0) d0=abs(d0-dd);
d1=mandelbrot_distance(cx+d,cy ); if (d1>0.0){ d1=abs(d1-dd); if (d0<d1) d0=d1; }
d1=mandelbrot_distance(cx ,cy-d); if (d1>0.0){ d1=abs(d1-dd); if (d0<d1) d0=d1; }
d1=mandelbrot_distance(cx ,cy+d); if (d1>0.0){ d1=abs(d1-dd); if (d0<d1) d0=d1; }
d1=mandelbrot_distance(cx-e,cy-e); if (d1>0.0){ d1=abs(d1-dd); if (d0<d1) d0=d1; }
d1=mandelbrot_distance(cx+e,cy-e); if (d1>0.0){ d1=abs(d1-dd); if (d0<d1) d0=d1; }
d1=mandelbrot_distance(cx-e,cy+e); if (d1>0.0){ d1=abs(d1-dd); if (d0<d1) d0=d1; }
d1=mandelbrot_distance(cx+e,cy+e); if (d1>0.0){ d1=abs(d1-dd); if (d0<d1) d0=d1; }
dd*=d/d0;
}
dd*=zoom; // just for visualization of small details real distance should not be scaled by this
col=vec4(dd,dd,dd,1.0);
}
Here is the result:
As you can see, it is now much more correct (but very close to the border it is inaccurate due to the non-isotropy mentioned above). The 8 neighbors produce the 8 diagonal-like line patterns in the circular blobs. If you want to get rid of them, you should scan a whole circle around the position instead of just 8 points.
Also there are still some white dots (they are not accuracy related). I think they are cases where the selected neighbor at distance d lies across the Mandelbrot edge, in a different blob than the original point. That could be filtered out ... (you know that a point d/2 away in the same direction should give half the difference; if not, you are in a different blob).
However, even 8 neighbors are pretty slow, so for more accuracy I would recommend going for the two-pass "ray casting" method instead.
I'm trying to fill an image with gyroid lines of a certain thickness at a certain spacing, but math is not my area. I was able to create a sine wave and shift it a bit in the X direction to make it look like a gyroid, but it's not the same.
The idea behind this is to stack some images with the same resolution and replicate the gyroid across 2D images, so we still have XYZ, where Z can be 0.01mm to 0.1mm per layer.
What I've tried:
int sineHeight = 100;
int sineWidth = 100;
int spacing = 100;
int radius = 10;
for (int y1 = 0; y1 < mat.Height; y1 += sineHeight+spacing)
for (int x = 0; x < mat.Width; x++)
{
// Simulating first image
int y2 = (int)(Math.Sin((double)x / sineWidth) * sineHeight / 2.0 + sineHeight / 2.0 + radius);
Circle(mat, new System.Drawing.Point(x, y1+y2), radius, EmguExtensions.WhiteColor, -1, LineType.AntiAlias);
// Simulating second image, shift by x to make it look a bit more with gyroid
y2 = (int)(Math.Sin((double)x / sineWidth + sineWidth) * sineHeight / 2.0 + sineHeight / 2.0 + radius);
Circle(mat, new System.Drawing.Point(x, y1 + y2), radius, EmguExtensions.GreyColor, -1, LineType.AntiAlias);
}
Resulting in (white represents layer 1, grey layer 2):
Still, this looks nothing like a real gyroid; how can I replicate the formula to work in this space?
You have just a single ugly slice because I do not see any z in your code (which is correct: the surface has horizontal and vertical sine waves like this every 0.5*pi in z).
To see the 3D surface you have to raycast z ...
I would expect some conditional testing of the actually iterated x,y,z result of the gyroid equation against some small non-zero number, like if (result <= 1e-6), and only then drawing the stuff, or computing the color from the result instead. This is ideal to do in GLSL.
In case you are not familiar with GLSL and shaders: the fragment shader is executed for each pixel (called a fragment) of the rendered QUAD, so you just put the code inside your nested x,y for loops and use your x,y instead of pos (you can ignore the vertex shader; it is not important).
You have 2 basic options to render this:
Blending the raycasted surface pixels together, creating an X-Ray-like image. It can be combined with SSS techniques to get the impression of glass or a semitransparent material. Here is a simple GLSL example of the blending:
Vertex:
#version 400 core
in vec2 position;
out vec2 pos;
void main(void)
{
pos=position;
gl_Position = vec4(position.xy,0.0,1.0);
}
Fragment:
#version 400 core
in vec2 pos;
out vec3 out_col;
void main(void)
{
float n,x,y,z,dz,d,i,di;
const float scale=2.0*3.1415926535897932384626433832795;
n=100.0; // layers
x=pos.x*scale; // x position of pixel
y=pos.y*scale; // y position of pixel
dz=2.0*scale/n; // z step
di=1.0/n; // color increment
i=0.0; // color intensity
for (z=-scale;z<=scale;z+=dz) // do all layers
{
d =sin(x)*cos(y); // compute gyroid equation
d+=sin(y)*cos(z);
d+=sin(z)*cos(x);
if (d<=1e-6) i+=di; // if near surface add to color
}
out_col=vec3(1.0,1.0,1.0)*i;
}
Usage is simple: just render a 2D quad covering the whole screen, without any matrices, with the corner pos points in the range <-1,+1> (a minimal CPU-side setup sketch follows right after the result image). Here is the result:
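As promised, a hedged sketch of that quad setup (prog is assumed to be the already compiled and linked program built from the shaders above; any loader exposing the GL 4.0 entry points will do):
#include <GL/glew.h>   // or any other GL function loader

// Render a full-screen quad with corners in <-1,+1>; no matrices are needed.
void drawFullScreenQuad(GLuint prog)
{
    static const float quad[8] = { -1.0f,-1.0f,  +1.0f,-1.0f,  +1.0f,+1.0f,  -1.0f,+1.0f };
    GLuint vao, vbo;
    glGenVertexArrays(1, &vao);
    glBindVertexArray(vao);
    glGenBuffers(1, &vbo);
    glBindBuffer(GL_ARRAY_BUFFER, vbo);
    glBufferData(GL_ARRAY_BUFFER, sizeof(quad), quad, GL_STATIC_DRAW);
    GLint loc = glGetAttribLocation(prog, "position"); // the vertex shader input above
    glEnableVertexAttribArray(loc);
    glVertexAttribPointer(loc, 2, GL_FLOAT, GL_FALSE, 0, nullptr);
    glUseProgram(prog);
    glDrawArrays(GL_TRIANGLE_FAN, 0, 4);               // two triangles covering the screen
    glDeleteBuffers(1, &vbo);                          // recreating per call is fine for a sketch
    glDeleteVertexArrays(1, &vao);
}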
Another technique is to render the first hit with the surface, creating a mesh-like image. In order to see the details we need to add basic (double-sided) directional lighting, for which the surface normal is needed. The normal can be computed by simply taking the partial derivatives of the equation by x, y, z. As the surface is now opaque, we can stop at the first hit and also raycast just a single period in z, as anything after that is hidden anyway. Here is a simple example:
Fragment:
#version 400 core
in vec2 pos; // input fragment (pixel) position <-1,+1>
out vec3 col; // output fragment (pixel) RGB color <0,1>
void main(void)
{
bool _discard=true;
float N,x,y,z,dz,d,i;
vec3 n,l;
const float pi=3.1415926535897932384626433832795;
const float scale =3.0*pi; // 3.0 periods in x,y
const float scalez=2.0*pi; // 1.0 period in z
N=200.0; // layers per z (quality)
x=pos.x*scale; // <-1,+1> -> [rad]
y=pos.y*scale; // <-1,+1> -> [rad]
dz=2.0*scalez/N; // z step
l=vec3(0.0,0.0,1.0); // light unit direction
i=0.0; // starting color intensity
n=vec3(0.0,0.0,1.0); // starting normal, only to get rid of a warning
for (z=0.0;z>=-scalez;z-=dz) // raycast z through all layers in view direction
{
// gyroid equation
d =sin(x)*cos(y); // compute gyroid equation
d+=sin(y)*cos(z);
d+=sin(z)*cos(x);
// surface hit test
if (d>1e-6) continue; // skip if too far from surface
_discard=false; // remember that surface was hit
// compute normal
// gradient of the gyroid equation (the term without the differentiation variable drops out)
n.x =+cos(x)*cos(y); // partial derivative by x
n.x+=-sin(z)*sin(x);
n.y =-sin(x)*sin(y); // partial derivative by y
n.y+=+cos(y)*cos(z);
n.z =-sin(y)*sin(z); // partial derivative by z
n.z+=+cos(z)*cos(x);
break; // stop raycasting
}
// skip rendering if no hit with surface (hole)
if (_discard) discard;
// directional lighting
n=normalize(n);
i=abs(dot(l,n));
// ambient + directional lighting
i=0.3+(0.7*i);
// output fragment (render pixel)
gl_FragDepth=z; // depth (optional)
col=vec3(1.0,1.0,1.0)*i; // color
}
I hope I did not make an error in the partial derivatives. Here is the result:
[Edit1]
Based on your code, I see it like this (X-Ray-like blending):
var mat = EmguExtensions.InitMat(new System.Drawing.Size(2000, 1080));
double zz, dz, d, i, di = 0;
const double scalex = 2.0 * Math.PI / mat.Width;
const double scaley = 2.0 * Math.PI / mat.Height;
const double scalez = 2.0 * Math.PI;
uint layerCount = 100; // layers
for (int y = 0; y < mat.Height; y++)
{
double yy = y * scaley; // y position of pixel
for (int x = 0; x < mat.Width; x++)
{
double xx = x * scalex; // x position of pixel
dz = 2.0 * scalez / layerCount; // z step
di = 1.0 / layerCount; // color increment
i = 0.0; // color intensity
for (zz = -scalez; zz <= scalez; zz += dz) // do all layers
{
d = Math.Sin(xx) * Math.Cos(yy); // compute gyroid equation
d += Math.Sin(yy) * Math.Cos(zz);
d += Math.Sin(zz) * Math.Cos(xx);
if (d > 1e-6) continue;
i += di; // if near surface add to color
}
i*=255.0;
mat.SetByte(x, y, (byte)(i));
}
}
I need to develop an algorithm that connects points in a non-linear way, that is, with smooth curves, as in the image below:
The problem is that I cannot find the best solution, whether using Bezier curves, polynomial interpolation, curve fitting, or something else.
In short, I need a formula that interpolates the points according to the figure above, generating N intermediate points between one coordinate and another.
In the image above, the first coordinate (c1) is (x = 1, y = 220) and the second (c2) is (x = 2, y = 40).
So if I want to create for example 4 intermediate coordinates between c1 and c2 I will have to get an array (x, y) of 4 elements something like this:
[1.2, 180], [1.4, 140], [1.6, 120], [1.8, 80]
Would anyone have any ideas?
I think any piecewise curve interpolation should do it. Here is a small C++ example:
//---------------------------------------------------------------------------
const int n=7; // points
const int n2=n+n;
float pnt[n2]= // points x,y ...
{
1.0, 220.0,
2.0, 40.0,
3.0,-130.0,
4.0,-170.0,
5.0,- 40.0,
6.0, 90.0,
7.0, 110.0,
};
//---------------------------------------------------------------------------
void getpnt(float *p,float t) // t = <0,n-1>
{
int i,ii;
float *p0,*p1,*p2,*p3,a0,a1,a2,a3,d1,d2,tt,ttt;
// handle t out of range
if (t<= 0.0f){ p[0]=pnt[0]; p[1]=pnt[1]; return; }
if (t>=float(n-1)){ p[0]=pnt[n2-2]; p[1]=pnt[n2-1]; return; }
// select patch
i=floor(t); // start point of patch
t-=i; // parameter <0,1>
i<<=1; tt=t*t; ttt=tt*t;
// control points
ii=i-2; if (ii<0) ii=0; if (ii>=n2) ii=n2-2; p0=pnt+ii;
ii=i ; if (ii<0) ii=0; if (ii>=n2) ii=n2-2; p1=pnt+ii;
ii=i+2; if (ii<0) ii=0; if (ii>=n2) ii=n2-2; p2=pnt+ii;
ii=i+4; if (ii<0) ii=0; if (ii>=n2) ii=n2-2; p3=pnt+ii;
// loop all dimensions
for (i=0;i<2;i++)
{
// compute polynomial coefficients
d1=0.5*(p2[i]-p0[i]);
d2=0.5*(p3[i]-p1[i]);
a0=p1[i];
a1=d1;
a2=(3.0*(p2[i]-p1[i]))-(2.0*d1)-d2;
a3=d1+d2+(2.0*(-p2[i]+p1[i]));
// compute point coordinate
p[i]=a0+(a1*t)+(a2*tt)+(a3*ttt);
}
}
//---------------------------------------------------------------------------
void gl_draw()
{
glClearColor(1.0,1.0,1.0,1.0);
glClear(GL_COLOR_BUFFER_BIT);
glDisable(GL_DEPTH_TEST);
glDisable(GL_TEXTURE_2D);
// set 2D view
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glScalef(1.0/5.0,1.0/500.0,1.0);
glTranslatef(-4.0,0.0,0.0);
// render lines
glColor3f(1.0,0.0,0.0);
glBegin(GL_LINE_STRIP);
float p[2],t;
for (t=0.0;t<=float(n-1);t+=0.1f)
{
getpnt(p,t);
glVertex2fv(p);
}
glEnd();
// render points
glPointSize(4.0);
glColor3f(0.0,0.0,1.0);
glBegin(GL_POINTS);
for (int i=0;i<n2;i+=2) glVertex2fv(pnt+i);
glEnd();
glPointSize(1.0);
glFinish();
SwapBuffers(hdc);
}
//---------------------------------------------------------------------------
Here is a preview:
As you can see it is simple: you just need the n control points pnt (I extracted them from your graph) and just interpolate... The getpnt function will compute any point on the curve addressed by the parameter t = <0,n-1>. Internally it just selects which cubic patch to use and computes it as a single cubic curve. In gl_draw you can see how to use it to obtain the points in between.
As your control points are uniformly distributed on the x axis:
x = <1,7>
t = <0,6>
I can write:
x = t+1
t = x-1
so you can compute any point for any x too...
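For example, the four intermediate coordinates between c1 and c2 from your question could be sampled like this (a small sketch built on top of the getpnt() above; the helper names are mine):
// Evaluate the curve at a given x, using t = x-1 (valid only while the control
// points keep the uniform x spacing 1,2,...,7 used above).
void getpnt_at_x(float *p,float x){ getpnt(p,x-1.0f); }

// 4 intermediate points between c1 (x=1) and c2 (x=2):
void sample_c1_c2()
{
    float p[2];
    for (int k=1;k<=4;k++)
    {
        getpnt_at_x(p,1.0f+(0.2f*float(k))); // x = 1.2, 1.4, 1.6, 1.8
        // p[0] = x, p[1] = interpolated y
    }
}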
The shape does not match your graph perfectly because the selected control points are not the correct ones. Any local minimum/maximum should be a control point, and sometimes it is safer to also use inflection points. The starting and ending shape of the curve suggests hidden starting and ending control points which are not shown on the graph. You can use any number of points you need, but beware: if you break the uniform distribution in x, then you lose the ability to compute t from x directly!
As we do not know how the graph was created, we can only guess ...
If linear interpolation happens during the rasterization stage in the OpenGL pipeline, and the vertices have already been transformed to screen-space, where does the depth information used for perspectively correct interpolation come from?
Can anybody give a detailed description of how OpenGL goes from screen-space primitives to fragments with correctly interpolated values?
The output of a vertex shader is a four component vector, vec4 gl_Position. From Section 13.6 Coordinate Transformations of core GL 4.4 spec:
Clip coordinates for a vertex result from shader execution, which yields a vertex coordinate gl_Position.
Perspective division on clip coordinates yields normalized device coordinates, followed by a viewport transformation (see section 13.6.1) to convert these coordinates into window coordinates.
OpenGL does the perspective divide as
device.xyz = gl_Position.xyz / gl_Position.w
But then keeps the 1 / gl_Position.w as the last component of gl_FragCoord:
gl_FragCoord.xyz = device.xyz scaled to viewport
gl_FragCoord.w = 1 / gl_Position.w
This transform is bijective, so no depth information is lost. In fact as we see below, the 1 / gl_Position.w is crucial for perspective correct interpolation.
Short introduction to barycentric coordinates
Given a triangle (P0, P1, P2) one can parametrize all the points inside the triangle by the linear combinations of the vertices:
P(b0,b1,b2) = P0*b0 + P1*b1 + P2*b2
where b0 + b1 + b2 = 1 and b0 ≥ 0, b1 ≥ 0, b2 ≥ 0.
Given a point P inside the triangle, the coefficients (b0, b1, b2) that satisfy the equation above are called the barycentric coordinates of that point. For non-degenerate triangles they are unique, and can be calculated as quotients of the areas of the following triangles:
b0(P) = area(P, P1, P2) / area(P0, P1, P2)
b1(P) = area(P0, P, P2) / area(P0, P1, P2)
b2(P) = area(P0, P1, P) / area(P0, P1, P2)
Each bi can be thought of as 'how much of Pi has to be mixed in'. So b = (1,0,0), (0,1,0) and (0,0,1) are the vertices of the triangle, (1/3, 1/3, 1/3) is the barycenter, and so on.
Given an attribute (f0, f1, f2) on the vertices of the triangle, we can now interpolate it over the interior:
f(P) = f0*b0(P) + f1*b1(P) + f2*b2(P)
This is a linear function of P, therefore it is the unique linear interpolant over the given triangle. The math also works in either 2D or 3D.
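To make that concrete, here is a tiny C++ sketch of the area-quotient and interpolation formulas above for the 2D case (the names and the use of doubled signed areas are my own; the 1/2 factors cancel in the quotients):
#include <array>

struct Vec2 { float x, y; };

// Twice the signed area of triangle (p0, p1, p2); the factor 1/2 cancels in the quotients.
float area2(Vec2 p0, Vec2 p1, Vec2 p2)
{
    return (p1.x - p0.x)*(p2.y - p0.y) - (p1.y - p0.y)*(p2.x - p0.x);
}

// Barycentric coordinates (b0, b1, b2) of p with respect to triangle (p0, p1, p2).
std::array<float,3> barycentric(Vec2 p, Vec2 p0, Vec2 p1, Vec2 p2)
{
    float inv = 1.0f / area2(p0, p1, p2);
    return { area2(p, p1, p2)*inv, area2(p0, p, p2)*inv, area2(p0, p1, p)*inv };
}

// Linear interpolation of a per-vertex attribute (f0, f1, f2) at p.
float interpolateLinear(std::array<float,3> b, float f0, float f1, float f2)
{
    return f0*b[0] + f1*b[1] + f2*b[2];
}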
Perspective correct interpolation
Let's say we fill a projected 2D triangle on the screen. For every fragment we have its window coordinates. First we calculate its barycentric coordinates by inverting the P(b0,b1,b2) function, which is a linear function in window coordinates. This gives us the barycentric coordinates of the fragment on the 2D triangle projection.
Perspective correct interpolation of an attribute would vary linearly in the clip coordinates (and by extension, world coordinates). For that we need to get the barycentric coordinates of the fragment in clip space.
As it happens (see [1] and [2]), the depth of the fragment is not linear in window coordinates, but the depth inverse (1/gl_Position.w) is. Accordingly the attributes and the clip-space barycentric coordinates, when weighted by the depth inverse, vary linearly in window coordinates.
Therefore, we compute the perspective corrected barycentric by:
     ( b0 / gl_Position[0].w ,  b1 / gl_Position[1].w ,  b2 / gl_Position[2].w )
B = -----------------------------------------------------------------------------
       b0 / gl_Position[0].w  +  b1 / gl_Position[1].w  +  b2 / gl_Position[2].w
and then use it to interpolate the attributes from the vertices.
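In code, continuing the small sketch above, the correction is just a per-component division by the clip-space w followed by a renormalization (again, the names are illustrative):
// Perspective-corrected barycentric coordinates B from the window-space ones b,
// given the clip-space w of the three vertices (gl_Position[i].w).
std::array<float,3> perspectiveCorrect(std::array<float,3> b, std::array<float,3> w)
{
    std::array<float,3> B = { b[0]/w[0], b[1]/w[1], b[2]/w[2] };
    float sum = B[0] + B[1] + B[2];
    B[0] /= sum; B[1] /= sum; B[2] /= sum;
    return B;
}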
Note: GL_NV_fragment_shader_barycentric exposes the device-linear barycentric coordinates through gl_BaryCoordNoPerspNV and the perspective corrected through gl_BaryCoordNV.
Implementation
Here is some C++ code that rasterizes and shades a triangle on the CPU, in a manner similar to OpenGL. I encourage you to compare it with the shaders listed below:
struct Renderbuffer { int w, h, ys; void *data; };
struct Vert { vec4 position, texcoord, color; };
struct Varying { vec4 texcoord, color; };
void vertex_shader(const Vert &in, vec4 &gl_Position, Varying &OUT) {
OUT.texcoord = in.texcoord;
OUT.color = in.color;
gl_Position = vec4(in.position.x, in.position.y, -2*in.position.z - 2*in.position.w, -in.position.z);
}
void fragment_shader(vec4 &gl_FragCoord, const Varying &IN, vec4 &OUT) {
OUT = IN.color;
vec2 wrapped = IN.texcoord.xy - floor(IN.texcoord.xy);
bool brighter = (wrapped[0] < 0.5) != (wrapped[1] < 0.5);
if(!brighter)
OUT.rgb *= 0.5f;
}
// render output unit/render operations pipeline
void rop(Renderbuffer &buf, int x, int y, const vec4 &c) {
uint8_t *p = (uint8_t*)buf.data + buf.ys*(buf.h - y - 1) + 4*x;
p[0] = linear_to_srgb8(c[0]);
p[1] = linear_to_srgb8(c[1]);
p[2] = linear_to_srgb8(c[2]);
p[3] = lround(c[3]*255);
}
void draw_triangle(Renderbuffer &color_attachment, const box2 &viewport, const Vert *verts) {
auto area = [](const vec2 &p0, const vec2 &p1, const vec2 &p2) { return cross(p1 - p0, p2 - p0); };
auto interpolate = [](const auto a[3], auto p, const vec3 &coord) { return coord.x*a[0].*p + coord.y*a[1].*p + coord.z*a[2].*p; };
Varying perVertex[3];
vec4 gl_Position[3];
box2 aabb = { viewport.hi, viewport.lo };
for(int i = 0; i < 3; ++i) {
vertex_shader(verts[i], gl_Position[i], perVertex[i]);
// convert to normalized device coordinates
gl_Position[i].w = 1/gl_Position[i].w;
gl_Position[i].xyz *= gl_Position[i].w;
// convert to window coordinates
gl_Position[i].xy = mix(viewport.lo, viewport.hi, 0.5f*(gl_Position[i].xy + 1.0f));
aabb = join(aabb, gl_Position[i].xy);
}
const float denom = 1/area(gl_Position[0].xy, gl_Position[1].xy, gl_Position[2].xy);
// loop over all pixels in the rectangle bounding the triangle
const ibox2 iaabb = lround(aabb);
for(int y = iaabb.lo.y; y < iaabb.hi.y; ++y)
for(int x = iaabb.lo.x; x < iaabb.hi.x; ++x)
{
vec4 gl_FragCoord;
gl_FragCoord.xy = vec2(x, y) + 0.5f;
// fragment barycentric coordinates in window coordinates
const vec3 barycentric = denom*vec3(
area(gl_FragCoord.xy, gl_Position[1].xy, gl_Position[2].xy),
area(gl_Position[0].xy, gl_FragCoord.xy, gl_Position[2].xy),
area(gl_Position[0].xy, gl_Position[1].xy, gl_FragCoord.xy)
);
// discard fragment outside the triangle. this doesn't handle edges correctly.
if(barycentric.x < 0 || barycentric.y < 0 || barycentric.z < 0)
continue;
// interpolate inverse depth linearly
gl_FragCoord.z = interpolate(gl_Position, &vec4::z, barycentric);
gl_FragCoord.w = interpolate(gl_Position, &vec4::w, barycentric);
// clip fragments to the near/far planes (as if by GL_ZERO_TO_ONE)
if(gl_FragCoord.z < 0 || gl_FragCoord.z > 1)
continue;
// convert to perspective correct (clip-space) barycentric
const vec3 perspective = 1/gl_FragCoord.w*barycentric*vec3(gl_Position[0].w, gl_Position[1].w, gl_Position[2].w);
// interpolate attributes
Varying varying = {
interpolate(perVertex, &Varying::texcoord, perspective),
interpolate(perVertex, &Varying::color, perspective),
};
vec4 color;
fragment_shader(gl_FragCoord, varying, color);
rop(color_attachment, x, y, color);
}
}
int main(int argc, char *argv[]) {
Renderbuffer buffer = { 512, 512, 512*4 };
buffer.data = calloc(buffer.ys, buffer.h);
// VAO interleaved attributes buffer
Vert verts[] = {
{ { -1, -1, -2, 1 }, { 0, 0, 0, 1 }, { 0, 0, 1, 1 } },
{ { 1, -1, -1, 1 }, { 10, 0, 0, 1 }, { 1, 0, 0, 1 } },
{ { 0, 1, -1, 1 }, { 0, 10, 0, 1 }, { 0, 1, 0, 1 } },
};
box2 viewport = { 0, 0, buffer.w, buffer.h };
draw_triangle(buffer, viewport, verts);
stbi_write_png("out.png", buffer.w, buffer.h, 4, buffer.data, buffer.ys);
}
OpenGL shaders
Here are the OpenGL shaders used to generate the reference image.
Vertex shader:
#version 450 core
layout(location = 0) in vec4 position;
layout(location = 1) in vec4 texcoord;
layout(location = 2) in vec4 color;
out gl_PerVertex { vec4 gl_Position; };
layout(location = 0) out Varying { vec4 texcoord; vec4 color; } OUT;
void main() {
OUT.texcoord = texcoord;
OUT.color = color;
gl_Position = vec4(position.x, position.y, -2*position.z - 2*position.w, -position.z);
}
Fragment shader:
#version 450 core
layout(location = 0) in Varying { vec4 texcoord; vec4 color; } IN;
layout(location = 0) out vec4 OUT;
void main() {
OUT = IN.color;
vec2 wrapped = fract(IN.texcoord.xy);
bool brighter = (wrapped.x < 0.5) != (wrapped.y < 0.5);
if(!brighter)
OUT.rgb *= 0.5;
}
Results
Here are the almost identical images generated by the C++ (left) and OpenGL (right) code:
The differences are caused by different precision and rounding modes.
For comparison, here is one that is not perspective correct (uses barycentric instead of perspective for the interpolation in the code above):
The formula that you will find in the GL specification (look on page 427; the link is the current 4.4 spec, but it has always been that way) for perspective-corrected interpolation of the attribute value in a triangle is:
     a * f_a / w_a  +  b * f_b / w_b  +  c * f_c / w_c
f = ---------------------------------------------------
          a / w_a   +   b / w_b   +   c / w_c
where a,b,c denote the barycentric coordinates of the point in the triangle we are interpolating for (a,b,c >=0, a+b+c = 1), f_i the attribute value at vertex i, and w_i the clip space w coordinate of vertex i. Note that the barycentric coordinates are calculated only for the 2D projection of the window space coords of the triangle (so z is ignored).
This is what the formulas that ybungalowbill gave in his fine answer boil down to, in the general case, with an arbitrary projection axis. Actually, the last row of the projection matrix defines just the projection axis the image plane will be orthogonal to, and the clip-space w component is just the dot product between the vertex coordinates and that axis.
In the typical case, the projection matrix has (0,0,-1,0) as the last row, so it transforms such that w_clip = -z_eye, and this is what ybungalowbill used. However, since w is what we actually divide by (that is the only nonlinear step in the whole transformation chain), this will work for any projection axis. It will also work in the trivial case of orthogonal projections, where w is always 1 (or at least constant).
Note a few things for an efficient implementation of this. The inversion 1/w_i can be pre-calculated per vertex (let's call these values q_i in the following); it does not have to be re-evaluated per fragment. And it is totally free, since we divide by w anyway when going into NDC space, so we can save that value. The GL spec never describes how a certain feature is to be implemented internally, but the fact that the screen-space coordinates are accessible in gl_FragCoord.xyz, and that gl_FragCoord.w is guaranteed to give the (linearly interpolated) 1/w clip-space coordinate, is quite revealing here. That per-fragment 1/w value is actually the denominator of the formula given above.
The factors a/w_a, b/w_b and c/w_c are each used twice in the formula. And they are also constant for any attribute value, no matter how many attributes there are to be interpolated. So, per fragment, you can calculate a' = q_a * a, b' = q_b * b and c' = q_c * c, and get
     a' * f_a  +  b' * f_b  +  c' * f_c
f = -------------------------------------
             a' + b' + c'
So the perspective interpolation boils down to
3 additional multiplications,
2 additional additions, and
1 additional division
per fragment.
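As a sanity check, here is a tiny hedged C++ sketch of that bookkeeping (q_i = 1/w_i precomputed per vertex; everything below runs per fragment, and the names are mine, not from any spec):
// a, b, c    : window-space barycentric coordinates of the fragment
// qa, qb, qc : precomputed 1/w_a, 1/w_b, 1/w_c of the vertices
// fa, fb, fc : the attribute value at the vertices
float interpolatePerspective(float a, float b, float c,
                             float qa, float qb, float qc,
                             float fa, float fb, float fc)
{
    float a_ = a*qa, b_ = b*qb, c_ = c*qc;            // the 3 additional multiplications
    return (a_*fa + b_*fb + c_*fc) / (a_ + b_ + c_);  // the 2 additional adds and the 1 division
}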
I have a Sphere structure that looks like this
struct Sphere {
vec3 _center;
float _radius;
};
How do I apply a 4x4 transformation matrix to that sphere? The matrix may contain a scale factor, a rotation (which obviously will not affect the sphere) and a translation.
The current approach I'm using contains three length() methods (that have sqrt() in them) which are pretty slow.
glm::vec3 extractTranslation(const glm::mat4 &m)
{
glm::vec3 translation;
// Extract the translation
translation.x = m[3][0];
translation.y = m[3][1];
translation.z = m[3][2];
return translation;
}
glm::vec3 extractScale(const glm::mat4 &m) //should work only if matrix is calculated as M = T * R * S
{
glm::vec3 scale;
scale.x = glm::length( glm::vec3(m[0][0], m[0][1], m[0][2]) );
scale.y = glm::length( glm::vec3(m[1][0], m[1][1], m[1][2]) );
scale.z = glm::length( glm::vec3(m[2][0], m[2][1], m[2][2]) );
return scale;
}
float extractLargestScale(const glm::mat4 &m)
{
glm::vec3 scale = extractScale(m);
return glm::max(scale.x, glm::max(scale.y, scale.z));
}
void Sphere::applyTransformation(const glm::mat4 &transformation)
{
glm::vec4 center = transformation * glm::vec4(_center, 1.0f);
float largestScale = extractLargestScale(transformation);
set(glm::vec3(center)/* / center.w */, _radius * largestScale);
}
I wonder if anyone knows of a more efficient way to do this?
This is a question about efficiency, and specifically about avoiding the square root. One idea would be to defer the square root until the last moment. Since length and length squared are increasing functions starting at 0, comparing lengths squared is the same as comparing lengths. So you can avoid the three calls to length and make it one.
#include <glm/gtx/norm.hpp>
#include <algorithm>
glm::vec3 extractScale(const glm::mat4 &m)
{
// length2 returns length squared i.e. v·v
// no square root involved
return glm::vec3(glm::length2( glm::vec3(m[0]) ),
glm::length2( glm::vec3(m[1]) ),
glm::length2( glm::vec3(m[2]) ));
}
void Sphere::applyTransformation(const glm::mat4 &transformation)
{
glm::vec4 center = transformation * glm::vec4(_center, 1.0f);
glm::vec3 scalesSq = extractScale(transformation);
float const maxScaleSq = *std::max_element(&scalesSq[0], &scalesSq[0] + scalesSq.length()); // length gives the dimension here i.e. 3; note the dereference of the returned iterator
// one sqrt when you know the largest of the three
float const largestScale = std::sqrt(maxScaleSq);
set(glm::vec3(center), _radius * largestScale);
}
Aside:
A non-uniform scale means the scaling ratios along the different axes aren't the same. E.g. S1, 2, 4 is non-uniform while S2, 2, 2 is uniform. See this intuitive primer on transformations to understand them better; it has animations to demonstrate such differences.
Can the scale be non-uniform too? From the code it looks like it could. Transforming the radius with the largest scale isn't right. If you had a non-uniform scale, the sphere would actually become an ellipsoid, and hence just scaling the radius isn't correct. You'd have to transform the sphere into an ellipsoid with semi-principal axes of differing lengths.
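If you ever do need the exact shape rather than a conservative bounding sphere, a hedged sketch of such an ellipsoid representation could look like this (Ellipsoid and transformSphere are my own illustrative names, not GLM API):
#include <glm/glm.hpp>

// Center plus three semi-axis vectors; their lengths are the semi-principal axes.
struct Ellipsoid
{
    glm::vec3 center;
    glm::vec3 axis[3];
};

// Transform a sphere by an arbitrary affine matrix into an ellipsoid.
Ellipsoid transformSphere(const glm::vec3 &center, float radius, const glm::mat4 &m)
{
    Ellipsoid e;
    e.center  = glm::vec3(m * glm::vec4(center, 1.0f));
    e.axis[0] = glm::vec3(m * glm::vec4(radius, 0.0f, 0.0f, 0.0f)); // image of the x radius
    e.axis[1] = glm::vec3(m * glm::vec4(0.0f, radius, 0.0f, 0.0f)); // image of the y radius
    e.axis[2] = glm::vec3(m * glm::vec4(0.0f, 0.0f, radius, 0.0f)); // image of the z radius
    return e;
}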
I'm working on a shader that generates little clouds based on some mask images. Right now it works well, but I feel the result is missing something, and I thought a blur would be nice. I remember a basic blur algorithm where you convolve the image with a matrix of norm 1 (the bigger the matrix, the stronger the blur). The thing is, I don't know how to treat the current output of the shader as an image. So basically I want to keep the shader as is, but get its output blurred. Any ideas? How can I integrate the convolution algorithm into the shader? Or does anyone know of another algorithm?
Cg code:
float Luminance( float4 Color ){
return 0.6 * Color.r + 0.3 * Color.g + 0.1 * Color.b;
}
struct v2f {
float4 pos : SV_POSITION;
float2 uv_MainTex : TEXCOORD0;
};
float4 _MainTex_ST;
v2f vert(appdata_base v) {
v2f o;
o.pos = mul(UNITY_MATRIX_MVP, v.vertex);
o.uv_MainTex = TRANSFORM_TEX(v.texcoord, _MainTex);
return o;
}
sampler2D _MainTex;
sampler2D _Gradient;
sampler2D _NoiseO;
sampler2D _NoiseT;
float4 frag(v2f IN) : COLOR {
half4 nO = tex2D (_NoiseO, IN.uv_MainTex);
half4 nT = tex2D (_NoiseT, IN.uv_MainTex);
float4 turbulence = nO + nT;
float lum = Luminance(turbulence);
half4 c = tex2D (_MainTex, IN.uv_MainTex);
if (lum >= 1.0f){
float pos = lum - 1.0f;
if( pos > 0.98f ) pos = 0.98f;
if( pos < 0.02f ) pos = 0.02f;
float2 texCord = (pos, pos);
half4 turb = tex2D (_Gradient, texCord);
//turb.a = 0.0f;
return turb;
}
else return c;
}
It appears to me that this shader is emulating alpha testing between a backbuffer-like texture (passed via the sampler2D _MainTex) and a generated cloud luminance (represented by float lum) mapped onto a gradient. This makes things trickier because you can't just fake a blur and let alpha blending take care of the rest. You'll also need to change your alpha testing routine to emulate an alpha blend instead or restructure your rendering pipeline accordingly. We'll deal with blurring the clouds first.
The first question you need to ask yourself is whether you need a screen-space blur. Seeing the mechanics of this fragment shader, I would think not -- you want to blur the clouds on the actual model. Given this, it should be sufficient to blur the underlying textures to get a blurred result -- except you're emulating alpha clipping, so you'll get rough edges. The question is what to do about those rough edges. That's where alpha blending comes in.
You can emulate alpha blending by using a lerp (linear interpolation) between the turb color and the c color with the lerp() function (depending on which shader language you're using). You'll probably want something that looks like return lerp(c, turb, 1 - pos); instead of return turb; ... I'd expect you'll want to tweak this continually until you understand it and start getting the results you want. (For example, you may prefer lerp(c, turb, 1 - pow(pos,4)).)
In fact, you can try this last step (just adding the lerp) before modifying your textures to get an idea of what the alpha blending will do for you.
Edit: I hadn't considered the case where the _NoiseO and _NoiseT samplers were changing continually, so simply telling you to blur them was minimally useful advice. You can emulate blurring by using a multi-tap filter. The most simple way is to take uniformly spaced samples, weight them, and sum them together resulting in your final color. (Typically you'll want the weights themselves to sum to 1.)
This being said, you may or may not want to do this on the _NoiseO and _NoiseT textures themselves -- you may want to create a screen-space blur instead, which may look more interesting to a viewer. In this case, the same concept applies, but you need to compute the offset coordinates for each tap and then perform a weighted summation.
For example, if we were going with the first case and we wanted to sample from the _NoiseO sampler and blur it slightly, we could use this box filter (where all the weights are the same and sum to 1, thus performing an average):
// Untested code.
half4 nO = 0.25 * tex2D(_NoiseO, IN.uv_MainTex + float2( 0, 0))
+ 0.25 * tex2D(_NoiseO, IN.uv_MainTex + float2( 0, g_offset.y))
+ 0.25 * tex2D(_NoiseO, IN.uv_MainTex + float2(g_offset.x, 0))
+ 0.25 * tex2D(_NoiseO, IN.uv_MainTex + float2(g_offset.x, g_offset.y));
Alternatively, if we wanted the entire cloud output to appear blurry we'd wrap the cloud generation portion in a function and call it instead of tex2D() for the taps.
// More untested code.
half4 genCloud(float2 tc) {
half4 nO = tex2D (_NoiseO, tc); // sample at the passed-in coordinate, not IN.uv_MainTex
half4 nT = tex2D (_NoiseT, tc);
float4 turbulence = nO + nT;
float lum = Luminance(turbulence);
float pos = lum - 1.0;
if( pos > 0.98f ) pos = 0.98f;
if( pos < 0.02f ) pos = 0.02f;
float2 texCord = (pos, pos);
half4 turb = tex2D (_Gradient, texCord);
// Figure out how you'd generate your alpha blending constant here for your lerp
turb.a = ACTUAL_ALPHA;
return turb;
}
And the multi-tap filtering would look like:
// And even more untested code.
half4 cloudcolor = 0.25 * genCloud(IN.uv_MainTex + float2( 0, 0))
+ 0.25 * genCloud(IN.uv_MainTex + float2( 0, g_offset.y))
+ 0.25 * genCloud(IN.uv_MainTex + float2(g_offset.x, 0))
+ 0.25 * genCloud(IN.uv_MainTex + float2(g_offset.x, g_offset.y));
return lerp(c, cloudcolor, cloudcolor.a);
However, doing this is going to be relatively slow if you make the cloud function too complex. If you're bound by raster operations and texture reads (transferring texture/buffer data to and from memory), chances are this won't matter much unless you use a much more advanced blurring technique (such as successive downsampling through ping-ponged buffers, useful for blurs/filters that are expensive because they have lots of taps). But performance is a consideration entirely separate from just getting the look you want.