WebGL compilation error on Windows (ANGLE D3D9): indefinite logarithm

My fragment shader doesn't compile on some Windows machines. The compiler complains about an "indefinite logarithm", yet my shader doesn't contain any explicit logarithm.
Is there anything other than an indefinite logarithm that could trigger this error?
Live demo:
http://jsfiddle.net/w1r76rxm/
My fragment shader:
"#define GLSLIFY 1
uniform int uTextureSize;
uniform float uWindowCenterWidth[2];
uniform float uRescaleSlopeIntercept[2];
uniform sampler2D uTextureContainer[7];
uniform ivec3 uDataDimensions;
uniform mat4 uWorldToData;
uniform int uNumberOfChannels;
uniform int uPixelType;
uniform int uBitsAllocated;
uniform int uInvert;
// hack because can not pass arrays if too big
// best would be to pass texture but have to deal with 16bits
uniform int uLut;
uniform sampler2D uTextureLUT;
varying vec4 vPos;
// include functions
// unpack int 8
float uInt8(in float r){
return r * 256.;
}
// unpack int 16
float uInt16(in float r, in float a){
return r * 256. + a * 65536.;
}
// unpack int 32
float uInt32(in float r, in float g, in float b, in float a){
return r * 256. + g * 65536. + b * 16777216. + a * 4294967296.;
}
// unpack float 32
float uFloat32(in float r, in float g, in float b, in float a){
// create arrays containing bits for rgba values
// value between 0 and 255
float value = r * 255.;
int bytemeR[8];
bytemeR[0] = int(floor(value / 128.));
value -= float(bytemeR[0] * 128);
bytemeR[1] = int(floor(value / 64.));
value -= float(bytemeR[1] * 64);
bytemeR[2] = int(floor(value / 32.));
value -= float(bytemeR[2] * 32);
bytemeR[3] = int(floor(value / 16.));
value -= float(bytemeR[3] * 16);
bytemeR[4] = int(floor(value / 8.));
value -= float(bytemeR[4] * 8);
bytemeR[5] = int(floor(value / 4.));
value -= float(bytemeR[5] * 4);
bytemeR[6] = int(floor(value / 2.));
value -= float(bytemeR[6] * 2);
bytemeR[7] = int(floor(value));
value = g * 255.;
int bytemeG[8];
bytemeG[0] = int(floor(value / 128.));
value -= float(bytemeG[0] * 128);
bytemeG[1] = int(floor(value / 64.));
value -= float(bytemeG[1] * 64);
bytemeG[2] = int(floor(value / 32.));
value -= float(bytemeG[2] * 32);
bytemeG[3] = int(floor(value / 16.));
value -= float(bytemeG[3] * 16);
bytemeG[4] = int(floor(value / 8.));
value -= float(bytemeG[4] * 8);
bytemeG[5] = int(floor(value / 4.));
value -= float(bytemeG[5] * 4);
bytemeG[6] = int(floor(value / 2.));
value -= float(bytemeG[6] * 2);
bytemeG[7] = int(floor(value));
value = b * 255.;
int bytemeB[8];
bytemeB[0] = int(floor(value / 128.));
value -= float(bytemeB[0] * 128);
bytemeB[1] = int(floor(value / 64.));
value -= float(bytemeB[1] * 64);
bytemeB[2] = int(floor(value / 32.));
value -= float(bytemeB[2] * 32);
bytemeB[3] = int(floor(value / 16.));
value -= float(bytemeB[3] * 16);
bytemeB[4] = int(floor(value / 8.));
value -= float(bytemeB[4] * 8);
bytemeB[5] = int(floor(value / 4.));
value -= float(bytemeB[5] * 4);
bytemeB[6] = int(floor(value / 2.));
value -= float(bytemeB[6] * 2);
bytemeB[7] = int(floor(value));
value = a * 255.;
int bytemeA[8];
bytemeA[0] = int(floor(value / 128.));
value -= float(bytemeA[0] * 128);
bytemeA[1] = int(floor(value / 64.));
value -= float(bytemeA[1] * 64);
bytemeA[2] = int(floor(value / 32.));
value -= float(bytemeA[2] * 32);
bytemeA[3] = int(floor(value / 16.));
value -= float(bytemeA[3] * 16);
bytemeA[4] = int(floor(value / 8.));
value -= float(bytemeA[4] * 8);
bytemeA[5] = int(floor(value / 4.));
value -= float(bytemeA[5] * 4);
bytemeA[6] = int(floor(value / 2.));
value -= float(bytemeA[6] * 2);
bytemeA[7] = int(floor(value));
// compute float32 value from bit arrays
// sign
int issigned = int(pow(-1., float(bytemeR[0])));
// exponent
int exponent = 0;
exponent += bytemeR[1] * int(pow(2., 7.));
exponent += bytemeR[2] * int(pow(2., 6.));
exponent += bytemeR[3] * int(pow(2., 5.));
exponent += bytemeR[4] * int(pow(2., 4.));
exponent += bytemeR[5] * int(pow(2., 3.));
exponent += bytemeR[6] * int(pow(2., 2.));
exponent += bytemeR[7] * int(pow(2., 1.));
exponent += bytemeG[0];
// fraction
float fraction = 0.;
fraction = float(bytemeG[1]) * pow(2., -1.);
fraction += float(bytemeG[2]) * pow(2., -2.);
fraction += float(bytemeG[3]) * pow(2., -3.);
fraction += float(bytemeG[4]) * pow(2., -4.);
fraction += float(bytemeG[5]) * pow(2., -5.);
fraction += float(bytemeG[6]) * pow(2., -6.);
fraction += float(bytemeG[7]) * pow(2., -7.);
fraction += float(bytemeB[0]) * pow(2., -8.);
fraction += float(bytemeB[1]) * pow(2., -9.);
fraction += float(bytemeB[2]) * pow(2., -10.);
fraction += float(bytemeB[3]) * pow(2., -11.);
fraction += float(bytemeB[4]) * pow(2., -12.);
fraction += float(bytemeB[5]) * pow(2., -13.);
fraction += float(bytemeB[6]) * pow(2., -14.);
fraction += float(bytemeB[7]) * pow(2., -15.);
fraction += float(bytemeA[0]) * pow(2., -16.);
fraction += float(bytemeA[1]) * pow(2., -17.);
fraction += float(bytemeA[2]) * pow(2., -18.);
fraction += float(bytemeA[3]) * pow(2., -19.);
fraction += float(bytemeA[4]) * pow(2., -20.);
fraction += float(bytemeA[5]) * pow(2., -21.);
fraction += float(bytemeA[6]) * pow(2., -22.);
fraction += float(bytemeA[7]) * pow(2., -23.);
return float(issigned) * pow( 2., float(exponent - 127)) * (1. + fraction);
}
// entry point for the unpack function
vec4 unpack( vec4 packedRGBA,
int bitsAllocated,
int signedNumber,
int numberOfChannels,
int pixelType) {
// always return a vec4
vec4 unpacked = vec4(0, 0, 0, 0);
if(numberOfChannels == 1){
if(bitsAllocated == 8 || bitsAllocated == 1){
unpacked.x = uInt8(
packedRGBA.r);
}
else if(bitsAllocated == 16){
unpacked.x = uInt16(
packedRGBA.r,
packedRGBA.a);
}
else if(bitsAllocated == 32){
if(pixelType == 0){
unpacked.x = uInt32(
packedRGBA.r,
packedRGBA.g,
packedRGBA.b,
packedRGBA.a);
}
else{
unpacked.x = uFloat32(
packedRGBA.r,
packedRGBA.g,
packedRGBA.b,
packedRGBA.a);
}
}
}
else if(numberOfChannels == 3){
unpacked = packedRGBA;
}
return unpacked;
}
// Support up to textureSize*textureSize*7 voxels
vec4 texture3DPolyfill(ivec3 dataCoordinates,
ivec3 dataDimensions,
int textureSize,
sampler2D textureContainer0,
sampler2D textureContainer1,
sampler2D textureContainer2,
sampler2D textureContainer3,
sampler2D textureContainer4,
sampler2D textureContainer5,
sampler2D textureContainer6,
sampler2D textureContainer[7] // not working on Moto X 2014
) {
// Model coordinate to data index
int index = dataCoordinates.x
+ dataCoordinates.y * dataDimensions.x
+ dataCoordinates.z * dataDimensions.y * dataDimensions.x;
// Map data index to right sampler2D texture
int voxelsPerTexture = textureSize*textureSize;
int textureIndex = int(floor(float(index) / float(voxelsPerTexture)));
// modulo seems incorrect sometimes...
// int inTextureIndex = int(mod(float(index), float(textureSize*textureSize)));
int inTextureIndex = index - voxelsPerTexture*textureIndex;
// Get row and column in the texture
int colIndex = int(mod(float(inTextureIndex), float(textureSize)));
int rowIndex = int(floor(float(inTextureIndex)/float(textureSize)));
// Map row and column to uv
vec2 uv = vec2(0,0);
uv.x = (0.5 + float(colIndex)) / float(textureSize);
uv.y = 1. - (0.5 + float(rowIndex)) / float(textureSize);
//
vec4 dataValue = vec4(0., 0., 0., 0.);
if(textureIndex == 0){ dataValue = texture2D(textureContainer0, uv); }
else if(textureIndex == 1){dataValue = texture2D(textureContainer1, uv);}
else if(textureIndex == 2){ dataValue = texture2D(textureContainer2, uv); }
else if(textureIndex == 3){ dataValue = texture2D(textureContainer3, uv); }
else if(textureIndex == 4){ dataValue = texture2D(textureContainer4, uv); }
else if(textureIndex == 5){ dataValue = texture2D(textureContainer5, uv); }
else if(textureIndex == 6){ dataValue = texture2D(textureContainer6, uv); }
return dataValue;
}
void main(void) {
// get texture coordinates of current pixel
// doesn't need that in theory
vec4 dataCoordinatesRaw = uWorldToData * vPos;
// rounding trick
// first center of first voxel in data space is CENTERED on (0,0,0)
dataCoordinatesRaw += 0.5;
ivec3 dataCoordinates = ivec3(int(floor(dataCoordinatesRaw.x)), int(floor(dataCoordinatesRaw.y)), int(floor(dataCoordinatesRaw.z)));
// index 100
// dataCoordinates.x = 26; //25
// dataCoordinates.y = 1;
// dataCoordinates.z = 0;
// if data in range, look it up in the texture!
if ( all(greaterThanEqual(dataCoordinates, ivec3(0))) &&
all(lessThan(dataCoordinates, uDataDimensions))) {
vec4 packedValue = texture3DPolyfill(
dataCoordinates,
uDataDimensions,
uTextureSize,
uTextureContainer[0],
uTextureContainer[1],
uTextureContainer[2],
uTextureContainer[3],
uTextureContainer[4],
uTextureContainer[5],
uTextureContainer[6],
uTextureContainer // not working on Moto X 2014
);
vec4 dataValue = unpack(
packedValue,
uBitsAllocated,
0,
uNumberOfChannels,
uPixelType);
// how do we deal with more than 1 channel?
if(uNumberOfChannels == 1){
float intensity = dataValue.r;
// rescale/slope
intensity = intensity*uRescaleSlopeIntercept[0] + uRescaleSlopeIntercept[1];
// window level
// if(intensity < 2000.){
// gl_FragColor = vec4(1.0, 0., 0., 1.);
//return;
// }
float windowMin = uWindowCenterWidth[0] - uWindowCenterWidth[1] * 0.5;
float windowMax = uWindowCenterWidth[0] + uWindowCenterWidth[1] * 0.5;
intensity = ( intensity - windowMin ) / uWindowCenterWidth[1];
dataValue.r = dataValue.g = dataValue.b = intensity;
}
// Apply LUT table...
//
if(uLut == 1){
// should opacity be grabbed there?
dataValue = texture2D( uTextureLUT, vec2( dataValue.r , 1.0) );
}
if(uInvert == 1){
dataValue = vec4(1.) - dataValue;
// how do we deal with that and opacity?
dataValue.a = 1.;
}
gl_FragColor = dataValue;
}
else{
// should be able to choose what we want to do if not in range:
// discard or specific color
discard;
gl_FragColor = vec4(0.011, 0.662, 0.956, 1.0);
}
}"
Github issue: https://github.com/FNNDSC/ami/issues/39
Best,
Nicolas

The line
int issigned = int(pow(-1., float(bytemeR[0])));
is invalid: in GLSL, pow(x, y) is undefined for x < 0, and ANGLE's D3D9 backend compiles pow through an exponential of a logarithm, which is most likely where the "indefinite logarithm" error comes from. Replace it with:
int issigned = 1 - 2 * bytemeR[0];
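For illustration only (the helper name is mine, not part of the original shader), the same mapping from the sign bit to a ±1 factor can be written as a tiny function using plain integer arithmetic instead of pow():
// Illustrative helper, assuming `bit` holds the IEEE 754 sign bit (0 or 1),
// as bytemeR[0] does in the shader above. It avoids pow(-1., x), whose
// negative base is what the D3D9 compiler chokes on.
float signFromBit(int bit) {
    return float(1 - 2 * bit); // 0 -> +1.0, 1 -> -1.0
}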

Related

Why is matrix multiplication row x row 4-5 times slower than row x column on Mali's GPU?

Recently I ran into a problem while implementing matrix multiplication with a compute shader: a common matrix multiplication, C = AB. To make memory access contiguous, I transposed the B matrix, thinking this would speed things up. However, when I measured it, the row x row form turned out to be several times slower than the row x column form. I explored this for a long time and couldn't understand it, so I'm writing the problem down here for help!
My environment Mali G77 (MediaTek Dimensity 1200)
A matrix dimension: 4x2048x2048
B matrix dimension: 4x2048x2048
Time comparison:
Row x row: about 9s
Row x column: about 1.6s
Column x column: about 3.3s
Question demo: https://github.com/yikox/ProfilerDemo
shader code:
// compute shader
#version 310 es
#define XLOCAL 8
#define YLOCAL 8
#define ZLOCAL 1
layout(binding = 0) writeonly buffer soutput{
vec4 data[];
} uOutput;
layout(binding = 1) readonly buffer sinput0{
vec4 data[];
} uInput0;
layout(binding = 2) readonly buffer sinput1{
vec4 data[];
} uInput1;
layout(location=3) uniform ivec4 uInputSize0;
layout(location=4) uniform ivec4 uInputSize1;
layout(location=5) uniform ivec4 uOutputSize;
layout (local_size_x = XLOCAL, local_size_y = YLOCAL, local_size_z = ZLOCAL) in;
// The i-th element of one column of the product of matrix A and matrix B
vec4 PixelMul(int i, ivec3 pos)
{
// row x row
// vec4 data0 = uInput0.data[i + pos.y * uInputSize0.x + pos.z * uInputSize0.x * uInputSize0.y];
// vec4 data1 = uInput1.data[i + pos.x * uInputSize1.y + pos.z * uInputSize1.x * uInputSize1.y];
// row x column
// vec4 data0 = uInput0.data[i + pos.y * uInputSize0.x + pos.z * uInputSize0.x * uInputSize0.y];
// vec4 data1 = uInput1.data[pos.x + i * uInputSize1.y + pos.z * uInputSize1.x * uInputSize1.y];
// column x column
vec4 data0 = uInput0.data[pos.y + i * uInputSize0.x + pos.z * uInputSize0.x * uInputSize0.y];
vec4 data1 = uInput1.data[pos.x + i * uInputSize1.y + pos.z * uInputSize1.x * uInputSize1.y];
return data0 * data1;
}
void main()
{
ivec3 pos = ivec3(gl_GlobalInvocationID) * ivec3(2, 2, 1);
if(all(lessThan(pos, uOutputSize.xyz)))
{
vec4 outData00 = vec4(0);
vec4 outData01 = vec4(0);
vec4 outData10 = vec4(0);
vec4 outData11 = vec4(0);
for(int i = 0; i < uInputSize0.x; i++)
{
outData00 += PixelMul(i, pos + ivec3(0, 0, 0));
outData01 += PixelMul(i, pos + ivec3(1, 0, 0));
outData10 += PixelMul(i, pos + ivec3(0, 1, 0));
outData11 += PixelMul(i, pos + ivec3(1, 1, 0));
}
uOutput.data[pos.x + 0 + (pos.y + 0) * uOutputSize.x + pos.z * uOutputSize.x * uOutputSize.y] = outData00;
uOutput.data[pos.x + 1 + (pos.y + 0) * uOutputSize.x + pos.z * uOutputSize.x * uOutputSize.y] = outData01;
uOutput.data[pos.x + 0 + (pos.y + 1) * uOutputSize.x + pos.z * uOutputSize.x * uOutputSize.y] = outData10;
uOutput.data[pos.x + 1 + (pos.y + 1) * uOutputSize.x + pos.z * uOutputSize.x * uOutputSize.y] = outData11;
}
}

What is this called and how to achieve it? Visuals in Processing

Hey, does anyone know how to achieve this effect using Processing, or what this is called?
I have been trying to use the wave gradient example in the Processing library and implementing Perlin noise, but I cannot get close to the GIF's quality.
I know the artist used Processing but I cannot figure out how!
Link to gif:
https://giphy.com/gifs/processing-jodeus-QInYLzY33wMwM
The effect is reminiscent of Op Art (optical illusion art): I recommend reading/learning more about this fascinating genre and artists like:
Bridget Riley
(Bridget Riley, Intake, 1964)
(Bridget Riley, Hesitate, 1964,
Copyright: (c) Bridget Riley 2018. All rights reserved. / Photo (c) Tate)
Victor Vasarely
(Victor Vasarely, Zebra Couple)
(Victor Vasarely, VegaII)
Frank Stella
(Frank Stella, Untitled 1965, Image courtesy of Art Gallery NSW)
and more
You'll notice these waves are reminiscent of, and heavily inspired by, Bridget Riley's work.
I also recommend checking out San Charoenchai's album visualiser for Beach House - 7
As mentioned in my comment: you should post your attempt.
Waves and perlin noise could work for sure.
There are many ways to achieve a similar look.
Here's a tweaked version of Daniel Shiffman's Noise Wave example:
int numWaves = 24;
float[] yoff = new float[numWaves]; // 2nd dimension of perlin noise
float[] yoffIncrements = new float[numWaves];
void setup() {
size(640, 360);
noStroke();
for(int i = 0 ; i < numWaves; i++){
yoffIncrements[i] = map(i, 0, numWaves - 1, 0.01, 0.03);
}
}
void draw() {
background(0);
float waveHeight = height / numWaves;
for(int i = 0 ; i < numWaves; i++){
float waveY = i * waveHeight;
fill(i % 2 == 0 ? color(255) : color(0));
// We are going to draw a polygon out of the wave points
beginShape();
float xoff = 0; // Option #1: 2D Noise
// float xoff = yoff; // Option #2: 1D Noise
// Iterate over horizontal pixels
for (float x = 0; x <= width + 30; x += 20) {
// Calculate a y value according to noise, map to
float y = map(noise(xoff, yoff[i]), 0, 1, waveY , waveY + (waveHeight * 3)); // Option #1: 2D Noise
// float y = map(noise(xoff), 0, 1, 200,300); // Option #2: 1D Noise
// Set the vertex
vertex(x, y);
// Increment x dimension for noise
xoff += 0.05;
}
// increment y dimension for noise
yoff[i] += yoffIncrements[i];
vertex(width, height);
vertex(0, height);
endShape(CLOSE);
}
}
Notice the quality of the noise wave in comparison to the image you're trying to emulate: there is a constant rhythm to it. To me that is a hint that it's using cycling sine waves changing phase and amplitude (potentially even adding waves together).
I've written an extensive answer on animating sine waves here
(Reuben Margolin's kinectic sculpture system demo)
From your question it sounds like you would be comfortable implementing a sine wave animation. If it helps, here's an example of adding two waves together:
void setup(){
size(600,600);
noStroke();
}
void draw(){
background(0);
// how many waves per sketch height
int heightDivisions = 30;
// split the sketch height into equal height sections
float heightDivisionSize = (float)height / heightDivisions;
// for each height division
for(int j = 0 ; j < heightDivisions; j++){
// use % 2 to alternate between black and white
// see https://processing.org/reference/modulo.html and
// https://processing.org/reference/conditional.html for more
fill(j % 2 == 0 ? color(255) : color(0));
// offset drawing on Y axis
translate(0,(j * heightDivisionSize));
// start a wave shape
beginShape();
// first vertex is at the top left corner
vertex(0,height);
// how many horizontal (per wave) divisions ?
int widthDivisions = 12;
// equally space the points on the wave horizontally
float widthDivsionSize = (float)width / widthDivisions;
// for each point on the wave
for(int i = 0; i <= widthDivisions; i++){
// calculate different phases
// play with arithmetic operators to make interesting wave additions
float phase1 = (frameCount * 0.01) + ((i * j) * 0.025);
float phase2 = (frameCount * 0.05) + ((i + j) * 0.25);
// calculate vertex x position
float x = widthDivsionSize * i;
// multiple sine waves
// (can use cos() and other ratios too)
// 150 in this case is the wave amplitude (e.g. from -150 to + 150)
float y = ((sin(phase1) * sin(phase2) * 150));
// draw calculated vertex
vertex(x,y);
}
// last vertex is at bottom right corner
vertex(width,height);
// finish the shape
endShape();
}
}
The result:
Minor note on performance: this could be implemented more efficiently using PShape, however I recommend playing with the maths/geometry to find the form you're after, then as a last step think of optimizing it.
My intention is not to show you how to create an exact replica, but to show there's more to Op Art than an effect and hopefully inspire you to explore other methods of achieving something similar in the hope that you will discover your own methods and outcomes: something new and of your own through fun happy accidents.
In terms of other techniques/avenues to explore:
displacement maps:
Using an alternating black/white straight bars texture on wavy 3D geometry
using shaders:
Shaders are a huge topic on their own, but it's worth noting:
There's a very good Processing Shader Tutorial
You might be able to explore fragment shaders on Shadertoy, tweak the code in the browser, then make slight changes so you can run them in Processing.
Here are a few quick examples:
https://www.shadertoy.com/view/Wts3DB
tweaked for black/white waves in Processing as shader-Wts3DB.frag
// https://www.shadertoy.com/view/Wts3DB
uniform vec2 iResolution;
uniform float iTime;
#define COUNT 6.
#define COL_BLACK vec3(23,32,38) / 255.0
#define SF 1./min(iResolution.x,iResolution.y)
#define SS(l,s) smoothstep(SF,-SF,l-s)
#define hue(h) clamp( abs( fract(h + vec4(3,2,1,0)/3.) * 6. - 3.) -1. , 0., 1.)
// Original noise code from https://www.shadertoy.com/view/4sc3z2
#define MOD3 vec3(.1031,.11369,.13787)
vec3 hash33(vec3 p3)
{
p3 = fract(p3 * MOD3);
p3 += dot(p3, p3.yxz+19.19);
return -1.0 + 2.0 * fract(vec3((p3.x + p3.y)*p3.z, (p3.x+p3.z)*p3.y, (p3.y+p3.z)*p3.x));
}
float simplex_noise(vec3 p)
{
const float K1 = 0.333333333;
const float K2 = 0.166666667;
vec3 i = floor(p + (p.x + p.y + p.z) * K1);
vec3 d0 = p - (i - (i.x + i.y + i.z) * K2);
vec3 e = step(vec3(0.0), d0 - d0.yzx);
vec3 i1 = e * (1.0 - e.zxy);
vec3 i2 = 1.0 - e.zxy * (1.0 - e);
vec3 d1 = d0 - (i1 - 1.0 * K2);
vec3 d2 = d0 - (i2 - 2.0 * K2);
vec3 d3 = d0 - (1.0 - 3.0 * K2);
vec4 h = max(0.6 - vec4(dot(d0, d0), dot(d1, d1), dot(d2, d2), dot(d3, d3)), 0.0);
vec4 n = h * h * h * h * vec4(dot(d0, hash33(i)), dot(d1, hash33(i + i1)), dot(d2, hash33(i + i2)), dot(d3, hash33(i + 1.0)));
return dot(vec4(31.316), n);
}
void mainImage( vec4 fragColor, vec2 fragCoord )
{
}
void main(void) {
//vec2 uv = vec2(gl_FragColor.x / iResolution.y, gl_FragColor.y / iResolution.y);
vec2 uv = gl_FragCoord.xy / iResolution.y;
float m = 0.;
float t = iTime *.5;
vec3 col;
for(float i=COUNT; i>=0.; i-=1.){
float edge = simplex_noise(vec3(uv * vec2(2., 0.) + vec2(0, t + i*.15), 3.))*.2 + (.95/COUNT)*i;
float mi = SS(edge, uv.y) - SS(edge + .095, uv.y);
m += mi;
if(mi > 0.){
col = vec3(1.0);
}
}
col = mix(COL_BLACK, col, m);
gl_FragColor = vec4(col,1.0);
// mainImage(gl_FragColor,gl_FragCoord);
}
loaded in Processing as:
PShader shader;
void setup(){
size(300,300,P2D);
noStroke();
shader = loadShader("shader-Wts3DB.frag");
shader.set("iResolution",(float)width, float(height));
}
void draw(){
background(0);
shader.set("iTime",frameCount * 0.05);
shader(shader);
rect(0,0,width,height);
}
https://www.shadertoy.com/view/MtsXzl
tweaked as shader-MtsXzl.frag
//https://www.shadertoy.com/view/MtsXzl
#define SHOW_GRID 1
const float c_scale = 0.5;
const float c_rate = 2.0;
#define FLT_MAX 3.402823466e+38
uniform vec3 iMouse;
uniform vec2 iResolution;
uniform float iTime;
//=======================================================================================
float CubicHermite (float A, float B, float C, float D, float t)
{
float t2 = t*t;
float t3 = t*t*t;
float a = -A/2.0 + (3.0*B)/2.0 - (3.0*C)/2.0 + D/2.0;
float b = A - (5.0*B)/2.0 + 2.0*C - D / 2.0;
float c = -A/2.0 + C/2.0;
float d = B;
return a*t3 + b*t2 + c*t + d;
}
//=======================================================================================
float hash(float n) {
return fract(sin(n) * 43758.5453123);
}
//=======================================================================================
float GetHeightAtTile(vec2 T)
{
float rate = hash(hash(T.x) * hash(T.y))*0.5+0.5;
return (sin(iTime*rate*c_rate) * 0.5 + 0.5) * c_scale;
}
//=======================================================================================
float HeightAtPos(vec2 P)
{
vec2 tile = floor(P);
P = fract(P);
float CP0X = CubicHermite(
GetHeightAtTile(tile + vec2(-1.0,-1.0)),
GetHeightAtTile(tile + vec2(-1.0, 0.0)),
GetHeightAtTile(tile + vec2(-1.0, 1.0)),
GetHeightAtTile(tile + vec2(-1.0, 2.0)),
P.y
);
float CP1X = CubicHermite(
GetHeightAtTile(tile + vec2( 0.0,-1.0)),
GetHeightAtTile(tile + vec2( 0.0, 0.0)),
GetHeightAtTile(tile + vec2( 0.0, 1.0)),
GetHeightAtTile(tile + vec2( 0.0, 2.0)),
P.y
);
float CP2X = CubicHermite(
GetHeightAtTile(tile + vec2( 1.0,-1.0)),
GetHeightAtTile(tile + vec2( 1.0, 0.0)),
GetHeightAtTile(tile + vec2( 1.0, 1.0)),
GetHeightAtTile(tile + vec2( 1.0, 2.0)),
P.y
);
float CP3X = CubicHermite(
GetHeightAtTile(tile + vec2( 2.0,-1.0)),
GetHeightAtTile(tile + vec2( 2.0, 0.0)),
GetHeightAtTile(tile + vec2( 2.0, 1.0)),
GetHeightAtTile(tile + vec2( 2.0, 2.0)),
P.y
);
return CubicHermite(CP0X, CP1X, CP2X, CP3X, P.x);
}
//=======================================================================================
vec3 NormalAtPos( vec2 p )
{
float eps = 0.01;
vec3 n = vec3( HeightAtPos(vec2(p.x-eps,p.y)) - HeightAtPos(vec2(p.x+eps,p.y)),
2.0*eps,
HeightAtPos(vec2(p.x,p.y-eps)) - HeightAtPos(vec2(p.x,p.y+eps)));
return normalize( n );
}
//=======================================================================================
float RayIntersectSphere (vec4 sphere, in vec3 rayPos, in vec3 rayDir)
{
//get the vector from the center of this circle to where the ray begins.
vec3 m = rayPos - sphere.xyz;
//get the dot product of the above vector and the ray's vector
float b = dot(m, rayDir);
float c = dot(m, m) - sphere.w * sphere.w;
//exit if r's origin outside s (c > 0) and r pointing away from s (b > 0)
if(c > 0.0 && b > 0.0)
return -1.0;
//calculate discriminant
float discr = b * b - c;
//a negative discriminant corresponds to ray missing sphere
if(discr < 0.0)
return -1.0;
//ray now found to intersect sphere, compute smallest t value of intersection
float collisionTime = -b - sqrt(discr);
//if t is negative, ray started inside sphere so clamp t to zero and remember that we hit from the inside
if(collisionTime < 0.0)
collisionTime = -b + sqrt(discr);
return collisionTime;
}
//=======================================================================================
vec3 DiffuseColor (in vec3 pos)
{
#if SHOW_GRID
pos = mod(floor(pos),2.0);
return vec3(mod(pos.x, 2.0) < 1.0 ? 1.0 : 0.0);
#else
return vec3(0.1, 0.8, 0.9);
#endif
}
//=======================================================================================
vec3 ShadePoint (in vec3 pos, in vec3 rayDir, float time, bool fromUnderneath)
{
vec3 diffuseColor = DiffuseColor(pos);
vec3 reverseLightDir = normalize(vec3(1.0,1.0,-1.0));
vec3 lightColor = vec3(1.0);
vec3 ambientColor = vec3(0.05);
vec3 normal = NormalAtPos(pos.xz);
normal *= fromUnderneath ? -1.0 : 1.0;
// diffuse
vec3 color = diffuseColor;
float dp = dot(normal, reverseLightDir);
if(dp > 0.0)
color += (diffuseColor * lightColor);
return color;
}
//=======================================================================================
vec3 HandleRay (in vec3 rayPos, in vec3 rayDir, in vec3 pixelColor, out float hitTime)
{
float time = 0.0;
float lastHeight = 0.0;
float lastY = 0.0;
float height;
bool hitFound = false;
hitTime = FLT_MAX;
bool fromUnderneath = false;
vec2 timeMinMax = vec2(0.0, 20.0);
time = timeMinMax.x;
const int c_numIters = 100;
float deltaT = (timeMinMax.y - timeMinMax.x) / float(c_numIters);
vec3 pos = rayPos + rayDir * time;
float firstSign = sign(pos.y - HeightAtPos(pos.xz));
for (int index = 0; index < c_numIters; ++index)
{
pos = rayPos + rayDir * time;
height = HeightAtPos(pos.xz);
if (sign(pos.y - height) * firstSign < 0.0)
{
fromUnderneath = firstSign < 0.0;
hitFound = true;
break;
}
time += deltaT;
lastHeight = height;
lastY = pos.y;
}
if (hitFound) {
time = time - deltaT + deltaT*(lastHeight-lastY)/(pos.y-lastY-height+lastHeight);
pos = rayPos + rayDir * time;
pixelColor = ShadePoint(pos, rayDir, time, fromUnderneath);
hitTime = time;
}
return pixelColor;
}
//=======================================================================================
void main()
{
// scrolling camera
vec3 cameraOffset = vec3(iTime, 0.5, iTime);
//----- camera
vec2 mouse = iMouse.xy / iResolution.xy;
vec3 cameraAt = vec3(0.5,0.5,0.5) + cameraOffset;
float angleX = iMouse.z > 0.0 ? 6.28 * mouse.x : 3.14 + iTime * 0.25;
float angleY = iMouse.z > 0.0 ? (mouse.y * 6.28) - 0.4 : 0.5;
vec3 cameraPos = (vec3(sin(angleX)*cos(angleY), sin(angleY), cos(angleX)*cos(angleY))) * 5.0;
// float angleX = 0.8;
// float angleY = 0.8;
// vec3 cameraPos = vec3(0.0,0.0,0.0);
cameraPos += vec3(0.5,0.5,0.5) + cameraOffset;
vec3 cameraFwd = normalize(cameraAt - cameraPos);
vec3 cameraLeft = normalize(cross(normalize(cameraAt - cameraPos), vec3(0.0,sign(cos(angleY)),0.0)));
vec3 cameraUp = normalize(cross(cameraLeft, cameraFwd));
float cameraViewWidth = 6.0;
float cameraViewHeight = cameraViewWidth * iResolution.y / iResolution.x;
float cameraDistance = 6.0; // intuitively backwards!
// Objects
vec2 rawPercent = (gl_FragCoord.xy / iResolution.xy);
vec2 percent = rawPercent - vec2(0.5,0.5);
vec3 rayTarget = (cameraFwd * vec3(cameraDistance,cameraDistance,cameraDistance))
- (cameraLeft * percent.x * cameraViewWidth)
+ (cameraUp * percent.y * cameraViewHeight);
vec3 rayDir = normalize(rayTarget);
float hitTime = FLT_MAX;
vec3 pixelColor = vec3(1.0, 1.0, 1.0);
pixelColor = HandleRay(cameraPos, rayDir, pixelColor, hitTime);
gl_FragColor = vec4(clamp(pixelColor,0.0,1.0), 1.0);
}
and the mouse interactive Processing sketch:
PShader shader;
void setup(){
size(300,300,P2D);
noStroke();
shader = loadShader("shader-MtsXzl.frag");
shader.set("iResolution",(float)width, float(height));
}
void draw(){
background(0);
shader.set("iTime",frameCount * 0.05);
shader.set("iMouse",(float)mouseX , (float)mouseY, mousePressed ? 1.0 : 0.0);
shader(shader);
rect(0,0,width,height);
}
Shadertoy is a great way to play/learn: have fun!
Update
Here's a quick test tweaking Daniel Shiffman's 3D Terrain Generation example to add a striped texture and basic sine waves instead of Perlin noise:
// Daniel Shiffman
// http://codingtra.in
// http://patreon.com/codingtrain
// Code for: https://youtu.be/IKB1hWWedMk
int cols, rows;
int scl = 20;
int w = 2000;
int h = 1600;
float flying = 0;
float[][] terrain;
PImage texture;
void setup() {
size(600, 600, P3D);
textureMode(NORMAL);
noStroke();
cols = w / scl;
rows = h/ scl;
terrain = new float[cols][rows];
texture = getBarsTexture(512,512,96);
}
void draw() {
flying -= 0.1;
float yoff = flying;
for (int y = 0; y < rows; y++) {
float xoff = 0;
for (int x = 0; x < cols; x++) {
//terrain[x][y] = map(noise(xoff, yoff), 0, 1, -100, 100);
terrain[x][y] = map(sin(xoff) * sin(yoff), 0, 1, -60, 60);
xoff += 0.2;
}
yoff += 0.2;
}
background(0);
translate(width/2, height/2+50);
rotateX(PI/9);
translate(-w/2, -h/2);
for (int y = 0; y < rows-1; y++) {
beginShape(TRIANGLE_STRIP);
texture(texture);
for (int x = 0; x < cols; x++) {
float u0 = map(x,0,cols-1,0.0,1.0);
float u1 = map(x+1,0,cols-1,0.0,1.0);
float v0 = map(y,0,rows-1,0.0,1.0);
float v1 = map(y+1,0,rows-1,0.0,1.0);
vertex(x*scl, y*scl, terrain[x][y], u0, v0);
vertex(x*scl, (y+1)*scl, terrain[x][y+1], u1, v1);
}
endShape();
}
}
PGraphics getBarsTexture(int textureWidth, int textureHeight, int numBars){
PGraphics texture = createGraphics(textureWidth, textureHeight);
int moduleSide = textureWidth / numBars;
texture.beginDraw();
texture.background(0);
texture.noStroke();
for(int i = 0; i < numBars; i+= 2){
texture.rect(0, i * moduleSide, textureWidth, moduleSide);
}
texture.endDraw();
return texture;
}

Why is wrapping coordinates not making my simplex noise tile seamlessly?

I've been trying to create a fake 3D texture that repeats in Shadertoy (see here; use WASD to move, arrow keys to rotate). But as you can see, it doesn't tile.
I generate the noise myself, and I've isolated the noise generation in this minimal example, however it does not generate seamlessly tileable noise, seemingly no matter what I do.
Here is the code:
//Common, you probably won't have to look here.
vec2 modv(vec2 value, float modvalue){
return vec2(mod(value.x, modvalue),
mod(value.y, modvalue));
}
vec3 modv(vec3 value, float modvalue){
return vec3(mod(value.x, modvalue),
mod(value.y, modvalue),
mod(value.z, modvalue));
}
vec4 modv(vec4 value, float modvalue){
return vec4(mod(value.x, modvalue),
mod(value.y, modvalue),
mod(value.z, modvalue),
mod(value.w, modvalue));
}
//MATH CONSTANTS
const float pi = 3.1415926535897932384626433832795;
const float tau = 6.2831853071795864769252867665590;
const float eta = 1.5707963267948966192313216916397;
const float SQRT3 = 1.7320508075688772935274463415059;
const float SQRT2 = 1.4142135623730950488016887242096;
const float LTE1 = 0.9999999999999999999999999999999;
const float inf = uintBitsToFloat(0x7F800000u);
#define saturate(x) clamp(x,0.0,1.0)
#define norm01(x) ((x + 1.0) / 2.0)
vec2 pos3DTo2D(in vec3 pos,
const in int size_dim,
const in ivec2 z_size){
float size_dimf = float(size_dim);
pos = vec3(mod(pos.x, size_dimf), mod(pos.y, size_dimf), mod(pos.z, size_dimf));
int z_dim_x = int(pos.z) % z_size.x;
int z_dim_y = int(pos.z) / z_size.x;
float x = pos.x + float(z_dim_x * size_dim);
float y = pos.y + float(z_dim_y * size_dim);
return vec2(x,y);
}
vec4 textureAs3D(const in sampler2D iChannel,
in vec3 pos,
const in int size_dim,
const in ivec2 z_size,
const in vec3 iResolution){
//only need whole, will do another texture read to make sure interpolated?
vec2 tex_pos = pos3DTo2D(pos, size_dim, z_size)/iResolution.xy;
vec4 base_vec4 = texture(iChannel, tex_pos);
vec2 tex_pos_z1 = pos3DTo2D(pos+vec3(0.0,0.0,1.0), size_dim, z_size.xy)/iResolution.xy;
vec4 base_vec4_z1 = texture(iChannel, tex_pos_z1);
//return base_vec4;
return mix(base_vec4, base_vec4_z1, fract(pos.z));
}
vec4 textureZ3D(const in sampler2D iChannel,
in int y,
in int z,
in int offsetX,
const in int size_dim,
const in ivec2 z_size,
const in vec3 iResolution){
int tx = (z%z_size.x);
int ty = z/z_size.x;
int sx = offsetX + size_dim * tx;
int sy = y + (ty *size_dim);
if(ty < z_size.y){
return texelFetch(iChannel, ivec2(sx, sy),0);
}else{
return vec4(0.0);
}
//return texelFetch(iChannel, ivec2(x, y - (ty *32)),0);
}
//Buffer B: this is what you are going to have to look at.
//noise
//NOISE CONSTANTS
// captured from https://en.wikipedia.org/wiki/SHA-2#Pseudocode
const uint CONST_A = 0xcc9e2d51u;
const uint CONST_B = 0x1b873593u;
const uint CONST_C = 0x85ebca6bu;
const uint CONST_D = 0xc2b2ae35u;
const uint CONST_E = 0xe6546b64u;
const uint CONST_F = 0x510e527fu;
const uint CONST_G = 0x923f82a4u;
const uint CONST_H = 0x14292967u;
const uint CONST_0 = 4294967291u;
const uint CONST_1 = 604807628u;
const uint CONST_2 = 2146583651u;
const uint CONST_3 = 1072842857u;
const uint CONST_4 = 1396182291u;
const uint CONST_5 = 2227730452u;
const uint CONST_6 = 3329325298u;
const uint CONST_7 = 3624381080u;
uvec3 singleHash(uvec3 uval){
uval ^= uval >> 16;
uval.x *= CONST_A;
uval.y *= CONST_B;
uval.z *= CONST_C;
return uval;
}
uint combineHash(uint seed, uvec3 uval){
// can move this out to compile time if need be.
// with out multiplying by one of the randomizing constants
// will result in not very different results from seed to seed.
uint un = seed * CONST_5;
un ^= (uval.x^uval.y)* CONST_0;
un ^= (un >> 16);
un = (un^uval.z)*CONST_1;
un ^= (un >> 16);
return un;
}
/*
//what the above hashes are based upon; separate
//out this murmurhash-based coherent noise hash
uint fullHash(uint seed, uvec3 uval){
uval ^= uval >> 16;
uval.x *= CONST_A;
uval.y *= CONST_B;
uval.z *= CONST_D;
uint un = seed * CONST_6;
un ^= (uval.x ^ uval.y) * CONST_0;
un ^= un >> 16;
un = (un^uval.z) * CONST_2;
un ^= un >> 16;
return un;
}
*/
const vec3 gradArray3d[8] = vec3[8](
vec3(1, 1, 1), vec3(1,-1, 1), vec3(-1, 1, 1), vec3(-1,-1, 1),
vec3(1, 1,-1), vec3(1,-1,-1), vec3(-1, 1,-1), vec3(-1,-1,-1)
);
vec3 getGradient3Old(uint uval){
vec3 grad = gradArray3d[uval & 7u];
return grad;
}
//source of some constants
//https://github.com/Auburns/FastNoise/blob/master/FastNoise.cpp
const float SKEW3D = 1.0 / 3.0;
const float UNSKEW3D = 1.0 / 6.0;
const float FAR_CORNER_UNSKEW3D = -1.0 + 3.0*UNSKEW3D;
const float NORMALIZE_SCALE3D = 30.0;// * SQRT3;
const float DISTCONST_3D = 0.6;
float simplexNoiseV(uint seed, in vec3 pos, in uint wrap){
pos = modv(pos, float(wrap));
float skew_factor = (pos.x + pos.y + pos.z)*SKEW3D;
vec3 fsimplex_corner0 = floor(pos + skew_factor);
ivec3 simplex_corner0 = ivec3(fsimplex_corner0);
float unskew_factor = (fsimplex_corner0.x + fsimplex_corner0.y + fsimplex_corner0.z) * UNSKEW3D;
vec3 pos0 = fsimplex_corner0 - unskew_factor;
//subpos's are positions with in grid cell.
vec3 subpos0 = pos - pos0;
//precomputed values used in determining hash, reduces redundant hash computation
//shows 10% -> 20% speed boost.
uvec3 wrapped_corner0 = uvec3(simplex_corner0);
uvec3 wrapped_corner1 = uvec3(simplex_corner0+1);
wrapped_corner0 = wrapped_corner0 % wrap;
wrapped_corner1 = wrapped_corner1 % wrap;
//uvec3 hashes_offset0 = singleHash(uvec3(simplex_corner0));
//uvec3 hashes_offset1 = singleHash(uvec3(simplex_corner0+1));
uvec3 hashes_offset0 = singleHash(wrapped_corner0);
uvec3 hashes_offset1 = singleHash(wrapped_corner1);
//near corner hash value
uint hashval0 = combineHash(seed, hashes_offset0);
//mid corner hash value
uint hashval1;
uint hashval2;
//far corner hash value
uint hashval3 = combineHash(seed, hashes_offset1);
ivec3 simplex_corner1;
ivec3 simplex_corner2;
if (subpos0.x >= subpos0.y)
{
if (subpos0.y >= subpos0.z)
{
hashval1 = combineHash(seed, uvec3(hashes_offset1.x, hashes_offset0.yz));
hashval2 = combineHash(seed, uvec3(hashes_offset1.xy, hashes_offset0.z));
simplex_corner1 = ivec3(1,0,0);
simplex_corner2 = ivec3(1,1,0);
}
else if (subpos0.x >= subpos0.z)
{
hashval1 = combineHash(seed, uvec3(hashes_offset1.x, hashes_offset0.yz));
hashval2 = combineHash(seed, uvec3(hashes_offset1.x, hashes_offset0.y, hashes_offset1.z));
simplex_corner1 = ivec3(1,0,0);
simplex_corner2 = ivec3(1,0,1);
}
else // subpos0.x < subpos0.z
{
hashval1 = combineHash(seed, uvec3(hashes_offset0.xy, hashes_offset1.z));
hashval2 = combineHash(seed, uvec3(hashes_offset1.x, hashes_offset0.y, hashes_offset1.z));
simplex_corner1 = ivec3(0,0,1);
simplex_corner2 = ivec3(1,0,1);
}
}
else // subpos0.x < subpos0.y
{
if (subpos0.y < subpos0.z)
{
hashval1 = combineHash(seed, uvec3(hashes_offset0.xy, hashes_offset1.z));
hashval2 = combineHash(seed, uvec3(hashes_offset0.x, hashes_offset1.yz));
simplex_corner1 = ivec3(0,0,1);
simplex_corner2 = ivec3(0,1,1);
}
else if (subpos0.x < subpos0.z)
{
hashval1 = combineHash(seed, uvec3(hashes_offset0.x, hashes_offset1.y, hashes_offset0.z));
hashval2 = combineHash(seed, uvec3(hashes_offset0.x, hashes_offset1.yz));
simplex_corner1 = ivec3(0,1,0);
simplex_corner2 = ivec3(0,1,1);
}
else // subpos0.x >= subpos0.z
{
hashval1 = combineHash(seed, uvec3(hashes_offset0.x, hashes_offset1.y, hashes_offset0.z));
hashval2 = combineHash(seed, uvec3(hashes_offset1.xy, hashes_offset0.z));
simplex_corner1 = ivec3(0,1,0);
simplex_corner2 = ivec3(1,1,0);
}
}
//we would do this if we didn't want to separate the hash values.
//hashval0 = fullHash(seed, uvec3(simplex_corner0));
//hashval1 = fullHash(seed, uvec3(simplex_corner0+simplex_corner1));
//hashval2 = fullHash(seed, uvec3(simplex_corner0+simplex_corner2));
//hashval3 = fullHash(seed, uvec3(simplex_corner0+1));
vec3 subpos1 = subpos0 - vec3(simplex_corner1) + UNSKEW3D;
vec3 subpos2 = subpos0 - vec3(simplex_corner2) + 2.0*UNSKEW3D;
vec3 subpos3 = subpos0 + FAR_CORNER_UNSKEW3D;
float n0, n1, n2, n3;
//http://catlikecoding.com/unity/tutorials/simplex-noise/
//circle distance factor to make sure second derivative is continuous
// t variables represent (1 - x^2 + y^2 + ...)^3, a distance function with
// continuous first and second derivatives that are zero when x is one.
float t0 = DISTCONST_3D - subpos0.x*subpos0.x - subpos0.y*subpos0.y - subpos0.z*subpos0.z;
//if t < 0, we get odd dips in continuity at the ends, so we just force it to zero
// to prevent it
if(t0 < 0.0){
n0 = 0.0;
}else{
float t0_pow2 = t0 * t0;
float t0_pow4 = t0_pow2 * t0_pow2;
vec3 grad = getGradient3Old(hashval0);
float product = dot(subpos0, grad);
n0 = t0_pow4 * product;
}
float t1 = DISTCONST_3D - subpos1.x*subpos1.x - subpos1.y*subpos1.y - subpos1.z*subpos1.z;
if(t1 < 0.0){
n1 = 0.0;
}else{
float t1_pow2 = t1 * t1;
float t1_pow4 = t1_pow2 * t1_pow2;
vec3 grad = getGradient3Old(hashval1);
float product = dot(subpos1, grad);
n1 = t1_pow4 * product;
}
float t2 = DISTCONST_3D - subpos2.x*subpos2.x - subpos2.y*subpos2.y - subpos2.z*subpos2.z;
if(t2 < 0.0){
n2 = 0.0;
}else{
float t2_pow2 = t2 * t2;
float t2_pow4 = t2_pow2*t2_pow2;
vec3 grad = getGradient3Old(hashval2);
float product = dot(subpos2, grad);
n2 = t2_pow4 * product;
}
float t3 = DISTCONST_3D - subpos3.x*subpos3.x - subpos3.y*subpos3.y - subpos3.z*subpos3.z;
if(t3 < 0.0){
n3 = 0.0;
}else{
float t3_pow2 = t3 * t3;
float t3_pow4 = t3_pow2*t3_pow2;
vec3 grad = getGradient3Old(hashval3);
float product = dot(subpos3, grad);
n3 = t3_pow4 * product;
}
return (n0 + n1 + n2 + n3);
}
//settings for fractal brownian motion noise
struct BrownianFractalSettings{
uint seed;
int octave_count;
float frequency;
float lacunarity;
float persistence;
float amplitude;
};
float accumulateSimplexNoiseV(in BrownianFractalSettings settings, vec3 pos, float wrap){
float accumulated_noise = 0.0;
wrap *= settings.frequency;
vec3 octave_pos = pos * settings.frequency;
for (int octave = 0; octave < settings.octave_count; octave++) {
octave_pos = modv(octave_pos, wrap);
float noise = simplexNoiseV(settings.seed, octave_pos, uint(wrap));
noise *= pow(settings.persistence, float(octave));
accumulated_noise += noise;
octave_pos *= settings.lacunarity;
wrap *= settings.lacunarity;
}
float scale = 2.0 - pow(settings.persistence, float(settings.octave_count - 1));
return (accumulated_noise/scale) * NORMALIZE_SCALE3D * settings.amplitude;
}
const float FREQUENCY = 1.0/8.0;
const float WRAP = 32.0;
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
//set to zero in order to stop scrolling; scrolling shows the lack of tileability across
//the wrap boundary.
const float use_sin_debug = 1.0;
vec3 origin = vec3(norm01(sin(iTime))*64.0*use_sin_debug,0.0,0.0);
vec3 color = vec3(0.0,0.0,0.0);
BrownianFractalSettings brn_settings =
BrownianFractalSettings(203u, 1, FREQUENCY, 2.0, 0.4, 1.0);
const int size_dim = 32;
ivec2 z_size = ivec2(8, 4);
ivec2 iFragCoord = ivec2(fragCoord.x, fragCoord.y);
int z_dim_x = iFragCoord.x / size_dim;
int z_dim_y = iFragCoord.y / size_dim;
if(z_dim_x < z_size.x && z_dim_y < z_size.y){
int ix = iFragCoord.x % size_dim;
int iy = iFragCoord.y % size_dim;
int iz = (z_dim_x) + ((z_dim_y)*z_size.x);
vec3 pos = vec3(ix,iy,iz) + origin;
float value = accumulateSimplexNoiseV(brn_settings, pos, WRAP);
color = vec3(norm01(value));
}else{
color = vec3(1.0,0.0,0.0);
}
fragColor = vec4(color,1.0);
}
//Image, used to finally display
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
const float fcm = 4.0;
//grabs a single 32x32 tile in order to test tileability, currently generates
//a whole array of images however.
vec2 fragCoordMod = vec2(mod(fragCoord.x, 32.0 * fcm), mod(fragCoord.y, 32.0 * fcm));
vec3 color = texture(iChannel2, fragCoordMod/(fcm*iResolution.xy)).xyz;
fragColor = vec4(color, 1.0);
}
What I've tried: position % wrap value, modifying the wrap value by the lacunarity, and applying % wrap after the warp, all of which are currently in use (look in simplexNoiseV for the core algorithm and in accumulateSimplexNoiseV for the octave summation).
According to these answers it should be that simple (mod the position used for hashing), however this clearly just doesn't work. I'm not sure if it's partially because my hashing function is not Ken Perlin's, but it doesn't seem like that should make a difference. It does seem like the skewing of coordinates should make this method not work at all, but apparently others have had success with this.
Here's an example of it not tiling:
(image: example of the noise not tiling)
UPDATE:
I've still not fixed the issue, but it appears that tiling works appropriately along the simplices, and not along the grid, as seen here:
Do I have to modify my modulus to account for the skewing?

Optimize WebGL shader?

I wrote the following shader to render a pattern with a bunch of concentric circles. Eventually I want to have each rotating sphere be a light emitter to create something along these lines.
Of course right now I'm just doing the most basic part to render the different objects.
Unfortunately the shader is incredibly slow (16fps full screen on a high-end MacBook). I'm pretty sure this is due to the numerous for loops and the branching that I have in the shader. I'm wondering how I can pull off the geometry I'm trying to achieve in a more performance-optimized way:
EDIT: you can run the shader here: https://www.shadertoy.com/view/lssyRH
One obvious optimization I am missing is that currently all the fragments are checked against the entire 24 surrounding circles. It would be pretty quick and easy to just discard these checks entirely by checking if the fragment intersects the outer bounds of the diagram. I guess I'm just trying to get a handle on how the best practice is of doing something like this.
#define N 10
#define M 5
#define K 24
#define M_PI 3.1415926535897932384626433832795
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
float aspectRatio = iResolution.x / iResolution.y;
float h = 1.0;
float w = aspectRatio;
vec2 uv = vec2(fragCoord.x / iResolution.x * aspectRatio, fragCoord.y / iResolution.y);
float radius = 0.01;
float orbitR = 0.02;
float orbiterRadius = 0.005;
float centerRadius = 0.002;
float encloseR = 2.0 * orbitR;
float encloserRadius = 0.002;
float spacingX = (w / (float(N) + 1.0));
float spacingY = h / (float(M) + 1.0);
float x = 0.0;
float y = 0.0;
vec4 totalLight = vec4(0.0, 0.0, 0.0, 1.0);
for (int i = 0; i < N; i++) {
for (int j = 0; j < M; j++) {
// compute the center of the diagram
vec2 center = vec2(spacingX * (float(i) + 1.0), spacingY * (float(j) + 1.0));
x = center.x + orbitR * cos(iGlobalTime);
y = center.y + orbitR * sin(iGlobalTime);
vec2 bulb = vec2(x,y);
if (length(uv - center) < centerRadius) {
// frag intersects white center marker
fragColor = vec4(1.0);
return;
} else if (length(uv - bulb) < radius) {
// intersects rotating "light"
fragColor = vec4(uv,0.5+0.5*sin(iGlobalTime),1.0);
return;
} else {
// intersects one of the enclosing 24 cylinders
for(int k = 0; k < K; k++) {
float theta = M_PI * 2.0 * float(k)/ float(K);
x = center.x + cos(theta) * encloseR;
y = center.y + sin(theta) * encloseR;
vec2 encloser = vec2(x,y);
if (length(uv - encloser) < encloserRadius) {
fragColor = vec4(uv,0.5+0.5*sin(iGlobalTime),1.0);
return;
}
}
}
}
}
}
Keeping in mind that you want to optimize the fragment shader, and only the fragment shader:
Move the sin(iGlobalTime) and cos(iGlobalTime) out of the loops; they remain static over the whole draw call, so there is no need to recalculate them every loop iteration.
GPUs employ vectorized instruction sets (SIMD) where possible, so take advantage of that. You're wasting lots of cycles by doing multiple scalar ops where you could use a single vector instruction (see annotated code).
[Three years wiser me here: I'm not really sure if this statement is true in regards to how modern GPUs process the instructions, however it certainly does help readability and maybe even give a hint or two to the compiler]
Do your radius checks squared, and save that sqrt(length) for when you really need it.
Replace float casts of constants (your loop limits) with a float constant (intelligent shader compilers will already do this, but it's not something to count on).
Don't have undefined behavior in your shader (not writing to gl_FragColor).
Here is an optimized and annotated version of your shader (still containing that undefined behavior, just like the one you provided). Annotation is in the form of:
// annotation
// old code, if any
new code
#define N 10
// define float constant N
#define fN 10.
#define M 5
// define float constant M
#define fM 5.
#define K 24
// define float constant K
#define fK 24.
#define M_PI 3.1415926535897932384626433832795
// predefine 2 times PI
#define M_PI2 6.28318531
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
float aspectRatio = iResolution.x / iResolution.y;
// we dont need these separate
// float h = 1.0;
// float w = aspectRatio;
// use vector ops(2 divs 1 mul => 1 div 1 mul)
// vec2 uv = vec2(fragCoord.x / iResolution.x * aspectRatio, fragCoord.y / iResolution.y);
vec2 uv = fragCoord.xy / iResolution.xy;
uv.x *= aspectRatio;
// most of the following declarations should be predefined or marked as "const"...
float radius = 0.01;
// precalc squared radius
float radius2 = radius*radius;
float orbitR = 0.02;
float orbiterRadius = 0.005;
float centerRadius = 0.002;
// precalc squared center radius
float centerRadius2 = centerRadius * centerRadius;
float encloseR = 2.0 * orbitR;
float encloserRadius = 0.002;
// precalc squared encloser radius
float encloserRadius2 = encloserRadius * encloserRadius;
// Use float constants and vector ops here(2 casts 2 adds 2 divs => 1 add 1 div)
// float spacingX = w / (float(N) + 1.0);
// float spacingY = h / (float(M) + 1.0);
vec2 spacing = vec2(aspectRatio, 1.0) / (vec2(fN, fM)+1.);
// calc sin and cos of global time
// saves N*M(sin,cos,2 muls)
vec2 stct = vec2(sin(iGlobalTime), cos(iGlobalTime));
vec2 orbit = orbitR * stct;
// not needed anymore
// float x = 0.0;
// float y = 0.0;
// was never used
// vec4 totalLight = vec4(0.0, 0.0, 0.0, 1.0);
for (int i = 0; i < N; i++) {
for (int j = 0; j < M; j++) {
// compute the center of the diagram
// Use vector ops
// vec2 center = vec2(spacingX * (float(i) + 1.0), spacingY * (float(j) + 1.0));
vec2 center = spacing * (vec2(i,j)+1.0);
// Again use vector opts, use precalced time trig(orbit = orbitR * stct)
// x = center.x + orbitR * cos(iGlobalTime);
// y = center.y + orbitR * sin(iGlobalTime);
// vec2 bulb = vec2(x,y);
vec2 bulb = center + orbit;
// calculate offsets
vec2 centerOffset = uv - center;
vec2 bulbOffset = uv - bulb;
// use squared length check
// if (length(uv - center) < centerRadius) {
if (dot(centerOffset, centerOffset) < centerRadius2) {
// frag intersects white center marker
fragColor = vec4(1.0);
return;
// use squared length check
// } else if (length(uv - bulb) < radius) {
} else if (dot(bulbOffset, bulbOffset) < radius2) {
// Use precalced sin global time in stct.x
// intersects rotating "light"
fragColor = vec4(uv,0.5+0.5*stct.x,1.0);
return;
} else {
// intersects one of the enclosing 24 cylinders
for(int k = 0; k < K; k++) {
// use predefined 2*PI and float K
float theta = M_PI2 * float(k) / fK;
// Use vector ops(2 muls 2 adds => 1 mul 1 add)
// x = center.x + cos(theta) * encloseR;
// y = center.y + sin(theta) * encloseR;
// vec2 encloser = vec2(x,y);
vec2 encloseOffset = uv - (center + vec2(cos(theta),sin(theta)) * encloseR);
if (dot(encloseOffset,encloseOffset) < encloserRadius2) {
fragColor = vec4(uv,0.5+0.5*stct.x,1.0);
return;
}
}
}
}
}
}
I did a little more thinking ... I realized the best way to optimize it is to actually change the logic so that, before doing intersection tests on the small circles, it checks the bounds of the whole group of circles. This got it to run at 60fps:
Example here:
https://www.shadertoy.com/view/lssyRH
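As a rough sketch of that idea (illustrative only: the helper name and the exact bound are my assumptions, not code taken from the linked Shadertoy), you can test once per diagram whether the fragment can possibly touch any of its circles, and only run the inner intersection loops when it can:
// Illustrative bounding test: with the radii used in the question, the whole
// diagram (center marker, orbiting bulb and the 24 enclosing circles) fits
// inside a disc of radius ringRadius + circleRadius around its center.
// Calling this before the per-circle tests lets most fragments skip them.
bool insideDiagramBounds(vec2 uv, vec2 center, float ringRadius, float circleRadius) {
    vec2 d = uv - center;
    float bound = ringRadius + circleRadius;
    return dot(d, d) <= bound * bound; // squared compare avoids sqrt()
}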

3D texture in WebGL/three.js using 2D texture workaround?

I would like to use some 3D textures for objects that I'm rendering in WebGL. I'm currently using the following method in a fragment shader, as suggested on WebGL and OpenGL Differences:
// tex is a texture with each slice of the cube placed horizontally across the texture.
// texCoord is a 3d texture coord
// size is the size of the cube in pixels.
vec4 sampleAs3DTexture(sampler2D tex, vec3 texCoord, float size) {
float sliceSize = 1.0 / size; // space of 1 slice
float slicePixelSize = sliceSize / size; // space of 1 pixel
float sliceInnerSize = slicePixelSize * (size - 1.0); // space of size pixels
float zSlice0 = min(floor(texCoord.z * size), size - 1.0);
float zSlice1 = min(zSlice0 + 1.0, size - 1.0);
float xOffset = slicePixelSize * 0.5 + texCoord.x * sliceInnerSize;
float s0 = xOffset + (zSlice0 * sliceSize);
float s1 = xOffset + (zSlice1 * sliceSize);
vec4 slice0Color = texture2D(tex, vec2(s0, texCoord.y));
vec4 slice1Color = texture2D(tex, vec2(s1, texCoord.y));
float zOffset = mod(texCoord.z * size, 1.0);
return mix(slice0Color, slice1Color, zOffset);
}
The problem is that the largest 3D texture I can use is 64x64x64 (since the maximum 2D texture width is 4096 = 64*64). I would like to try to use larger textures if possible, so I would like to see if anyone has suggestions for using higher resolution 3D textures with a similar workaround. Presumably, I should be able to organize the 2D texture such that I have the 3D slices arranged horizontally AND vertically, but my google-fu has not been able to find a workable solution so far.
Seems relatively straightforward. If you want to go down the image as well, you'll have to compute a v texture coordinate that selects the right row for your slice. To do that you'll need to know how many rows there are in the texture and how many slices there are per row.
// tex is a texture with each slice of the cube placed in grid in a texture.
// texCoord is a 3d texture coord
// size is the size of the cube in pixels.
// slicesPerRow is how many slices there are across the texture
// numRows is the number of rows of slices
vec2 computeSliceOffset(float slice, float slicesPerRow, vec2 sliceSize) {
return sliceSize * vec2(mod(slice, slicesPerRow),
floor(slice / slicesPerRow));
}
vec4 sampleAs3DTexture(
sampler2D tex, vec3 texCoord, float size, float numRows, float slicesPerRow) {
float slice = texCoord.z * size;
float sliceZ = floor(slice); // slice we need
float zOffset = fract(slice); // dist between slices
vec2 sliceSize = vec2(1.0 / slicesPerRow, // u space of 1 slice
1.0 / numRows); // v space of 1 slice
vec2 slice0Offset = computeSliceOffset(sliceZ, slicesPerRow, sliceSize);
vec2 slice1Offset = computeSliceOffset(sliceZ + 1.0, slicesPerRow, sliceSize);
vec2 slicePixelSize = sliceSize / size; // space of 1 pixel
vec2 sliceInnerSize = slicePixelSize * (size - 1.0); // space of size pixels
vec2 uv = slicePixelSize * 0.5 + texCoord.xy * sliceInnerSize;
vec4 slice0Color = texture2D(tex, slice0Offset + uv);
vec4 slice1Color = texture2D(tex, slice1Offset + uv);
return mix(slice0Color, slice1Color, zOffset);
return slice0Color;
}
Here's a snippet
var canvas = document.getElementById("c");
var gl = canvas.getContext("webgl");
var program = twgl.createProgramFromScripts(
gl, ["vshader", "fshader"], ["a_position"]);
gl.useProgram(program);
var sizeLoc = gl.getUniformLocation(program, "u_size");
var numRowsLoc = gl.getUniformLocation(program, "u_numRows");
var slicesPerRowLoc = gl.getUniformLocation(program, "u_slicesPerRow");
// make sphere triangles
var numDivisionsAround = 32;
var numDivisionsDown = 16;
var verts = [];
for (var v = 0; v < numDivisionsDown; ++v) {
var v0 = Math.sin((v + 0) / numDivisionsDown * Math.PI);
var v1 = Math.sin((v + 1) / numDivisionsDown * Math.PI);
var y0 = Math.cos((v + 0) / numDivisionsDown * Math.PI);
var y1 = Math.cos((v + 1) / numDivisionsDown * Math.PI);
for (var h = 0; h < numDivisionsAround; ++h) {
var a0 = (h + 0) * Math.PI * 2 / numDivisionsAround;
var a1 = (h + 1) * Math.PI * 2 / numDivisionsAround;
var x00 = Math.sin(a0) * v0;
var x10 = Math.sin(a1) * v0;
var x01 = Math.sin(a0) * v1;
var x11 = Math.sin(a1) * v1;
var z00 = Math.cos(a0) * v0;
var z10 = Math.cos(a1) * v0;
var z01 = Math.cos(a0) * v1;
var z11 = Math.cos(a1) * v1;
verts.push(
x00, y0, z00,
x10, y0, z10,
x01, y1, z01,
x01, y1, z01,
x10, y0, z10,
x11, y1, z11);
}
}
var vertBuffer = gl.createBuffer();
gl.bindBuffer(gl.ARRAY_BUFFER, vertBuffer);
gl.bufferData(gl.ARRAY_BUFFER, new Float32Array(verts), gl.STATIC_DRAW);
gl.enableVertexAttribArray(0);
gl.vertexAttribPointer(0, 3, gl.FLOAT, false, 0, 0);
// Make 3D texture
var size = 8;
var slicesPerRow = 4;
var numRows = Math.floor((size + slicesPerRow - 1) / slicesPerRow);
var pixels = new Uint8Array(size * slicesPerRow * size * numRows * 4);
var pixelsAcross = slicesPerRow * size;
for (var slice = 0; slice < size; ++slice) {
var row = Math.floor(slice / slicesPerRow);
var xOff = slice % slicesPerRow * size;
var yOff = row * size;
for (var y = 0; y < size; ++y) {
for (var x = 0; x < size; ++x) {
var offset = ((yOff + y) * pixelsAcross + xOff + x) * 4;
pixels[offset + 0] = x / size * 255;
pixels[offset + 1] = y / size * 255;
pixels[offset + 2] = slice / size * 255;
pixels[offset + 3] = 255;
}
}
}
// put this in a 2d canvas for debugging
var c = document.createElement("canvas");
c.width = size * slicesPerRow;
c.height = size * numRows;
document.body.appendChild(c);
var ctx = c.getContext("2d");
var id = ctx.getImageData(0, 0, c.width, c.height);
var numBytes = c.width * c.height * 4;
for (var ii = 0; ii < numBytes; ++ii) {
id.data[ii] = pixels[ii];
}
ctx.putImageData(id, 0, 0);
var tex = gl.createTexture();
gl.bindTexture(gl.TEXTURE_2D, tex);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, size * slicesPerRow, numRows * size, 0,
gl.RGBA, gl.UNSIGNED_BYTE, pixels);
var log = console.log.bind(console);
log("size : " + size);
log("numRows : " + numRows);
log("slicesPerRow: " + slicesPerRow);
gl.uniform1f(sizeLoc, size);
gl.uniform1f(numRowsLoc, numRows);
gl.uniform1f(slicesPerRowLoc, slicesPerRow);
// draw circle
gl.enable(gl.DEPTH_TEST);
gl.drawArrays(gl.TRIANGLES, 0, verts.length / 3);
canvas {
border: 1px solid black;
margin: 2px;
}
<script src="https://twgljs.org/dist/3.x/twgl.min.js"></script>
<script id="vshader" type="whatever">
attribute vec4 a_position;
varying vec3 v_texcoord;
void main() {
gl_Position = a_position;
v_texcoord = a_position.xyz * 0.5 + 0.5;
}
</script>
<script id="fshader" type="whatever">
precision mediump float;
// tex is a texture with each slice of the cube placed in grid in a texture.
// texCoord is a 3d texture coord
// size is the size of the cube in pixels.
// slicesPerRow is how many slices there are across the texture
// numRows is the number of rows of slices
vec2 computeSliceOffset(float slice, float slicesPerRow, vec2 sliceSize) {
return sliceSize * vec2(mod(slice, slicesPerRow),
floor(slice / slicesPerRow));
}
vec4 sampleAs3DTexture(
sampler2D tex, vec3 texCoord, float size, float numRows, float slicesPerRow) {
float slice = texCoord.z * size;
float sliceZ = floor(slice); // slice we need
float zOffset = fract(slice); // dist between slices
vec2 sliceSize = vec2(1.0 / slicesPerRow, // u space of 1 slice
1.0 / numRows); // v space of 1 slice
vec2 slice0Offset = computeSliceOffset(sliceZ, slicesPerRow, sliceSize);
vec2 slice1Offset = computeSliceOffset(sliceZ + 1.0, slicesPerRow, sliceSize);
vec2 slicePixelSize = sliceSize / size; // space of 1 pixel
vec2 sliceInnerSize = slicePixelSize * (size - 1.0); // space of size pixels
vec2 uv = slicePixelSize * 0.5 + texCoord.xy * sliceInnerSize;
vec4 slice0Color = texture2D(tex, slice0Offset + uv);
vec4 slice1Color = texture2D(tex, slice1Offset + uv);
return mix(slice0Color, slice1Color, zOffset);
return slice0Color;
}
varying vec3 v_texcoord;
uniform float u_size;
uniform float u_numRows;
uniform float u_slicesPerRow;
uniform sampler2D u_texture;
void main() {
gl_FragColor = sampleAs3DTexture(
u_texture, v_texcoord, u_size, u_numRows, u_slicesPerRow);
}
</script>
<canvas id="c" width="400" height="400"></canvas>
