Warning X3550: array reference cannot be used as an l-value - opengl-es

I'm trying to make my shader work in Shaderfrog; I import my GLSL Sandbox version via URL.
I get this error: "C:\fakepath(111,2-27): warning X3550: array reference cannot be used as an l-value; not natively addressable, forcing loop to unroll."
I'm really not sure why this error occurs. I've tried un-nesting any loops within loops, but to no avail.
Here's my fragment code:
#extension GL_OES_standard_derivatives : enable
precision highp float;
varying vec2 vUv;
uniform float time;
uniform vec2 resolution;
void main()
{
vec2 p = vUv.xy / resolution.x * .05;
vec3 col;
for (float j = 0.; j < 3.; j++) {
p.x += ((0.05 / 2.0) * sin(2.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 2.0)) * 2.0)));
p.y += ((0.02 / 2.0)* cos(2.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 2.0)) * 2.0)));
p.x += ((0.05 / 3.0) * sin(3.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 3.0)) * 3.0)));
p.y += ((0.02 / 3.0)* cos(3.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 3.0)) * 3.0)));
p.x += ((0.05 / 4.0) * sin(4.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 4.0)) * 4.0)));
p.y += ((0.02 / 4.0)* cos(4.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 4.0)) * 4.0)));
p.x += ((0.05 / 5.0) * sin(5.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 5.0)) * 5.0)));
p.y += ((0.02 / 5.0)* cos(5.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 5.0)) * 5.0)));
p.x += ((0.05 / 6.0) * sin(6.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 6.0)) * 6.0)));
p.y += ((0.02 / 6.0)* cos(6.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 6.0)) * 6.0)));
p.x += ((0.05 / 7.0) * sin(7.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 7.0)) * 7.0)));
p.y += ((0.02 / 7.0) * cos(7.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 7.0)) * 7.0)));
p.x += ((0.05 / 8.0) * sin(8.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 8.0)) * 8.0)));
p.y += ((0.02 / 8.0)* cos(8.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 8.0)) * 8.0)));
p.x += ((0.05 / 9.0) * sin(9.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 9.0)) * 9.0)));
p.y += ((0.02 / 9.0)* cos(9.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 9.0)) * 9.0)));
p.x += ((0.05 / 10.0) * sin(10.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 10.0)) * 10.0)));
p.y += ((0.02 / 10.0)* cos(10.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 10.0)) * 10.0)));
p.x += ((0.05 / 11.0) * sin(11.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 11.0)) * 11.0)));
p.y += ((0.02 / 11.0)* cos(11.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 11.0)) * 11.0)));
p.x += ((0.05 / 12.0) * sin(12.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 12.0)) * 12.0)));
p.y += ((0.02 / 12.0)* cos(12.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 12.0)) * 12.0)));
p.x += ((0.05 / 13.0) * sin(13.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 13.0)) * 13.0)));
p.y += ((0.02 / 13.0)* cos(13.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 13.0)) * 13.0)));
p.x += ((0.05 / 14.0) * sin(14.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 14.0)) * 14.0)));
p.y += ((0.02 / 14.0)* cos(14.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 14.0)) * 14.0)));
p.x += ((0.05 / 15.0) * sin(15.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 15.0)) * 15.0)));
p.y += ((0.02 / 15.0)* cos(15.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 15.0)) * 15.0)));
p.x += ((0.05 / 16.0) * sin(16.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 16.0)) * 16.0)));
p.y += ((0.02 / 16.0)* cos(16.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 16.0)) * 16.0)));
p.x += ((0.05 / 17.0) * sin(17.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 17.0)) * 17.0)));
p.y += ((0.02 / 17.0)* cos(17.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 17.0)) * 17.0)));
p.x += ((0.05 / 18.0) * sin(18.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 18.0)) * 18.0)));
p.y += ((0.02 / 18.0)* cos(18.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 18.0)) * 18.0)));
p.x += ((0.05 / 19.0) * sin(19.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 19.0)) * 19.0)));
p.y += ((0.02 / 19.0)* cos(19.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 19.0)) * 19.0)));
p.x += ((0.05 / 20.0) * sin(20.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 20.0)) * 20.0)));
p.y += ((0.02 / 20.0)* cos(20.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 20.0)) * 20.0)));
p.x += ((0.05 / 21.0) * sin(21.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 21.0)) * 21.0)));
p.y += ((0.02 / 21.0)* cos(21.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 21.0)) * 21.0)));
p.x += ((0.05 / 22.0) * sin(22.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 22.0)) * 22.0)));
p.y += ((0.02 / 22.0)* cos(22.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 22.0)) * 22.0)));
p.x += ((0.05 / 23.0) * sin(23.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 23.0)) * 23.0)));
p.y += ((0.02 / 23.0)* cos(23.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 23.0)) * 23.0)));
p.x += ((0.05 / 24.0) * sin(24.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 24.0)) * 24.0)));
p.y += ((0.02 / 24.0)* cos(24.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 24.0)) * 24.0)));
p.x += ((0.05 / 25.0) * sin(25.0 * 10. * p.y + (time*0.2) + cos((time / (150. * 25.0)) * 25.0)));
p.y += ((0.02 / 25.0)* cos(25.0 * 10. * p.x + (time*0.15) + sin((time / (100. * 25.0)) * 25.0)));
col[int(j)] = 20.*(abs(p.x + p.y));
}
gl_FragColor = vec4(col, 1.);
}
https://shaderfrog.com/app/editor
http://glslsandbox.com/e#59252.0

To understand the error
array reference cannot be used as an l-value
see OpenGL ES Shading Language 1.00 Specification - 5.5 Vector Components
[...]
To form an l-value, swizzling must be applied to an l-value of vector type [...]
and further OpenGL ES Shading Language 1.00 Specification - 5.8 Assignments
[...] Array variables are l-values and may be passed to parameters declared as out or inout. However, they may not be used as the target of an assignment. [...]
That means it is possible to read from col[i], but it is not possible to write to it with an assignment.
You have to assign the components of the vector via a selection:
vec3 col;
for (int j = 0; j < 3; j++) {
    // [...]
    float c = 20.*(abs(p.x + p.y));
    if (j==0)
        col.x = c;
    else if (j==1)
        col.y = c;
    else
        col.z = c;
}
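If you want to avoid the branches, a minimal alternative sketch (my addition, not part of the original answer) selects the component with a mask instead:
vec3 col = vec3(0.0);
for (int j = 0; j < 3; j++) {
    // [...] same accumulation of p as above
    float c = 20.*(abs(p.x + p.y));
    // exactly one mask component is 1.0 per iteration, the other two are 0.0
    vec3 mask = vec3(float(j == 0), float(j == 1), float(j == 2));
    col += mask * c;
}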

Related

pgraphics element won't center on mobile devices | drawingContext.drawImage causing mobile offset

I'm working on some of Tim Rodenbröker's code involving a copy() function (https://timrodenbroeker.de/processing-tutorial-kinetic-typography-1/), expanding it and making it ready for the web.
This involves replacing the copy() function with drawingContext.drawImage() for a performance increase (found here: https://discourse.processing.org/t/p5-js-copy-function-slow-performance-since-version-0-10-0/30007).
Doing this works great on desktop; on mobile, however, the pgraphics element (usually centered on the canvas) moves position.
Using the regular copy() function centers it correctly.
The positioning varies with mobile screen size, and I can't figure out the exact behavior to fix. It's not the font size; I've tried adapting the position to screen.size and document.documentElement.clientWidth, with no luck.
let font;
let pg;
function setup() {
font = loadFont("./assets/FGrotesk-Regular.otf");
createCanvas(innerWidth, innerHeight);
pg = createGraphics(innerWidth, innerHeight, P2D);
frameRate(60);
pixelDensity(1);
}
function draw() {
background(0);
pg.background(0);
pg.fill(255);
pg.textFont(font);
pg.textSize(380);
pg.push();
pg.translate(innerWidth / 2, innerHeight / 2);
pg.textAlign(CENTER, CENTER);
pg.text("Enrico", 0, -50);
pg.text("Gisana", 0, 50);
pg.pop();
let tilesX = 400;
let tilesY = 20;
let tileW = int(width / tilesX);
let tileH = int(height / tilesY);
for (let y = 0; y < tilesY; y++) {
for (let x = 0; x < tilesX; x++) {
// WARP
let wave_x = int(sin(frameCount * 0.02 + (x * y) * 0.07) * 100) - (mouseY / 2);
let wave_y = int(sin(frameCount * 0.02 + (x * y) * 0.07) * 100) - (mouseY / 2);
if (mouseX - (width / 2) >= 0) {
wave_x = int(sin(frameCount * 0.02 + ((x / 0.8) * (y/0.2)) * 0.04) * (-1 * (mouseX - (width / 2)) / 30));
} else {
wave_x = int(sin(frameCount * 0.02 + ((x / 0.8) * (y/0.2)) * 0.04) * (-1 * (mouseX - (width / 2)) / 30));
}
if (mouseY - (height / 2) >= 0) {
wave_y = int(sin(frameCount * 0.02 + ((x / 0.2) * (y/0.8)) * 0.04) * ((mouseY - (height / 2)) / 30));
} else {
wave_y = int(sin(frameCount * 0.02 + ((x / 0.2) * (y/0.8)) * 0.04) * ((mouseY - (height / 2)) / 30));
}
// SOURCE
let sx = x * tileW + wave_x;
// + wave should be added here
let sy = y * tileH - wave_y;
let sw = tileW;
let sh = tileH;
// DESTINATION
let dx = x * tileW;
let dy = y * tileH;
let dw = tileW;
let dh = tileH;
drawingContext.drawImage(pg.elt, sx, sy, sw, sh, dx, dy, dw, dh);
}
}
}

Why is matrix multiplication row x row 4-5 times slower than row x column on Mali's GPU?

Recently I ran into a problem while implementing matrix multiplication in a compute shader, the common C = AB. To make the memory accesses contiguous, I transposed the B matrix, which I expected to speed things up. However, when I measured it, the row x row form turned out to be several times slower than the row x column form. I've puzzled over this for a long time and can't understand it, so I'm writing the problem down here for help!
My environment: Mali-G77 (MediaTek Dimensity 1200)
A matrix dimensions: 4x2048x2048
B matrix dimensions: 4x2048x2048
Time comparison:
Row x row: about 9 s
Row x column: about 1.6 s
Column x column: about 3.3 s
Demo project: https://github.com/yikox/ProfilerDemo
Shader code:
// compute shader
#version 310 es
#define XLOCAL 8
#define YLOCAL 8
#define ZLOCAL 1
layout(binding = 0) writeonly buffer soutput{
vec4 data[];
} uOutput;
layout(binding = 1) readonly buffer sinput0{
vec4 data[];
} uInput0;
layout(binding = 2) readonly buffer sinput1{
vec4 data[];
} uInput1;
layout(location=3) uniform ivec4 uInputSize0;
layout(location=4) uniform ivec4 uInputSize1;
layout(location=5) uniform ivec4 uOutputSize;
layout (local_size_x = XLOCAL, local_size_y = YLOCAL, local_size_z = ZLOCAL) in;
// The i-th element of one column of the product of matrices A and B
vec4 PixelMul(int i, ivec3 pos)
{
// row x row
// vec4 data0 = uInput0.data[i + pos.y * uInputSize0.x + pos.z * uInputSize0.x * uInputSize0.y];
// vec4 data1 = uInput1.data[i + pos.x * uInputSize1.y + pos.z * uInputSize1.x * uInputSize1.y];
// row x column
// vec4 data0 = uInput0.data[i + pos.y * uInputSize0.x + pos.z * uInputSize0.x * uInputSize0.y];
// vec4 data1 = uInput1.data[pos.x + i * uInputSize1.y + pos.z * uInputSize1.x * uInputSize1.y];
// column x column
vec4 data0 = uInput0.data[pos.y + i * uInputSize0.x + pos.z * uInputSize0.x * uInputSize0.y];
vec4 data1 = uInput1.data[pos.x + i * uInputSize1.y + pos.z * uInputSize1.x * uInputSize1.y];
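// Note (annotation added for clarity, not in the original post):
// row x row       -> data0 and data1 both advance by one vec4 per step of i,
//                    but neighbouring invocations (pos.x, pos.x + 1) read uInput1
//                    at offsets uInputSize1.y apart.
// row x column    -> data0 still advances by one per step of i; data1 advances by
//                    uInputSize1.y per step of i, while neighbouring invocations
//                    read adjacent uInput1 elements.
// column x column -> data0 advances by uInputSize0.x and data1 by uInputSize1.y
//                    per step of i; neighbouring invocations read adjacent elements.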
return data0 * data1;
}
void main()
{
ivec3 pos = ivec3(gl_GlobalInvocationID) * ivec3(2, 2, 1);
if(all(lessThan(pos, uOutputSize.xyz)))
{
vec4 outData00 = vec4(0);
vec4 outData01 = vec4(0);
vec4 outData10 = vec4(0);
vec4 outData11 = vec4(0);
for(int i = 0; i < uInputSize0.x; i++)
{
outData00 += PixelMul(i, pos + ivec3(0, 0, 0));
outData01 += PixelMul(i, pos + ivec3(1, 0, 0));
outData10 += PixelMul(i, pos + ivec3(0, 1, 0));
outData11 += PixelMul(i, pos + ivec3(1, 1, 0));
}
uOutput.data[pos.x + 0 + (pos.y + 0) * uOutputSize.x + pos.z * uOutputSize.x * uOutputSize.y] = outData00;
uOutput.data[pos.x + 1 + (pos.y + 0) * uOutputSize.x + pos.z * uOutputSize.x * uOutputSize.y] = outData01;
uOutput.data[pos.x + 0 + (pos.y + 1) * uOutputSize.x + pos.z * uOutputSize.x * uOutputSize.y] = outData10;
uOutput.data[pos.x + 1 + (pos.y + 1) * uOutputSize.x + pos.z * uOutputSize.x * uOutputSize.y] = outData11;
}
}

How does this 2d noise generation function work? Does it have a name?

I came across this 2D noise function in the Book of Shaders
float noise(vec2 st) {
    vec2 integerPart = floor(st);
    vec2 fractionalPart = fract(st);
    float s00 = random(integerPart);
    float s01 = random(integerPart + vec2(0.0, 1.0));
    float s10 = random(integerPart + vec2(1.0, 0.0));
    float s11 = random(integerPart + vec2(1.0, 1.0));
    float dx1 = s10 - s00;
    float dx2 = s11 - s01;
    float dy1 = s01 - s00;
    float dy2 = s11 - s10;
    float alpha = smoothstep(0.0, 1.0, fractionalPart.x);
    float beta = smoothstep(0.0, 1.0, fractionalPart.y);
    return s00 + alpha * dx1 + (1 - alpha) * beta * dy1 + alpha * beta * dy2;
}
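(The random() it relies on is not shown in the question; to make the snippet self-contained, here is the classic sine-based hash that the Book of Shaders chapter uses — reproduced from memory, so treat it as an assumption:)
float random(vec2 st) {
    // pseudo-random value in [0, 1) derived from the 2D lattice coordinate
    return fract(sin(dot(st, vec2(12.9898, 78.233))) * 43758.5453123);
}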
It is clear what this function does: it generates four random numbers at the vertices of a square, then interpolates them. What I am finding difficult is understanding why the interpolation (the s00 + alpha * dx1 + (1 - alpha) * beta * dy1 + alpha * beta * dy2 expression) works. How is it interpolating the four values when it does not seem to be symmetric in the x and y values?
If you expand the last line, it's:
return s00 * (1-alpha) * (1-beta) +
s10 * alpha * (1-beta) +
s01 * (1-alpha) * beta +
s11 * alpha * beta;
Which is symmetric in x and y. If you add up the weights:
alpha * beta + (1-alpha) * beta + alpha * (1-beta) + (1-alpha) * (1-beta)
= (alpha + 1-alpha) * beta + (alpha + 1-alpha) * (1-beta)
= beta + 1-beta
= 1
so it's an affine combination of the values at the corners
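Written with mix(), the same expression is plain bilinear interpolation of the four corner samples, using the smoothed fractional coordinates as weights; the overall construction is commonly known as value noise. A drop-in sketch of the equivalent form:
// Interpolate the bottom edge and the top edge along x, then blend the two along y.
float bilerp(float s00, float s10, float s01, float s11, float alpha, float beta) {
    float bottom = mix(s00, s10, alpha); // between (0,0) and (1,0)
    float top    = mix(s01, s11, alpha); // between (0,1) and (1,1)
    return mix(bottom, top, beta);
}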

Texture coordinates for triangle fans

I am trying to "fill" a surface of some geometry that I drew, using the GL_TRIANGLE_FAN primitive (for example: 1 hub (center) point and 12 other points). I have calculated texture coordinates for each vertex in the interval 0-1. But as a result I get this, and it's a little bit distorted; I would like to get a result like that image. Please help, what is wrong here?
How can I calculate correct texture coordinates for such a triangulation (GL_TRIANGLE_FAN)?
In the image, the red dots are my points.
Code snippet:
assert(("CROSS type intersection needs only 5 vertices : center point and "
"rest points in anticlockwise order", lp->size() == 5)); // exactly 5 vertices expected
osg::Vec3 vAlong_1,vAlong_2;
vAlong_1 = (*lp)[1] - (*lp)[4];
vAlong_2 = (*lp)[1] - (*lp)[2];
eps = ((*lp)[2] - (*lp)[4]).length() * 0.2 / 2;
vAlong_1.normalize();
vAlong_2.normalize();
_edgeCoords->push_back((*lp)[0]);
_edgeCoords->push_back((*lp)[1]);
if (CMF::euclidDistance((*lp)[0],(*lp)[1]) <= CMF::euclidDistance((*lp)[0],(*lp)[2])) {
float cosAlpha = -(vAlong_1 * vAlong_2);
float extraLength = ((*lp)[2] - (*lp)[1]).length() * cosAlpha;
_edgeCoords->push_back((*lp)[1] + vAlong_1 * (eps + extraLength));
_edgeCoords->push_back((*lp)[2] + vAlong_1 * eps);
} else {
float cosAlpha = (vAlong_1 * vAlong_2);
float extraLength = ((*lp)[2] - (*lp)[1]).length() * cosAlpha;
_edgeCoords->push_back((*lp)[1] + vAlong_1 * eps);
_edgeCoords->push_back((*lp)[2] + vAlong_1 * (eps + extraLength));
}
_edgeCoords->push_back((*lp)[2]);
if (CMF::euclidDistance((*lp)[0],(*lp)[2]) <= CMF::euclidDistance((*lp)[0],(*lp)[3])) {
float cosAlpha = -(vAlong_1 * vAlong_2);
float extraLength = ((*lp)[3] - (*lp)[2]).length() * cosAlpha;
_edgeCoords->push_back((*lp)[2] - vAlong_2 * (eps + extraLength));
_edgeCoords->push_back((*lp)[3] - vAlong_2 * eps);
} else {
float cosAlpha = (vAlong_1 * vAlong_2);
float extraLength = ((*lp)[3] - (*lp)[2]).length() * cosAlpha;
_edgeCoords->push_back((*lp)[2] - vAlong_2 * eps);
_edgeCoords->push_back((*lp)[3] - vAlong_2 * (eps + extraLength));
}
_edgeCoords->push_back((*lp)[3]);
if (CMF::euclidDistance((*lp)[0],(*lp)[3]) <= CMF::euclidDistance((*lp)[0],(*lp)[4])) {
float cosAlpha = -(vAlong_1 * vAlong_2);
float extraLength = ((*lp)[4] - (*lp)[3]).length() * cosAlpha;
_edgeCoords->push_back((*lp)[3] - vAlong_1 * (eps + extraLength));
_edgeCoords->push_back((*lp)[4] - vAlong_1 * eps);
} else {
float cosAlpha = (vAlong_1 * vAlong_2);
float extraLength = ((*lp)[4] - (*lp)[3]).length() * cosAlpha;
_edgeCoords->push_back((*lp)[3] - vAlong_1 * eps);
_edgeCoords->push_back((*lp)[4] - vAlong_1 * (eps + extraLength));
}
_edgeCoords->push_back((*lp)[4]);
if (CMF::euclidDistance((*lp)[0],(*lp)[1]) <= CMF::euclidDistance((*lp)[0],(*lp)[4])) {
float cosAlpha = -(vAlong_1 * vAlong_2);
float extraLength = ((*lp)[4] - (*lp)[1]).length() * cosAlpha;
_edgeCoords->push_back((*lp)[4] + vAlong_2 * eps);
_edgeCoords->push_back((*lp)[1] + vAlong_2 * (eps + extraLength));
} else {
float cosAlpha = (vAlong_1 * vAlong_2);
float extraLength = ((*lp)[4] - (*lp)[1]).length() * cosAlpha;
_edgeCoords->push_back((*lp)[4] + vAlong_2 * (eps + extraLength));
_edgeCoords->push_back((*lp)[1] + vAlong_2 * eps);
}
_edgeCoords->push_back((*lp)[1]);
_tCoords->push_back(osg::Vec2(0.5,0.5));
_tCoords->push_back(osg::Vec2(0.666,0.666));
_tCoords->push_back(osg::Vec2(0.666,1.0));
_tCoords->push_back(osg::Vec2(0.333,1.0));
_tCoords->push_back(osg::Vec2(0.333,0.666));
_tCoords->push_back(osg::Vec2(0.0,0.666));
_tCoords->push_back(osg::Vec2(0.0,0.333));
_tCoords->push_back(osg::Vec2(0.333,0.333));
_tCoords->push_back(osg::Vec2(0.333,0.0));
_tCoords->push_back(osg::Vec2(0.666,0.0));
_tCoords->push_back(osg::Vec2(0.666,0.333));
_tCoords->push_back(osg::Vec2(1.0,0.333));
_tCoords->push_back(osg::Vec2(1.0,0.666));
_tCoords->push_back(osg::Vec2(0.666,0.666));
Try keeping the 2d positions always equal to the texture coordinates for each vertex. That will ensure your geometry appears as an undistorted cutout of your texture. You can then rescale and center the mesh as you like without distorting the texture by applying transforms to the vertex positions.
One way to do this would be to create a function that pushes a single vertex, accepting the 2d coordinates of the vertex and any transforms you want to apply. The function would then push the 2d coordinates as texcoords, then transform them and push the result as positions.
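If you would rather keep the vertex data as-is and do the centering/scaling on the GPU, a variation of the same idea is a vertex shader that reuses the raw 2D position as the texture coordinate and only transforms the position afterwards (a sketch, not the answer's CPU-side helper; the attribute and uniform names are made up):
// Texture coordinates are the untransformed 2D positions, so the texture stays
// an undistorted cutout no matter how the fan is rescaled or recentred.
attribute vec2 aPosition2D;   // fan vertices laid out directly in 0..1 texture space
uniform mat4 uTransform;      // centering / scaling applied to positions only
varying vec2 vTexCoord;

void main()
{
    vTexCoord = aPosition2D;
    gl_Position = uTransform * vec4(aPosition2D, 0.0, 1.0);
}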

Severe artifact when interpolating between dual quaternions

I'm having trouble with my implementation of dual quaternion skinning. I'm still learning about the subject, so for the moment I'm converting from the bone matrix to a dual quaternion CPU side, and back to a matrix in the shader.
The conversion does apparently work correctly for single bones, but if I try to linearly blend between dual quaternions, I get this artifact:
http://imagizer.imageshack.us/a/img838/8671/nun.gif
I don't know what's causing this. Maybe it's related to how I normalize the dual quaternion, or maybe it's in how I convert from the dual quaternion back to a matrix. I've tried searching for actual dual quaternion code, but all I find is a bunch of hard-to-read mathematical definitions.
I am including pieces of the shader code, as I'm pretty sure that's where the problem is. Hopefully somebody proficient in quaternion math can look through it!
Blending the dual quaternions (boneWeight2 = 1.0 - boneWeight1, so the weights always sum to 1):
vec4 blendReal = boneReal[bone1] * boneWeight1 + boneReal[bone2] * boneWeight2;
vec4 blendDual = boneDual[bone1] * boneWeight1 + boneDual[bone2] * boneWeight2;
float blend_norm_real = length(blendReal);
blendReal /= blend_norm_real;
blendDual /= blend_norm_real;
Create matrix from dual quaternion:
mat4 MatFromDualQuat(vec4 rq, vec4 dq)
{
    // Source: Section 3.4 http://www.seas.upenn.edu/~ladislav/papers/sdq-i3d07/sdq-i3d07.pdf
    // rq = real (rotation) part of the dual quaternion
    // dq = dual (translation) part of the dual quaternion
    mat4 M;
    M[0][0] = 1.0 - 2.0 * (rq.y * rq.y + rq.z * rq.z);
    M[1][0] = 2.0 * (rq.x * rq.y + rq.w * rq.z);
    M[2][0] = 2.0 * (rq.w * rq.y - rq.x * rq.z);
    M[3][0] = 0.0;
    M[0][1] = 2.0 * (rq.x * rq.y - rq.w * rq.z);
    M[1][1] = 1.0 - 2.0 * (rq.x * rq.x + rq.z * rq.z);
    M[2][1] = 2.0 * (rq.y * rq.z + rq.w * rq.x);
    M[3][1] = 0.0;
    M[0][2] = - 2.0 * (rq.x * rq.z + rq.w * rq.y);
    M[1][2] = 2.0 * (rq.y * rq.z - rq.w * rq.x);
    M[2][2] = 1.0 - 2.0 * (rq.x * rq.x + rq.y * rq.y);
    M[3][2] = 0.0;
    M[0][3] = 2.0 * (-dq.w * rq.x + dq.x * rq.w + dq.z * rq.y - dq.y * rq.z);
    M[1][3] = 2.0 * (-dq.w * rq.y + dq.y * rq.w + dq.x * rq.z - dq.z * rq.x);
    M[2][3] = 2.0 * (-dq.w * rq.z + dq.z * rq.w + dq.y * rq.x - dq.x * rq.y);
    M[3][3] = 1.0;
    return M;
}
And then I multiply that with the bind pose vertex position.
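One classic cause of exactly this kind of collapse when blending — not confirmed to be the problem here, just worth checking — is quaternion antipodality: q and -q encode the same rotation, so if the two bones' dual quaternions land in opposite hemispheres, the linear blend passes through a degenerate region. A minimal sketch of the usual sign check before the blend, using the same names as the snippet above:
// Flip the second dual quaternion when its real part points into the opposite
// hemisphere, so the blend follows the shorter arc.
vec4 real2 = boneReal[bone2];
vec4 dual2 = boneDual[bone2];
if (dot(boneReal[bone1], real2) < 0.0) {
    real2 = -real2;
    dual2 = -dual2;
}
vec4 blendReal = boneReal[bone1] * boneWeight1 + real2 * boneWeight2;
vec4 blendDual = boneDual[bone1] * boneWeight1 + dual2 * boneWeight2;
float blendNormReal = length(blendReal);
blendReal /= blendNormReal;
blendDual /= blendNormReal;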
