HLSL Stream Out Entries don't work correctly - directx-11

I want to implement a particle system based on stream output in my larger project. I read a few articles about this method and built a single particle. It works almost correctly, but in the geometry shader with stream output I cannot get the values of InitVel.z and Age: they are always 0. If I change the order of the fields (for example, putting Age before the position), Age works, but the 6th float in the order is still 0. It looks as if only the first 5 floats are pushed through. I have no idea what I am doing wrong, because I have tried changing almost everything: creating an input layout for the vertex that matches the SO declaration entries, and changing the stride to a fixed 28 bytes or to 32 bytes (with 32 the particles are drawn chaotically, so the stride is probably right). I suspected a limit on NumEntries in the SO declaration, but according to MSDN the limit in DirectX is D3D11_SO_STREAM_COUNT (4) * D3D11_SO_OUTPUT_COMPONENT_COUNT (128), not 5. Could you please look at this code and tell me how to implement it correctly? Thanks a lot for your help.
Structure of particle
// One particle record: a position, a velocity and an age value.
// The member order (InitPos, InitVel, Age) is the in-memory order and is
// kept exactly as before.
struct Particle
{
    XMFLOAT3 InitPos;
    XMFLOAT3 InitVel;
    float Age;
    //UINT Type;

    // Default construction leaves every member uninitialised.
    Particle() {}

    // Builds a particle from position (x, y, z), velocity (vx, vy, vz)
    // and age (l).
    Particle(float x, float y, float z,
             float vx, float vy, float vz,
             float l /*UINT typ*/)
        : InitPos(x, y, z),
          InitVel(vx, vy, vz),
          Age(l) /*, Type(typ)*/
    {
    }
};
SO Entry
// Stream-output declaration: one entry per shader output that is written to
// the stream-output buffer. The numeric fields of each entry are, in order:
// Stream, (SemanticName), SemanticIndex, StartComponent, ComponentCount,
// OutputSlot.
D3D11_SO_DECLARATION_ENTRY PartlayoutSO[] =
{
{ 0,"POSITION", 0, 0 , 3, 0 }, // output all components of position
{ 0,"VELOCITY", 0, 0, 3, 0 }, // three components of velocity
{ 0,"AGE", 0, 0, 1, 0 } // the single age component
//{ 0,"TYPE", 0, 0, 1, 0 }
};
Global Variables
// Stream-out pass: shader objects and the compiled bytecode blobs they are
// created from.
ID3D11VertexShader* Part_VSSO;
ID3D11GeometryShader* Part_GSSO;
ID3DBlob *Part_GSSO_Buffer;
ID3DBlob *Part_VSSO_Buffer;
// Draw pass: shader objects and their compiled bytecode blobs.
ID3D11VertexShader* Part_VS;
ID3D11GeometryShader* Part_GS;
ID3DBlob *Part_GS_Buffer;
ID3D11PixelShader* Part_PS;
ID3DBlob *Part_VS_Buffer;
ID3DBlob *Part_PS_Buffer;
// One-particle buffer used as input for the very first stream-out pass.
ID3D11Buffer* PartVertBufferInit;
//ID3D11Buffer* Popy;
// Ping-pong pair: mStreamOutVB receives the stream output, mDrawVB is read
// and drawn; LoadDataParticles swaps the two after each stream-out pass.
ID3D11Buffer* mDrawVB;
ID3D11Buffer* mStreamOutVB;
ID3D11InputLayout* PartVertLayout;// input layout; the calls that create and bind it are commented out in LoadDataParticles
void ParticleSystem::InitParticles()
{
mFirstRun = true;
srand(time(NULL));
hr = D3DCompileFromFile(L"ParticleVertexShaderSO4.hlsl", NULL,
D3D_COMPILE_STANDARD_FILE_INCLUDE, "main", "vs_5_0", NULL, NULL,
&Part_VSSO_Buffer, NULL);
hr = D3DCompileFromFile(L"ParticleGeometryShaderSO4.hlsl", NULL,
D3D_COMPILE_STANDARD_FILE_INCLUDE, "main", "gs_5_0", NULL, NULL,
&Part_GSSO_Buffer, NULL);
UINT StrideArray[1] = { sizeof(Particle) };//I try to set static 28 bits-7*4
per float
hr = device->CreateVertexShader(Part_VSSO_Buffer->GetBufferPointer(),
Part_VSSO_Buffer->GetBufferSize(), NULL, &Part_VSSO);
hr = device->CreateGeometryShaderWithStreamOutput(Part_GSSO_Buffer-
>GetBufferPointer(), Part_GSSO_Buffer->GetBufferSize(), PartlayoutSO ,3/*
sizeof(PartlayoutSO)*/ , StrideArray, 1,D3D11_SO_NO_RASTERIZED_STREAM,
NULL,&Part_GSSO);
//Draw Shaders
hr = D3DCompileFromFile(L"ParticleVertexShaderDRAW4.hlsl", NULL,
D3D_COMPILE_STANDARD_FILE_INCLUDE, "main", "vs_5_0", NULL, NULL,
&Part_VS_Buffer, NULL);
hr = D3DCompileFromFile(L"ParticleGeometryShaderDRAW4.hlsl", NULL,
D3D_COMPILE_STANDARD_FILE_INCLUDE, "main", "gs_5_0", NULL, NULL,
&Part_GS_Buffer, NULL);
hr = D3DCompileFromFile(L"ParticlePixelShaderDRAW4.hlsl", NULL,
D3D_COMPILE_STANDARD_FILE_INCLUDE, "main", "ps_5_0", NULL, NULL,
&Part_PS_Buffer, NULL);
hr = device->CreateVertexShader(Part_VS_Buffer->GetBufferPointer(),
Part_VS_Buffer->GetBufferSize(), NULL, &Part_VS);
hr = device->CreateGeometryShader(Part_GS_Buffer->GetBufferPointer(),
Part_GS_Buffer->GetBufferSize(), NULL, &Part_GS);
hr = device->CreatePixelShader(Part_PS_Buffer->GetBufferPointer(),
Part_PS_Buffer->GetBufferSize(), NULL, &Part_PS);
BuildVertBuffer();
}
void ParticleSystem::BuildVertBuffer()
{
D3D11_BUFFER_DESC vertexBufferDesc1;
ZeroMemory(&vertexBufferDesc1, sizeof(vertexBufferDesc1));
vertexBufferDesc1.Usage = D3D11_USAGE_DEFAULT;
vertexBufferDesc1.ByteWidth = sizeof(Particle)*1; //*numParticles;
vertexBufferDesc1.BindFlags = D3D11_BIND_VERTEX_BUFFER;// |
D3D11_BIND_STREAM_OUTPUT;
vertexBufferDesc1.CPUAccessFlags = 0;
vertexBufferDesc1.MiscFlags = 0;
vertexBufferDesc1.StructureByteStride = 0;// I tried to comment this too
Particle p;
ZeroMemory(&p, sizeof(Particle));
p.InitPos = XMFLOAT3(0.0f, 0.0f, 0.0f);
p.InitVel = XMFLOAT3(0.0f, 0.0f, 0.0f);
p.Age = 0.0f;
//p.Type = 100.0f;
D3D11_SUBRESOURCE_DATA vertexBufferData1;
ZeroMemory(&vertexBufferData1, sizeof(vertexBufferData1));
vertexBufferData1.pSysMem = &p;//było &p
vertexBufferData1.SysMemPitch = 0;
vertexBufferData1.SysMemSlicePitch = 0;
hr = device->CreateBuffer(&vertexBufferDesc1, &vertexBufferData1,
&PartVertBufferInit);
ZeroMemory(&vertexBufferDesc1, sizeof(vertexBufferDesc1));
vertexBufferDesc1.ByteWidth = sizeof(Particle) * numParticles;
vertexBufferDesc1.BindFlags = D3D11_BIND_VERTEX_BUFFER |
D3D11_BIND_STREAM_OUTPUT;
hr = device->CreateBuffer(&vertexBufferDesc1, 0, &mDrawVB);
hr = device->CreateBuffer(&vertexBufferDesc1, 0, &mStreamOutVB);
}
void ParticleSystem::LoadDataParticles()
{
UINT stride = sizeof(Particle);
UINT offset = 0;
//Create the Input Layout
//device->CreateInputLayout(Partlayout, numElementsPart, Part_VSSO_Buffer-
//>GetBufferPointer(),
// Part_VSSO_Buffer->GetBufferSize(), &PartVertLayout);
//Set the Input Layout
//context->IASetInputLayout(PartVertLayout);
//Set Primitive Topology
context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
if (mFirstRun)
{
// context->CopyResource(Popy, PartVertBufferInit);
context->IASetVertexBuffers(0, 1, &PartVertBufferInit, &stride,
&offset);
}
else
{
context->IASetVertexBuffers(0, 1, &mDrawVB, &stride, &offset);
}
context->SOSetTargets(1, &mStreamOutVB, &offset);
context->VSSetShader(Part_VSSO, NULL, 0);
context->GSSetShader(Part_GSSO, NULL, 0);
context->PSSetShader(NULL, NULL, 0);
//context->PSSetShader(Part_PS, NULL, 0);
ID3D11DepthStencilState* depthState;//disable depth
D3D11_DEPTH_STENCIL_DESC depthStateDesc;
depthStateDesc.DepthEnable = false;
depthStateDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO;
device->CreateDepthStencilState(&depthStateDesc, &depthState);
context->OMSetDepthStencilState(depthState, 0);
if (mFirstRun)
{
//mFirstRun;
context->Draw(1, 0);
mFirstRun = false;
}
else
{
context->DrawAuto();
}
//}
// done streaming-out--unbind the vertex buffer
ID3D11Buffer* bufferArray[1] = { 0 };
context->SOSetTargets(1, bufferArray, &offset);
// ping-pong the vertex buffers
std::swap(mStreamOutVB, mDrawVB);
// Draw the updated particle system we just streamed-out.
//Create the Input Layout
//device->CreateInputLayout(Partlayout, numElementsPart, Part_VS_Buffer-
//>GetBufferPointer(),
// Part_VS_Buffer->GetBufferSize(), &PartVertLayout);
//Set the normal Input Layout
//context->IASetInputLayout(PartVertLayout);
context->IASetVertexBuffers(0, 1, &mDrawVB, &stride, &offset);
ZeroMemory(&depthStateDesc, sizeof(depthStateDesc));
depthStateDesc.DepthEnable = true;
depthStateDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO;
device->CreateDepthStencilState(&depthStateDesc, &depthState);
context->OMSetDepthStencilState(depthState, 0);
//I tried add normal layout here the same like Entry SO but no changes
//Set Primitive Topology
//context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
context->VSSetShader(Part_VS, NULL, 0);
context->GSSetShader(Part_GS, NULL, 0);
context->PSSetShader(Part_PS, NULL, 0);
context->DrawAuto();
//mFirstRun = true;
context->GSSetShader(NULL, NULL, 0);
}
void ParticleSystem::RenderParticles()
{
//mFirstRun = true;
LoadDataParticles();
}
And the code of shaders:
VertexShader to stream out
// Vertex layout shared with the stream-out geometry shader.
struct Particle
{
    float3 InitPos : POSITION;
    float3 InitVel : VELOCITY;
    float Age : AGE;
    //uint Type : TYPE;
};

// Pass-through vertex shader: hands the particle unchanged to the
// geometry shader that performs the stream output.
Particle main(Particle vin)
{
    Particle vout = vin;
    return vout;
}
GeometrywithSo
// Particle record streamed out by this geometry shader.
struct Particle
{
    float3 InitPos : POSITION;
    float3 InitVel : VELOCITY;
    float Age : AGE;
    //uint Type : TYPE;
};

// Returns the red channel of ObjTexture13 sampled at (Time + offset).
float RandomPosition(float offset)
{
    float u = Time + offset;
    float v = ObjTexture13.SampleLevel(ObjSamplerState, u, 0).r;
    return (v);
}

// Stream-out geometry shader.
//  - StartPart == 1: emits six new particles followed by the incoming one;
//  - StartPart == 0: re-emits the incoming particle when its Age is >= 0.
// maxvertexcount is 7 because the emit branch appends 6 + 1 vertices; with
// the previous value of 6 the final Append(gin[0]) exceeded the limit and
// the incoming particle was silently dropped from the output stream.
[maxvertexcount(7)]
void main(
    point Particle gin[1],
    inout PointStream< Particle > Output
)
{
    if (StartPart == 1.0f)
    {
        for (int i = 0; i < 6; i++)
        {
            float3 VelRandom;
            VelRandom.y = 10.0f + i;
            VelRandom.x = 35 * i * RandomPosition((float)i / 5.0f);
            VelRandom.z = 10.0f;

            Particle p;
            p.InitPos = VelRandom;
            p.InitVel = float3(10.0f, 10.0f, 10.0f);
            p.Age = 0.0f;
            //p.Type = PT_FLARE;
            Output.Append(p);
        }
        Output.Append(gin[0]);
    }
    else if (StartPart == 0.0f)
    {
        if (gin[0].Age >= 0)
        {
            Output.Append(gin[0]);
        }
    }
}
If I change Age in the stream-out geometry shader (for example, Age += Time from the constant buffer), the value is correct once inside that geometry shader, but it is 0 in the draw shader, and it is also 0 the next time it is read by the stream-out geometry shader.
Vertex shader to draw
// Output of the draw vertex shader, consumed by the draw geometry shader.
struct VertexOut
{
    float3 Pos : POSITION;
    float4 Colour : COLOR;
    //uint Type : TYPE;
};

// Particle record read from the streamed-out vertex buffer.
struct Particle
{
    float3 InitPos : POSITION;
    float3 InitVel : VELOCITY;
    float Age : AGE;
    // uint Type : TYPE;
};

// Draw vertex shader: the output position is InitVel plus a Time-scaled
// acceleration term; the colour is constant opaque red.
VertexOut main(Particle vin)
{
    const float3 gAccelW = float3(0.0f, -0.98f, 0.0f);
    float t = vin.Age; // read but not used in the position below

    VertexOut result;
    result.Pos = vin.InitVel + (0.7f * gAccelW) * Time / 100;
    result.Colour = float4(1.0f, 0.0f, 0.0f, 1.0f);
    return result;
}
Geometry shader to change point into line
// Input from the draw vertex shader.
struct VertexOut
{
    float3 Pos : POSITION;
    float4 Colour : COLOR;
    //uint Type : TYPE;
};

// Vertex handed on to the pixel shader.
struct GSOutput
{
    float4 Pos : SV_POSITION;
    float4 Colour : COLOR;
    //float2 Tex : TEXCOORD;
};

// Expands each point into a two-vertex line: from the particle position to
// the position offset by gAccelW, both transformed by WVP.
[maxvertexcount(2)]
void main(
    point VertexOut gin[1],
    inout LineStream< GSOutput > Output
)
{
    const float3 gAccelW = float3(0.0f, -0.98f, 0.0f);

    const float4 lineStart = float4(gin[0].Pos, 1.0f);
    const float4 lineEnd = float4((gin[0].Pos + gAccelW), 1.0f);

    GSOutput gout;
    gout.Colour = gin[0].Colour;

    gout.Pos = mul(lineStart, WVP);
    Output.Append(gout);

    gout.Pos = mul(lineEnd, WVP);
    Output.Append(gout);
}
And pixel Shader
// Interpolated line vertex coming from the draw geometry shader.
struct GSOutput
{
    float4 Pos : SV_POSITION;
    float4 Colour : COLOR;
};

// Pixel shader: outputs the interpolated vertex colour unchanged.
float4 main(GSOutput pin) : SV_TARGET
{
    float4 colour = pin.Colour;
    return colour;
}

Related

Values are not correct when building NormalZMap texture

I am currently learning DX11 and practicing the SSAO effect.
When I try to build a NormalZMap, the result looks quite weird.
I store the normal in the RGB channels and the Z value in the A channel, but the pixel values inside the texture are not the same as the values output by the pixel shader.
the output value of pixel shader
pixel value of texture
Looks like the Z value is passed well, but the normals value are messed up.
Here is my texture setting
// Colour target for the normal/Z map: a screen-sized, single-mip,
// non-multisampled 4 x 32-bit float texture that can be bound both as render
// target and as shader resource.
D3D11_TEXTURE2D_DESC texDesc;
texDesc.Width = SCREEN_WIDTH;
texDesc.Height = SCREEN_HEIGHT;
texDesc.MipLevels = 1;
texDesc.ArraySize = 1;
texDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
texDesc.SampleDesc.Count = 1;
texDesc.SampleDesc.Quality = 0;
texDesc.Usage = D3D11_USAGE_DEFAULT;
texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
texDesc.CPUAccessFlags = 0;
texDesc.MiscFlags = 0;
// Texture plus its render-target and shader-resource views (default view
// descriptions). The HRESULTs of these calls are not checked.
GetDevice()->CreateTexture2D(&texDesc, NULL, &g_NormalZMap);
GetDevice()->CreateRenderTargetView(g_NormalZMap, NULL, &g_NormalZMapRTV);
GetDevice()->CreateShaderResourceView(g_NormalZMap, NULL, &g_NormalZMapSRV);
// Matching depth-stencil buffer: the same description is reused with only
// the format and bind flags changed.
texDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
texDesc.BindFlags = D3D11_BIND_DEPTH_STENCIL;
GetDevice()->CreateTexture2D(&texDesc, NULL, &g_NormalZMapDS);
GetDevice()->CreateDepthStencilView(g_NormalZMapDS, NULL, &g_NormalZMapDSV);
Here is my inputlayout setting
// Vertex layout for the normal/Z map pass: position (3 floats), normal
// (3 floats), colour (4 floats) and texture coordinate (2 floats), all in
// input slot 0 and packed back to back via D3D11_APPEND_ALIGNED_ELEMENT.
D3D11_INPUT_ELEMENT_DESC normalZMapLayout[] ={
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
UINT normalZMapNumElements = ARRAYSIZE(normalZMapLayout);
// The layout is validated against the vertex shader bytecode in pVSBlob;
// the HRESULT of the call is not checked.
GetDevice()->CreateInputLayout(normalZMapLayout,
normalZMapNumElements,
pVSBlob->GetBufferPointer(),
pVSBlob->GetBufferSize(),
&g_InputLayoutSSAONormalZMap);
// The blob is released once the layout has been created.
pVSBlob->Release();
pVSBlob = NULL;
Here is my HLSL code
// World matrix, constant buffer slot b0.
cbuffer WorldBuffer : register(b0)
{
matrix World;
}
// View matrix, constant buffer slot b1.
cbuffer ViewBuffer : register(b1)
{
matrix View;
}
// Projection matrix, constant buffer slot b2.
cbuffer ProjectionBuffer : register(b2)
{
matrix Projection;
}
// Vertex shader input. Position and Normal are declared float4 here while
// the input layout shown above supplies three floats for each.
struct VSINPUT
{
float4 Position : POSITION0;
float4 Normal : NORMAL0;
float4 Diffuse : COLOR0;
float2 TexCoord : TEXCOORD0;
};
// Values passed from the vertex shader to the pixel shader.
struct PSINPUT
{
float4 Position : SV_POSITION; // position multiplied by world*view*projection
float4 Normal : NORMAL0; // normal multiplied by the inverse-transpose of world*view
float2 TexCoord : TEXCOORD0;
float4 ViewPos : POSITION0; // position multiplied by world*view
};
// Pixel shader output: a single render target.
struct PSOUTPUT
{
float4 Diffuse : SV_Target;
};
// Vertex shader of the normal/Z map pass.
// Outputs the clip-space position, the normal transformed by the
// inverse-transpose of world*view, the texture coordinate and the position
// multiplied by world*view.
PSINPUT NormalZMapVS(VSINPUT input)
{
    PSINPUT output;

    float4x4 wv = mul(World, View);
    float4x4 wvp = mul(wv, Projection);
    float4x4 invTransWv = transpose(inverse(wv));

    output.Position = mul(input.Position, wvp);

    // Normal.w is written explicitly (0 = direction vector); previously only
    // .xyz was assigned, leaving one component of the output uninitialised.
    output.Normal = float4(
        normalize(mul(input.Normal.xyz, (float3x3)invTransWv)),
        0.0f);

    output.TexCoord = input.TexCoord;
    output.ViewPos = mul(input.Position, wv);
    return output;
}
// Pixel shader of the normal/Z map pass: discards texels whose texture alpha
// is below 0.1, then writes the interpolated normal to xyz and ViewPos.z
// to w.
PSOUTPUT NormalZMapPS(PSINPUT input)
{
    float alpha = g_Texture.Sample(g_SamplerState, input.TexCoord).a;
    clip(alpha - 0.1f);

    PSOUTPUT result;
    result.Diffuse = float4(input.Normal.xyz, input.ViewPos.z);
    return result;
}

direct3d 11 and 2D: pass coordinates of a vertex as int and not float

My purpose is to write a backend of a toolkit using only Direct3D 11 for 2D (no additional library like Direct2D, or SpriteBatch or something else).
Note that it is the first time I use Direct3D, and I'm currently learning d3D 11.
So for now, I can display a triangle or rectangle of the color I want.
The vertex structure of my C code contains 2 float for the position and 4 unsigned char for the color. In my vertex shader, the vertex structure has 2 floats for the position of the vertex, and 4 floats for the color.
I have noticed that if I use DXGI_FORMAT_R8G8B8A8_UNORM for the color in my D3D11_INPUT_ELEMENT_DESC array, then the color is converted automatically from the range 0 to 255 to the range 0.0f to 1.0f. That seems reasonable when I read the documentation (DXGI_FORMAT enumeration, the description of _UNORM):
"Unsigned normalized integer; which is interpreted in a resource as an unsigned integer, and is interpreted in a shader as an unsigned normalized floating-point value in the range [0, 1]. All 0's maps to 0.0f, and all 1's maps to 1.0f. A sequence of evenly spaced floating-point values from 0.0f to 1.0f are represented. For instance, a 2-bit UNORM represents 0.0f, 1/3, 2/3, and 1.0f."
Or at least that is how I interpret this doc (I may be wrong). And the color of the triangle is correct.
What I would like to do is the same for pixel coordinates: if I pass integers for the coordinates (x between 0 and the width of the window - 1, and y between 0 and the height of the window - 1), they would be interpreted by the vertex shader as the correct signed normalized floating-point values (-1.0f to 1.0f for x, and 1.0f to -1.0f for y). I tried several formats in my C Vertex struct and D3D11_INPUT_ELEMENT_DESC array, without luck. So I have 2 questions:
Is it possible ?
If it is not possible, is it faster to convert the coordinates in the C code, or in the shader code (with the viewport as a constant buffer) ? See the macros XF and YF in the code below for the conversion from int to float.
Below is my complete code that displays a simple triangle, followed with the HLSL code for vertex and pixel shader. I use the C api of Direct3D. I support Win 7 and Win 10.
Source code:
/* Windows 10 */
#define _WIN32_WINNT 0x0A00
#if defined _WIN32_WINNT && _WIN32_WINNT >= 0x0A00
# define HAVE_WIN10
#endif
#include <stdio.h>
#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
/* C API for d3d11 */
#define COBJMACROS
#include <guiddef.h>
#ifdef HAVE_WIN10
# include <dxgi1_3.h>
#else
# include <dxgi.h>
#endif
#include <d3d11.h>
#include "d3d11_vs.h"
#include "d3d11_ps.h"
/* comment for no debug informations */
#define _DEBUG
#ifdef _DEBUG
# define FCT \
do { printf(" * %s\n", __FUNCTION__); fflush(stdout); } while (0)
#else
# define FCT \
do { } while (0)
#endif
/*
 * Convert an integer pixel coordinate into the float stored in a vertex:
 *   XF(w,x) = (2x - w) / w   -> x = 0 gives -1, x = w gives +1
 *   YF(h,y) = (h - 2y) / h   -> y = 0 gives +1, y = h gives -1
 * w and h are the extent the coordinate is relative to; they must not be 0.
 */
#define XF(w,x) ((float)(2 * (x) - (w)) / (float)(w))
#define YF(h,y) ((float)((h) - 2 * (y)) / (float)(h))
typedef struct Window Window;
typedef struct D3d D3d;
/* Per-window state, allocated by window_new() and freed by window_del(). */
struct Window
{
HINSTANCE instance; /* module handle obtained with GetModuleHandle(NULL) */
RECT rect; /* not written by any code visible in this part of the file */
HWND win; /* window handle returned by CreateWindowEx() */
D3d *d3d; /* renderer attached to this window; assigned in d3d_init() */
};
/*
 * Direct3D 11 state for one window, created by d3d_init() and destroyed by
 * d3d_shutdown(). The DXGI interface versions depend on HAVE_WIN10.
 */
struct D3d
{
#ifdef HAVE_WIN10
IDXGIFactory2 *dxgi_factory;
IDXGISwapChain1 *dxgi_swapchain;
#else
IDXGIFactory *dxgi_factory;
IDXGISwapChain *dxgi_swapchain;
#endif
ID3D11Device *d3d_device;
ID3D11DeviceContext *d3d_device_ctx;
ID3D11RenderTargetView *d3d_render_target_view; /* (re)created in d3d_resize() */
ID3D11InputLayout *d3d_input_layout;
ID3D11VertexShader *d3d_vertex_shader;
ID3D11PixelShader *d3d_pixel_shader;
D3D11_VIEWPORT viewport; /* updated in d3d_resize() */
Window *win; /* owning window */
unsigned int vsync : 1; /* non-zero: query the refresh rate and present with sync interval 1 */
};
/*
 * Vertex as stored in the vertex buffer. It matches the input layout built
 * in d3d_init(): POSITION is two floats at offset 0 (R32G32_FLOAT), COLOR is
 * four bytes at offset 2 * sizeof(float) (R8G8B8A8_UNORM).
 */
typedef struct
{
FLOAT x;
FLOAT y;
BYTE r;
BYTE g;
BYTE b;
BYTE a;
} Vertex;
void d3d_resize(D3d *d3d, UINT width, UINT height);
void d3d_render(D3d *d3d);
/************************* Window *************************/
/*
 * Window procedure.
 *
 *  - WM_CLOSE, or releasing the 'Q' key: posts the quit message.
 *  - WM_ERASEBKGND: reports the background as erased (nothing is erased).
 *  - WM_SIZE: resizes the Direct3D buffers to the new client size.
 *  - WM_PAINT: renders one frame when an update region exists.
 *  - anything else goes to DefWindowProc().
 *
 * The Window pointer is read from GWLP_USERDATA. It is NULL until the
 * application stores it, and win->d3d is NULL until d3d_init() has run, so
 * both are checked before use: messages that arrive earlier are ignored
 * instead of dereferencing a NULL pointer.
 */
LRESULT CALLBACK
_window_procedure(HWND window,
                  UINT message,
                  WPARAM window_param,
                  LPARAM data_param)
{
  switch (message)
    {
    case WM_CLOSE:
      PostQuitMessage(0);
      return 0;
    case WM_KEYUP:
      if (window_param == 'Q')
        {
          PostQuitMessage(0);
        }
      return 0;
    case WM_ERASEBKGND:
      /* no need to erase back */
      return 1;
    /* GDI notifications */
    case WM_CREATE:
#ifdef _DEBUG
      printf(" * WM_CREATE\n");
      fflush(stdout);
#endif
      return 0;
    case WM_SIZE:
      {
        Window *win;
#ifdef _DEBUG
        printf(" * WM_SIZE\n");
        fflush(stdout);
#endif
        win = (Window *)GetWindowLongPtr(window, GWLP_USERDATA);
        if (win && win->d3d)
          d3d_resize(win->d3d,
                     (UINT)LOWORD(data_param), (UINT)HIWORD(data_param));
        return 0;
      }
    case WM_PAINT:
      {
#ifdef _DEBUG
        printf(" * WM_PAINT\n");
        fflush(stdout);
#endif
        if (GetUpdateRect(window, NULL, FALSE))
          {
            PAINTSTRUCT ps;
            Window *win;
            /* Begin/EndPaint always run so the update region is validated */
            BeginPaint(window, &ps);
            win = (Window *)GetWindowLongPtr(window, GWLP_USERDATA);
            if (win && win->d3d)
              d3d_render(win->d3d);
            EndPaint(window, &ps);
          }
        return 0;
      }
    default:
      return DefWindowProc(window, message, window_param, data_param);
    }
}
/*
 * Registers the "D3D" window class and creates a top-level window whose
 * client area is w x h pixels, positioned at (x, y).
 *
 * Returns the newly allocated Window, or NULL on failure (everything
 * acquired so far is undone).
 */
Window *window_new(int x, int y, int w, int h)
{
  WNDCLASS wc;
  RECT r;
  Window *win;

  win = (Window *)calloc(1, sizeof(Window));
  if (!win)
    return NULL;

  /*
   * GetModuleHandle() does not add a reference to the module, so the handle
   * must not be passed to FreeLibrary() on any cleanup path.
   */
  win->instance = GetModuleHandle(NULL);
  if (!win->instance)
    goto free_win;

  memset(&wc, 0, sizeof(WNDCLASS));
  wc.style = CS_HREDRAW | CS_VREDRAW;
  wc.lpfnWndProc = _window_procedure;
  wc.cbClsExtra = 0;
  wc.cbWndExtra = 0;
  wc.hInstance = win->instance;
  wc.hIcon = LoadIcon(NULL, IDI_APPLICATION);
  wc.hCursor = LoadCursor(NULL, IDC_ARROW);
  wc.hbrBackground = NULL;
  wc.lpszMenuName = NULL;
  wc.lpszClassName = "D3D";
  if (!RegisterClass(&wc))
    goto free_win;

  /* grow the rectangle so that the client area is w x h */
  r.left = 0;
  r.top = 0;
  r.right = w;
  r.bottom = h;
  if (!AdjustWindowRectEx(&r,
                          WS_OVERLAPPEDWINDOW | WS_SIZEBOX,
                          FALSE,
                          0U))
    goto unregister_class;

  win->win = CreateWindowEx(0U,
                            "D3D", "Test",
                            WS_OVERLAPPEDWINDOW | WS_SIZEBOX,
                            x, y,
                            r.right - r.left,
                            r.bottom - r.top,
                            NULL,
                            NULL, win->instance, NULL);
  if (!win->win)
    goto unregister_class;

  return win;

 unregister_class:
  /* must name the class registered above ("D3D") */
  UnregisterClass("D3D", win->instance);
 free_win:
  free(win);
  return NULL;
}
/*
 * Destroys the window, unregisters its class and frees the Window.
 * A NULL argument is ignored.
 */
void window_del(Window *win)
{
  if (!win)
    return;

  DestroyWindow(win->win);
  /* the class was registered as "D3D" in window_new() */
  UnregisterClass("D3D", win->instance);
  /*
   * win->instance comes from GetModuleHandle(), which takes no reference on
   * the module, so there is nothing to release with FreeLibrary().
   */
  free(win);
}
/* Makes the window visible in its normal (restored) state. */
void window_show(Window *win)
{
  HWND handle = win->win;

  ShowWindow(handle, SW_SHOWNORMAL);
}
/************************** D3D11 **************************/
/*
 * Looks up the refresh rate of the first B8G8R8A8_UNORM display mode of the
 * primary adapter/output whose size equals the current screen size.
 *
 * num, den: receive the refresh rate as a rational. They are set to 0/1
 *           when vsync is off, when no mode matches or when a DXGI call
 *           fails.
 */
static void d3d_refresh_rate_get(D3d *d3d, UINT *num, UINT *den)
{
  DXGI_MODE_DESC *display_mode_list = NULL;
  IDXGIAdapter *dxgi_adapter;
  IDXGIOutput *dxgi_output;
  UINT screen_w;
  UINT screen_h;
  UINT nbr_modes = 0U;
  UINT i;
  HRESULT res;

  *num = 0U;
  *den = 1U;

  if (!d3d->vsync)
    return;

  /* adapter of primary desktop : pass 0U */
  res = IDXGIFactory_EnumAdapters(d3d->dxgi_factory, 0U, &dxgi_adapter);
  if (FAILED(res))
    return;

  /* output of primary desktop : pass 0U */
  res = IDXGIAdapter_EnumOutputs(dxgi_adapter, 0U, &dxgi_output);
  if (FAILED(res))
    goto release_dxgi_adapter;

  /* number of mode that fit the format */
  res = IDXGIOutput_GetDisplayModeList(dxgi_output,
                                       DXGI_FORMAT_B8G8R8A8_UNORM,
                                       DXGI_ENUM_MODES_INTERLACED,
                                       &nbr_modes, NULL);
  if (FAILED(res))
    goto release_dxgi_output;

  /* nbr_modes is unsigned: %u, not %d */
  printf("display mode list : %u\n", nbr_modes);
  fflush(stdout);

  /* nothing to search, and malloc(0) is best avoided */
  if (nbr_modes == 0U)
    goto release_dxgi_output;

  display_mode_list = (DXGI_MODE_DESC *)malloc(nbr_modes * sizeof(DXGI_MODE_DESC));
  if (!display_mode_list)
    goto release_dxgi_output;

  /* fill the mode list */
  res = IDXGIOutput_GetDisplayModeList(dxgi_output,
                                       DXGI_FORMAT_B8G8R8A8_UNORM,
                                       DXGI_ENUM_MODES_INTERLACED,
                                       &nbr_modes, display_mode_list);
  if (FAILED(res))
    goto free_mode_list;

  /* the screen size does not change while searching: query it once */
  screen_w = (UINT)GetSystemMetrics(SM_CXSCREEN);
  screen_h = (UINT)GetSystemMetrics(SM_CYSCREEN);
  for (i = 0; i < nbr_modes; i++)
    {
      if ((display_mode_list[i].Width == screen_w) &&
          (display_mode_list[i].Height == screen_h))
        {
          *num = display_mode_list[i].RefreshRate.Numerator;
          *den = display_mode_list[i].RefreshRate.Denominator;
          break;
        }
    }

#ifdef _DEBUG
  {
    DXGI_ADAPTER_DESC adapter_desc;

    if (SUCCEEDED(IDXGIAdapter_GetDesc(dxgi_adapter, &adapter_desc)))
      {
        /* SIZE_T is 32 bits wide on 32-bit builds: widen it for %llu */
        printf(" * video mem: %llu B, %llu MB\n",
               (unsigned long long)adapter_desc.DedicatedVideoMemory,
               (unsigned long long)adapter_desc.DedicatedVideoMemory / 1024 / 1024);
        fflush(stdout);
        wprintf(L" * description: %ls\n", adapter_desc.Description);
        fflush(stdout);
      }
  }
#endif

 free_mode_list:
  free(display_mode_list);
 release_dxgi_output:
  IDXGIOutput_Release(dxgi_output);
 release_dxgi_adapter:
  /* release through the adapter's own interface macro */
  IDXGIAdapter_Release(dxgi_adapter);
}
/*
 * Creates the Direct3D 11 state for a window: DXGI factory, device and
 * immediate context, swap chain, vertex and pixel shaders and the input
 * layout. The render target view is not created here; d3d_resize() does it.
 *
 * win:   window to render into; win->d3d is set to the new object.
 * vsync: non-zero to query the display refresh rate and sync presents.
 *
 * Returns the new D3d, or NULL on failure. On failure every object created
 * so far is released and win->d3d is reset to NULL.
 */
D3d *d3d_init(Window *win, int vsync)
{
  /* must match the Vertex struct: 2 floats, then 4 normalized bytes */
  D3D11_INPUT_ELEMENT_DESC desc_ie[] =
  {
    { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
    { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 2 * sizeof(float), D3D11_INPUT_PER_VERTEX_DATA, 0 }
  };
#ifdef HAVE_WIN10
  DXGI_SWAP_CHAIN_DESC1 desc;
  DXGI_SWAP_CHAIN_FULLSCREEN_DESC desc_fs;
#else
  DXGI_SWAP_CHAIN_DESC desc;
#endif
  D3d *d3d;
  RECT r;
  HRESULT res;
  UINT flags;
  UINT num;
  UINT den;
  UINT feature_level_count;
  D3D_FEATURE_LEVEL feature_level[4];

  d3d = (D3d *)calloc(1, sizeof(D3d));
  if (!d3d)
    return NULL;

  d3d->vsync = vsync;
  win->d3d = d3d;
  d3d->win = win;

  /* create the DXGI factory */
  flags = 0;
#ifdef HAVE_WIN10
# ifdef _DEBUG
  flags = DXGI_CREATE_FACTORY_DEBUG;
# endif
  res = CreateDXGIFactory2(flags, &IID_IDXGIFactory2, (void **)&d3d->dxgi_factory);
#else
  res = CreateDXGIFactory(&IID_IDXGIFactory, (void **)&d3d->dxgi_factory);
#endif
  if (FAILED(res))
    goto free_d3d;

  /* single threaded for now */
  flags = D3D11_CREATE_DEVICE_SINGLETHREADED |
          D3D11_CREATE_DEVICE_BGRA_SUPPORT;
#ifdef HAVE_WIN10
# ifdef _DEBUG
  flags |= D3D11_CREATE_DEVICE_DEBUG;
# endif
#endif

  feature_level[0] = D3D_FEATURE_LEVEL_11_1;
  feature_level[1] = D3D_FEATURE_LEVEL_11_0;
  feature_level[2] = D3D_FEATURE_LEVEL_10_1;
  feature_level[3] = D3D_FEATURE_LEVEL_10_0;
  /* pass the real array length so that 10_0 is not silently left out */
  feature_level_count = (UINT)(sizeof(feature_level) / sizeof(feature_level[0]));

  /* create device and device context with hardware support */
  res = D3D11CreateDevice(NULL,
                          D3D_DRIVER_TYPE_HARDWARE,
                          NULL,
                          flags,
                          feature_level,
                          feature_level_count,
                          D3D11_SDK_VERSION,
                          &d3d->d3d_device,
                          NULL,
                          &d3d->d3d_device_ctx);
  if (res == E_INVALIDARG)
    {
      /*
       * A runtime without Direct3D 11.1 rejects a list containing 11_1 with
       * E_INVALIDARG: retry with the list starting at 11_0.
       */
      res = D3D11CreateDevice(NULL,
                              D3D_DRIVER_TYPE_HARDWARE,
                              NULL,
                              flags,
                              &feature_level[1],
                              feature_level_count - 1U,
                              D3D11_SDK_VERSION,
                              &d3d->d3d_device,
                              NULL,
                              &d3d->d3d_device_ctx);
    }
  if (FAILED(res))
    goto release_dxgi_factory2;

  if (!GetClientRect(win->win, &r))
    goto release_d3d_device;

  /*
   * create the swap chain. It needs some settings...
   * the size of the internal buffers
   * the image format
   * the number of back buffers (>= 2 for flip model, see SwapEffect field)
   *
   * Settings are different in win 7 and win10
   */
  d3d_refresh_rate_get(d3d, &num, &den);

#ifdef HAVE_WIN10
  desc.Width = r.right - r.left;
  desc.Height = r.bottom - r.top;
  desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
  desc.Stereo = FALSE;
#else
  desc.BufferDesc.Width = r.right - r.left;
  desc.BufferDesc.Height = r.bottom - r.top;
  desc.BufferDesc.RefreshRate.Numerator = num;
  desc.BufferDesc.RefreshRate.Denominator = den;
  desc.BufferDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
  desc.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
  desc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
#endif
  desc.SampleDesc.Count = 1U;
  desc.SampleDesc.Quality = 0U;
  desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
  desc.BufferCount = 2U;
#ifdef HAVE_WIN10
  desc.Scaling = DXGI_SCALING_NONE;
#else
  desc.OutputWindow = win->win;
  desc.Windowed = TRUE;
#endif
  desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
#ifdef HAVE_WIN10
  desc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED;
#endif
  desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;

#ifdef HAVE_WIN10
  desc_fs.RefreshRate.Numerator = num;
  desc_fs.RefreshRate.Denominator = den;
  desc_fs.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
  desc_fs.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
  desc_fs.Windowed = TRUE;

  res = IDXGIFactory2_CreateSwapChainForHwnd(d3d->dxgi_factory,
                                             (IUnknown *)d3d->d3d_device,
                                             win->win,
                                             &desc,
                                             &desc_fs,
                                             NULL,
                                             &d3d->dxgi_swapchain);
#else
  res = IDXGIFactory_CreateSwapChain(d3d->dxgi_factory,
                                     (IUnknown *)d3d->d3d_device,
                                     &desc,
                                     &d3d->dxgi_swapchain);
#endif
  if (FAILED(res))
    goto release_d3d_device;

  /* Vertex shader (bytecode array from d3d11_vs.h) */
  res = ID3D11Device_CreateVertexShader(d3d->d3d_device,
                                        d3d_vertex_shader,
                                        sizeof(d3d_vertex_shader),
                                        NULL,
                                        &d3d->d3d_vertex_shader);
  if (FAILED(res))
    {
      printf(" * CreateVertexShader() failed\n");
      goto release_dxgi_swapchain;
    }

  /* Pixel shader (bytecode array from d3d11_ps.h) */
  res = ID3D11Device_CreatePixelShader(d3d->d3d_device,
                                       d3d_pixel_shader,
                                       sizeof(d3d_pixel_shader),
                                       NULL,
                                       &d3d->d3d_pixel_shader);
  if (FAILED(res))
    {
      printf(" * CreatePixelShader() failed\n");
      goto release_vertex_shader;
    }

  /* create the input layout, validated against the vertex shader bytecode */
  res = ID3D11Device_CreateInputLayout(d3d->d3d_device,
                                       desc_ie,
                                       sizeof(desc_ie) / sizeof(D3D11_INPUT_ELEMENT_DESC),
                                       d3d_vertex_shader,
                                       sizeof(d3d_vertex_shader),
                                       &d3d->d3d_input_layout);
  if (FAILED(res))
    {
      printf(" * CreateInputLayout() failed\n");
      goto release_pixel_shader;
    }

  return d3d;

 release_pixel_shader:
  ID3D11PixelShader_Release(d3d->d3d_pixel_shader);
 release_vertex_shader:
  ID3D11VertexShader_Release(d3d->d3d_vertex_shader);
 release_dxgi_swapchain:
#ifdef HAVE_WIN10
  IDXGISwapChain1_SetFullscreenState(d3d->dxgi_swapchain, FALSE, NULL);
  IDXGISwapChain1_Release(d3d->dxgi_swapchain);
#else
  IDXGISwapChain_SetFullscreenState(d3d->dxgi_swapchain, FALSE, NULL);
  IDXGISwapChain_Release(d3d->dxgi_swapchain);
#endif
 release_d3d_device:
  ID3D11DeviceContext_Release(d3d->d3d_device_ctx);
  ID3D11Device_Release(d3d->d3d_device);
 release_dxgi_factory2:
#ifdef HAVE_WIN10
  IDXGIFactory2_Release(d3d->dxgi_factory);
#else
  IDXGIFactory_Release(d3d->dxgi_factory);
#endif
 free_d3d:
  /* do not leave the window pointing at the freed object */
  win->d3d = NULL;
  free(d3d);
  return NULL;
}
/*
 * Releases every Direct3D/DXGI object owned by d3d and frees d3d itself.
 * A NULL argument is ignored. In _DEBUG builds the remaining live device
 * objects are reported through ID3D11Debug after the releases.
 */
void d3d_shutdown(D3d *d3d)
{
#ifdef _DEBUG
  ID3D11Debug *d3d_debug;
  HRESULT res;
#endif

  if (!d3d)
    return;

#ifdef _DEBUG
  /* the object being queried is the device: use the device's macro */
  res = ID3D11Device_QueryInterface(d3d->d3d_device, &IID_ID3D11Debug,
                                    (void **)&d3d_debug);
#endif

  ID3D11PixelShader_Release(d3d->d3d_pixel_shader);
  ID3D11VertexShader_Release(d3d->d3d_vertex_shader);
  ID3D11InputLayout_Release(d3d->d3d_input_layout);
  /*
   * The view is only created by d3d_resize(); it is still NULL when no
   * resize has happened yet.
   */
  if (d3d->d3d_render_target_view)
    ID3D11RenderTargetView_Release(d3d->d3d_render_target_view);
#ifdef HAVE_WIN10
  IDXGISwapChain1_SetFullscreenState(d3d->dxgi_swapchain, FALSE, NULL);
  IDXGISwapChain1_Release(d3d->dxgi_swapchain);
#else
  IDXGISwapChain_SetFullscreenState(d3d->dxgi_swapchain, FALSE, NULL);
  IDXGISwapChain_Release(d3d->dxgi_swapchain);
#endif
  ID3D11DeviceContext_Release(d3d->d3d_device_ctx);
  ID3D11Device_Release(d3d->d3d_device);
#ifdef HAVE_WIN10
  IDXGIFactory2_Release(d3d->dxgi_factory);
#else
  IDXGIFactory_Release(d3d->dxgi_factory);
#endif
  free(d3d);

#ifdef _DEBUG
  if (SUCCEEDED(res))
    {
      ID3D11Debug_ReportLiveDeviceObjects(d3d_debug, D3D11_RLDO_DETAIL);
      ID3D11Debug_Release(d3d_debug);
    }
#endif
}
/*
 * Adapts the renderer to a new client size: updates the viewport, drops the
 * old render target view, resizes the swap chain buffers and creates a new
 * render target view on back buffer 0.
 *
 * When any step fails, d3d->d3d_render_target_view is left NULL (never a
 * released pointer), so later code can test it.
 * A NULL d3d is ignored.
 */
void d3d_resize(D3d *d3d, UINT width, UINT height)
{
  D3D11_RENDER_TARGET_VIEW_DESC desc_rtv;
  ID3D11Texture2D *back_buffer;
  HRESULT res;

  FCT;

  if (!d3d)
    return;

  /* set viewport, depends on size of the window */
  d3d->viewport.TopLeftX = 0.0f;
  d3d->viewport.TopLeftY = 0.0f;
  d3d->viewport.Width = (float)width;
  d3d->viewport.Height = (float)height;
  d3d->viewport.MinDepth = 0.0f;
  d3d->viewport.MaxDepth = 1.0f;

  /* unset the render target view in the output merger */
  ID3D11DeviceContext_OMSetRenderTargets(d3d->d3d_device_ctx,
                                         0U, NULL, NULL);

  /*
   * release the render target view and forget the pointer, so that the
   * early returns below cannot leave a dangling view behind
   */
  if (d3d->d3d_render_target_view)
    {
      ID3D11RenderTargetView_Release(d3d->d3d_render_target_view);
      d3d->d3d_render_target_view = NULL;
    }

  /* resize the internal buffers of the swap chain to the new size */
#ifdef HAVE_WIN10
  res = IDXGISwapChain1_ResizeBuffers(d3d->dxgi_swapchain,
                                      0U, /* preserve buffer count */
                                      width, height,
                                      DXGI_FORMAT_UNKNOWN, /* preserve format */
                                      0U);
#else
  res = IDXGISwapChain_ResizeBuffers(d3d->dxgi_swapchain,
                                     0U, /* preserve buffer count */
                                     width, height,
                                     DXGI_FORMAT_UNKNOWN, /* preserve format */
                                     0U);
#endif
  if ((res == DXGI_ERROR_DEVICE_REMOVED) ||
      (res == DXGI_ERROR_DEVICE_RESET) ||
      (res == DXGI_ERROR_DRIVER_INTERNAL_ERROR))
    {
      return;
    }

  if (FAILED(res))
    {
      printf("ResizeBuffers() failed\n");
      fflush(stdout);
      return;
    }

  /* get the internal buffer of the swap chain */
#ifdef HAVE_WIN10
  res = IDXGISwapChain1_GetBuffer(d3d->dxgi_swapchain, 0,
                                  &IID_ID3D11Texture2D,
                                  (void **)&back_buffer);
#else
  res = IDXGISwapChain_GetBuffer(d3d->dxgi_swapchain, 0,
                                 &IID_ID3D11Texture2D,
                                 (void **)&back_buffer);
#endif
  if (FAILED(res))
    {
      printf("swapchain GetBuffer() failed\n");
      fflush(stdout);
      return;
    }

  ZeroMemory(&desc_rtv, sizeof(D3D11_RENDER_TARGET_VIEW_DESC));
  desc_rtv.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
  desc_rtv.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;

  /* create the new render target view from this internal buffer */
  res = ID3D11Device_CreateRenderTargetView(d3d->d3d_device,
                                            (ID3D11Resource *)back_buffer,
                                            &desc_rtv,
                                            &d3d->d3d_render_target_view);
  /* the view holds its own reference on the buffer */
  ID3D11Texture2D_Release(back_buffer);

  if (FAILED(res))
    {
      d3d->d3d_render_target_view = NULL;
      printf("CreateRenderTargetView() failed\n");
      fflush(stdout);
    }
}
/*** triangle ***/
/*
 * One triangle: CPU-side vertex/index data, the GPU buffers created from
 * them, and the values passed to the input assembler when drawing.
 */
typedef struct
{
Vertex vertices[3];
unsigned int indices[3];
ID3D11Buffer *vertex_buffer;
ID3D11Buffer *index_buffer; /* not useful for a single triangle */
UINT stride; /* sizeof(Vertex), passed to IASetVertexBuffers() */
UINT offset; /* byte offset passed to IASetVertexBuffers() */
UINT count; /* not assigned by the code visible in this part of the file */
UINT index_count; /* number of indices passed to DrawIndexed() */
} Triangle;
/*
 * Creates a single-colour triangle together with its vertex and index
 * buffers.
 *
 * d3d:            renderer whose device creates the buffers.
 * w, h:           extent used to convert pixel coordinates (see XF / YF).
 * (x1,y1)..(x3,y3): corners in pixels.
 * r, g, b, a:     colour applied to all three vertices.
 *
 * Returns the new Triangle, or NULL on failure; nothing is leaked on the
 * failure paths.
 */
Triangle *triangle_new(D3d *d3d,
                       int w, int h,
                       int x1, int y1,
                       int x2, int y2,
                       int x3, int y3,
                       unsigned char r,
                       unsigned char g,
                       unsigned char b,
                       unsigned char a)
{
  D3D11_BUFFER_DESC desc;
  D3D11_SUBRESOURCE_DATA sr_data;
  Triangle *t;
  HRESULT res;
  const int xs[3] = { x1, x2, x3 };
  const int ys[3] = { y1, y2, y3 };
  int i;

  /* calloc: every field, including `count`, starts from a defined value */
  t = (Triangle *)calloc(1, sizeof(Triangle));
  if (!t)
    return NULL;

  for (i = 0; i < 3; i++)
    {
      t->vertices[i].x = XF(w, xs[i]);
      t->vertices[i].y = YF(h, ys[i]);
      t->vertices[i].r = r;
      t->vertices[i].g = g;
      t->vertices[i].b = b;
      t->vertices[i].a = a;
      /* useful only for the rectangle later */
      t->indices[i] = (unsigned int)i;
    }

  t->stride = sizeof(Vertex);
  t->offset = 0U;
  t->index_count = 3U;

  /* vertex buffer, initialised from t->vertices */
  desc.ByteWidth = sizeof(t->vertices);
  desc.Usage = D3D11_USAGE_DYNAMIC;
  desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
  desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
  desc.MiscFlags = 0U;
  desc.StructureByteStride = 0U;

  sr_data.pSysMem = t->vertices;
  sr_data.SysMemPitch = 0U;
  sr_data.SysMemSlicePitch = 0U;

  res = ID3D11Device_CreateBuffer(d3d->d3d_device,
                                  &desc,
                                  &sr_data,
                                  &t->vertex_buffer);
  if (FAILED(res))
    {
      free(t);
      return NULL;
    }

  /* index buffer, initialised from t->indices */
  desc.ByteWidth = sizeof(t->indices);
  desc.Usage = D3D11_USAGE_DYNAMIC;
  desc.BindFlags = D3D11_BIND_INDEX_BUFFER;
  desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
  desc.MiscFlags = 0U;
  desc.StructureByteStride = 0U;

  sr_data.pSysMem = t->indices;
  sr_data.SysMemPitch = 0U;
  sr_data.SysMemSlicePitch = 0U;

  res = ID3D11Device_CreateBuffer(d3d->d3d_device,
                                  &desc,
                                  &sr_data,
                                  &t->index_buffer);
  if (FAILED(res))
    {
      /* the vertex buffer already exists: release it before bailing out */
      ID3D11Buffer_Release(t->vertex_buffer);
      free(t);
      return NULL;
    }

  return t;
}
/*
 * Releases the triangle's index and vertex buffers and frees the Triangle.
 * Does nothing when t is NULL.
 */
void triangle_free(Triangle *t)
{
  if (t)
    {
      ID3D11Buffer_Release(t->index_buffer);
      ID3D11Buffer_Release(t->vertex_buffer);
      free(t);
    }
}
/*
 * Render one frame: build a temporary triangle, clear the render target,
 * configure the pipeline stages, draw, then present the swap chain.
 *
 * The frame is skipped (nothing is cleared, drawn or presented) when the
 * client rectangle cannot be queried or the triangle cannot be created.
 */
void d3d_render(D3d *d3d)
{
#ifdef HAVE_WIN10
    DXGI_PRESENT_PARAMETERS pp;
#endif
    const FLOAT color[4] = { 0.10f, 0.18f, 0.24f, 1.0f };
    RECT rect;
    HRESULT res;
    Triangle *t;

    FCT;

    if (!GetClientRect(d3d->win->win, &rect))
    {
        return;
    }

    /* scene */
    t = triangle_new(d3d,
                     rect.right - rect.left,
                     rect.bottom - rect.top,
                     320, 120,
                     480, 360,
                     160, 360,
                     255, 255, 0, 255); /* r, g, b, a */
    if (!t)
    {
        /* triangle_new() returns NULL on allocation or buffer-creation
         * failure; dereferencing it below would crash */
        return;
    }

    /* clear render target */
    ID3D11DeviceContext_ClearRenderTargetView(d3d->d3d_device_ctx,
                                              d3d->d3d_render_target_view,
                                              color);

    /* Input Assembler (IA) */
    /* TRIANGLESTRIP only useful for the rectangle later */
    ID3D11DeviceContext_IASetPrimitiveTopology(d3d->d3d_device_ctx,
                                               D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
    ID3D11DeviceContext_IASetInputLayout(d3d->d3d_device_ctx,
                                         d3d->d3d_input_layout);
    ID3D11DeviceContext_IASetVertexBuffers(d3d->d3d_device_ctx,
                                           0,
                                           1,
                                           &t->vertex_buffer,
                                           &t->stride,
                                           &t->offset);
    ID3D11DeviceContext_IASetIndexBuffer(d3d->d3d_device_ctx,
                                         t->index_buffer,
                                         DXGI_FORMAT_R32_UINT,
                                         0);

    /* vertex shader */
    ID3D11DeviceContext_VSSetShader(d3d->d3d_device_ctx,
                                    d3d->d3d_vertex_shader,
                                    NULL,
                                    0);

    /* pixel shader */
    ID3D11DeviceContext_PSSetShader(d3d->d3d_device_ctx,
                                    d3d->d3d_pixel_shader,
                                    NULL,
                                    0);

    /* set viewport in the Rasterizer Stage */
    ID3D11DeviceContext_RSSetViewports(d3d->d3d_device_ctx, 1U, &d3d->viewport);

    /* Output merger */
    ID3D11DeviceContext_OMSetRenderTargets(d3d->d3d_device_ctx,
                                           1U, &d3d->d3d_render_target_view,
                                           NULL);

    /* draw */
    ID3D11DeviceContext_DrawIndexed(d3d->d3d_device_ctx,
                                    t->index_count,
                                    0, 0);

    triangle_free(t);

    /*
     * present frame, that is flip the back buffer and the front buffer
     * if no vsync, we present immediately
     */
#ifdef HAVE_WIN10
    pp.DirtyRectsCount = 0;
    pp.pDirtyRects = NULL;
    pp.pScrollRect = NULL;
    pp.pScrollOffset = NULL;
    res = IDXGISwapChain1_Present1(d3d->dxgi_swapchain,
                                   d3d->vsync ? 1 : 0, 0, &pp);
#else
    res = IDXGISwapChain_Present(d3d->dxgi_swapchain,
                                 d3d->vsync ? 1 : 0, 0);
#endif
    if (res == DXGI_ERROR_DEVICE_RESET || res == DXGI_ERROR_DEVICE_REMOVED)
    {
        printf("device removed or lost, need to recreate everything\n");
        fflush(stdout);
    }
    else if (res == DXGI_STATUS_OCCLUDED)
    {
        printf("window is not visible, so vsync won't work. Let's sleep a bit to reduce CPU usage\n");
        fflush(stdout);
    }
}
/*
 * Program entry point: create the window, initialise Direct3D, then pump
 * window messages until WM_QUIT is received.
 *
 * Returns 0 on a normal quit, 1 if the window or Direct3D could not be set up.
 */
int main()
{
    Window *win;
    D3d *d3d;

    /* remove scaling on HiDPI */
#ifdef HAVE_WIN10
    SetProcessDpiAwarenessContext(DPI_AWARENESS_CONTEXT_SYSTEM_AWARE);
#endif

    win = window_new(100, 100, 800, 480);
    if (!win)
        return 1;

    d3d = d3d_init(win, 0);
    if (!d3d)
    {
        /* Direct3D setup failed: tear the window down and report it */
        window_del(win);
        printf(" error\n");
        fflush(stdout);
        return 1;
    }

    SetWindowLongPtr(win->win, GWLP_USERDATA, (LONG_PTR)win);
    window_show(win);

    /* message loop */
    for (;;)
    {
        MSG msg;

        /* drain every pending message before polling again */
        while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
        {
            if (msg.message == WM_QUIT)
                goto beach;
            TranslateMessage(&msg);
            DispatchMessageW(&msg);
        }
    }

beach:
    d3d_shutdown(d3d);
    window_del(win);
    return 0;
}
Vertex shader:
// Per-vertex input: a 2D position and an RGBA colour.
struct vs_input
{
    float2 position : POSITION;
    float4 color : COLOR;
};

// Data handed on to the pixel shader.
struct ps_input
{
    float4 position : SV_POSITION;
    float4 color : COLOR;
};

// Pass-through vertex shader: the 2D position is extended to a float4
// with z = 0 and w = 1, and the colour is forwarded unchanged.
ps_input main(vs_input input)
{
    ps_input result;

    result.color = input.color;
    result.position = float4(input.position.x, input.position.y, 0.0f, 1.0f);

    return result;
}
Pixel shader:
// Interpolated values received from the vertex shader.
struct ps_input
{
    float4 position : SV_POSITION;
    float4 color : COLOR;
};

// Pixel shader: writes the interpolated vertex colour to the render target.
float4 main(ps_input input) : SV_TARGET
{
    float4 pixel_color = input.color;
    return pixel_color;
}
thank you
If you want to use pixel coordinates for your vertices, you can use one of these two formats:
DXGI_FORMAT_R32G32_FLOAT (same as you use right now, pixel in floating point)
DXGI_FORMAT_R32G32_UINT (pixel coordinates as int, vertex shader position input becomes uint2 position : POSITION)
If you use float, the conversion to float is done on the C side; if you use UINT, the conversion is done in the vertex shader. The speed difference would need profiling; if the number of vertices is low, I'd expect it to be negligible.
You can then easily remap those values into the -1 to 1 range in the vertex shader (which is quite efficient); you only need to pass the inverse viewport size in a constant buffer.
So your vertex shader becomes:
// Per-vertex input: position in pixel coordinates plus an RGBA colour.
struct vs_input
{
    float2 position : POSITION;
    //uint2 position : POSITION; // use this declaration instead if the vertex format is UINT
    float4 color : COLOR;
};

// Data handed on to the pixel shader.
struct ps_input
{
    float4 position : SV_POSITION;
    float4 color : COLOR;
};

// Filled by the application with (1 / viewport width, 1 / viewport height).
cbuffer cbViewport : register(b0)
{
    float2 inverseViewportSize;
}

// Vertex shader: remaps pixel coordinates to clip space and forwards the colour.
ps_input main(vs_input input)
{
    ps_input output;

    float2 p = input.position; // if you use UINT, the conversion to float is done here
    p *= inverseViewportSize;  // pixels -> [0, 1]
    p *= 2.0f;                 // [0, 1] -> [0, 2]
    p -= 1.0f;                 // [0, 2] -> [-1, 1]
    p.y *= -1.0f;              // clip space is bottom to top, pixel is top to bottom

    output.position = float4(p, 0.0f, 1.0f);
    output.color = input.color;
    return output;
}

How does D3D11 render pixels with an alpha value of 0 in the texture as transparent?

I used DrawIconEx (GDI/D3D11 interoperability and CopyResource) to generate an ID3D11Texture2D which has many pixels with an alpha channel value of 0. this texture has been verified by D3D11_USAGE_STAGING/Map to view the pixel value and ScreenGrab save png (relevant code needs to be modified: DXGI_FORMAT_B8G8R8A8_UNORM->Use GUID_WICPixelFormat32bppBGRA instead of GUID_WICPixelFormat24bppBGR).
When I use the rendering texture method of Tutorial 5: Texturing, the alpha value of 0 pixels will be rendered as black, which is not what i want, I hope these pixels render to be transparent. What will be done to achieve the goal? Here is my relevant code:
// Initialisation of the renderer. Only the blend-state part is shown in this
// excerpt; the "..." lines stand for code omitted by the author (including
// the declaration of `hr`).
HRESULT CGraphRender::Init()
{
...
// Create an alpha enabled blend state description.
_blend_state = nullptr;
D3D11_BLEND_DESC blendDesc;
ZeroMemory(&blendDesc, sizeof(D3D11_BLEND_DESC));
// Only render target 0 is configured; the rest of the description stays zeroed.
blendDesc.RenderTarget[0].BlendEnable = TRUE;
// Colour: source weighted by its alpha, destination by (1 - source alpha), added.
blendDesc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA;
blendDesc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
blendDesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
// Alpha: source factor ONE, destination factor ZERO, added.
blendDesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
blendDesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
blendDesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
// Create the blend state object and store it in _blend_state.
hr = _d3d_device->CreateBlendState(&blendDesc, &_blend_state);
RETURN_ON_FAIL(hr);
....
}
// Binds the back-buffer render target (no depth-stencil view), clears it to
// `color`, and installs _blend_state on the output-merger stage.
// Always returns S_OK.
HRESULT CGraphRender::Clear_3D(float color[])
{
    ID3D11RenderTargetView* targets[] = { _back_rendertarget_view };
    _immediate_context->OMSetRenderTargets(_countof(targets), targets, nullptr);

    _immediate_context->ClearRenderTargetView(_back_rendertarget_view, color);

    // Blend factor of all ones, sample mask with every bit set.
    float blendFactor[4] = { 1.f, 1.f, 1.f, 1.f };
    const UINT allSamples = 0xffffffff;
    _immediate_context->OMSetBlendState(_blend_state, blendFactor, allSamples);

    return S_OK;
}
The problem has been solved: save the current blend state with OMGetBlendState, apply the alpha blend state with OMSetBlendState(_blend_state, ...) before rendering the "alpha" texture, and restore the previous blend state after rendering:
// Renders `texture` into `dst_rect` on the device's immediate context.
//
// When `is_blend_alpha` is set, _blend_state is installed only for the
// duration of the draw: the blend state, blend factor and sample mask that
// were current beforehand are saved first and put back afterwards.
//
// Returns E_UNEXPECTED if no immediate context is available, otherwise the
// result of texture->Render().
HRESULT CGraphRender::DrawTexture(const std::shared_ptr<CDrawTextureShader>& texture, const RECT& dst_rect, const BOOL& is_blend_alpha)
{
    CComPtr<ID3D11DeviceContext> ctx;
    _d3d_device->GetImmediateContext(&ctx);
    if (!ctx)
    {
        return E_UNEXPECTED;
    }

    // Plain draw: leave the output-merger state untouched.
    if (!is_blend_alpha)
    {
        return texture->Render(ctx, dst_rect);
    }

    // Save whatever blend configuration is currently bound.
    CComPtr<ID3D11BlendState> saved_state;
    FLOAT saved_factor[4] = { 0.f };
    UINT saved_mask = 0;
    ctx->OMGetBlendState(&saved_state, saved_factor, &saved_mask);

    // Draw with alpha blending enabled.
    float alpha_factor[4] = { 1.f, 1.f, 1.f, 1.f };
    ctx->OMSetBlendState(_blend_state, alpha_factor, 0xffffffff);
    const HRESULT hr = texture->Render(ctx, dst_rect);

    // Put the previous configuration back.
    ctx->OMSetBlendState(saved_state, saved_factor, saved_mask);
    return hr;
}

Nuklear OpenGL flicker issue

I have downloaded the demo program for Nuklear in LWJGL, and I've managed to compile it and get it to work successfully. Then I tried to implement the same code in my game engine and nothing shows up, unless I disable glClear(GL_COLOR_BUFFER_BIT). At that point I can see the little Nuklear window flickering.
This is my Window class
package Engine.Renderer;
import Engine.Messages.AppMsg;
import Engine.IntApplication;
import Engine.Messages.Type;
import Simulator.Application;
import org.lwjgl.nuklear.NkColorf;
import org.lwjgl.nuklear.NkMouse;
import org.lwjgl.opengl.*;
import org.lwjgl.system.Callback;
import org.lwjgl.system.MemoryStack;
import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import static org.lwjgl.glfw.Callbacks.*;
import static org.lwjgl.glfw.GLFW.*;
import static org.lwjgl.nuklear.Nuklear.*;
import static org.lwjgl.opengl.ARBDebugOutput.*;
import static org.lwjgl.opengl.ARBDebugOutput.GL_DEBUG_SEVERITY_LOW_ARB;
import static org.lwjgl.opengl.GL11C.*;
import static org.lwjgl.system.MemoryStack.stackPush;
import static org.lwjgl.system.MemoryUtil.*;
/**
 * GLFW window that owns the OpenGL context, forwards keyboard and mouse
 * events to the application as {@link AppMsg} objects, initialises the
 * shared Nuklear context and drives the {@link GUIRenderer} once per
 * {@link #processEvents()} call.
 */
public class Window
{
    /**
     * Initialises GLFW, creates a non-resizable OpenGL 3.3 core debug
     * context, shows the window, installs the callbacks and creates the
     * GUI renderer. The process exits with status 1 if GLFW cannot be
     * initialised or the window cannot be created.
     */
    public Window(int width, int height, String title)
    {
        m_Width = width;
        m_Height = height;
        m_Title = title;

        if (!glfwInit())
        {
            ///TODO: LOG
            Runtime.getRuntime().exit(1);
        }

        glfwDefaultWindowHints();
        glfwWindowHint(GLFW_VISIBLE, GLFW_FALSE);
        glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
        glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
        glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
        glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT, GLFW_TRUE);
        glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE);

        m_Window = glfwCreateWindow(m_Width, m_Height, m_Title, NULL, NULL);
        if (m_Window == NULL)
        {
            ///TODO: LOG
            Runtime.getRuntime().exit(1);
        }

        glfwMakeContextCurrent(m_Window);
        GLCapabilities caps = GL.createCapabilities();
        glfwShowWindow(m_Window);

        ConfigureDebugOutput(caps);
        SetUpWindow();
        renderer = new GUIRenderer();
    }

    /**
     * Installs the LWJGL debug message callback and disables one category
     * of API messages through whichever debug interface the context offers
     * (core 4.3, KHR_debug or ARB_debug_output, in that order).
     */
    private void ConfigureDebugOutput(GLCapabilities caps)
    {
        GLUtil.setupDebugMessageCallback();

        if (caps.OpenGL43)
        {
            GL43.glDebugMessageControl(
                GL43.GL_DEBUG_SOURCE_API,
                GL43.GL_DEBUG_TYPE_OTHER,
                GL43.GL_DEBUG_SEVERITY_NOTIFICATION,
                (IntBuffer)null,
                false
            );
        }
        else if (caps.GL_KHR_debug)
        {
            KHRDebug.glDebugMessageControl(
                KHRDebug.GL_DEBUG_SOURCE_API,
                KHRDebug.GL_DEBUG_TYPE_OTHER,
                KHRDebug.GL_DEBUG_SEVERITY_NOTIFICATION,
                (IntBuffer)null,
                false
            );
        }
        else if (caps.GL_ARB_debug_output)
        {
            glDebugMessageControlARB(
                GL_DEBUG_SOURCE_API_ARB,
                GL_DEBUG_TYPE_OTHER_ARB,
                GL_DEBUG_SEVERITY_LOW_ARB,
                (IntBuffer)null,
                false
            );
        }
    }

    public void Draw()
    {
    }

    /**
     * Runs one frame: refreshes the cached window and framebuffer sizes,
     * polls GLFW events inside a Nuklear input bracket, applies Nuklear's
     * mouse grab requests, sets the viewport and clear colour, renders the
     * GUI and swaps buffers.
     */
    public void processEvents()
    {
        // Cache the logical window size and the framebuffer size.
        try (MemoryStack stack = stackPush())
        {
            IntBuffer pw = stack.mallocInt(1);
            IntBuffer ph = stack.mallocInt(1);

            glfwGetWindowSize(m_Window, pw, ph);
            m_Width = pw.get(0);
            m_Height = ph.get(0);

            glfwGetFramebufferSize(m_Window, pw, ph);
            m_DisplayWidth = pw.get(0);
            m_DisplayHeight = ph.get(0);
        }

        nk_input_begin(NuklearContainer.ctx);
        glfwPollEvents();

        NkMouse mouse = NuklearContainer.ctx.input().mouse();
        if (mouse.grab())
        {
            glfwSetInputMode(m_Window, GLFW_CURSOR, GLFW_CURSOR_HIDDEN);
        }
        else if (mouse.grabbed())
        {
            // Keep the cursor pinned to its previous position while grabbed.
            float lastX = mouse.prev().x();
            float lastY = mouse.prev().y();
            glfwSetCursorPos(m_Window, lastX, lastY);
            mouse.pos().x(lastX);
            mouse.pos().y(lastY);
        }
        else if (mouse.ungrab())
        {
            glfwSetInputMode(m_Window, GLFW_CURSOR, GLFW_CURSOR_NORMAL);
        }
        nk_input_end(NuklearContainer.ctx);

        try (MemoryStack stack = stackPush())
        {
            IntBuffer vpWidth = stack.mallocInt(1);
            IntBuffer vpHeight = stack.mallocInt(1);
            glfwGetWindowSize(m_Window, vpWidth, vpHeight);
            glViewport(0, 0, vpWidth.get(0), vpHeight.get(0));

            NkColorf background = NkColorf.create().r(0.10f).g(0.18f).b(0.24f).a(1.0f);
            glClearColor(background.r(), background.g(), background.b(), background.a());
        }
        //glClear(GL_COLOR_BUFFER_BIT);

        renderer.Render();
        glfwSwapBuffers(m_Window);
    }

    /** Frees the window callbacks, destroys the window and terminates GLFW. */
    public void CleanUp()
    {
        glfwFreeCallbacks(m_Window);
        glfwDestroyWindow(m_Window);
        glfwTerminate();
    }

    /**
     * Installs the GLFW key, mouse-button, cursor-position and close
     * callbacks, initialises the Nuklear context and wires its clipboard
     * copy/paste hooks to the GLFW clipboard.
     */
    private void SetUpWindow()
    {
        glfwSetKeyCallback(m_Window, (window, key, scancode, action, mods) ->
        {
            AppMsg msg = new AppMsg();
            msg.key = key;
            msg.scancode = scancode;
            msg.mods = mods;
            // 0 = release, 1 = press, 2 = repeat; any other value leaves
            // msg.type untouched.
            if (action == 0)
            {
                msg.type = Type.KEYUP;
            }
            else if (action == 1)
            {
                msg.type = Type.KEYDOWN;
            }
            else if (action == 2)
            {
                msg.type = Type.KEYREPEAT;
            }
            IntApplication.OnMsgProc(msg);
        });

        glfwSetMouseButtonCallback(m_Window, (window, button, action, mods) ->
        {
            AppMsg msg = new AppMsg();
            msg.button = button;
            msg.mods = mods;
            if (action == 0)
            {
                msg.type = Type.MOUSEUP;
            }
            else if (action == 1)
            {
                msg.type = Type.MOUSEDOWN;
            }
            Application.OnMsgProc(msg);
        });

        glfwSetCursorPosCallback(m_Window, (window, xpos, ypos) ->
        {
            AppMsg msg = new AppMsg();
            msg.xpos = xpos;
            msg.ypos = ypos;
            msg.type = Type.MOUSEMOVE;
            Application.OnMsgProc(msg);
        });

        glfwSetWindowCloseCallback(m_Window, (window) -> IntApplication.OnClose());

        nk_init(NuklearContainer.ctx, NuklearContainer.ALLOCATOR, null);
        NuklearContainer.ctx.clip()
            .copy((handle, text, len) ->
            {
                if (len == 0)
                {
                    return;
                }
                // Copy the text into a NUL-terminated buffer for GLFW.
                try (MemoryStack stack = stackPush())
                {
                    ByteBuffer terminated = stack.malloc(len + 1);
                    memCopy(text, memAddress(terminated), len);
                    terminated.put(len, (byte)0);
                    glfwSetClipboardString(m_Window, terminated);
                }
            })
            .paste((handle, edit) ->
            {
                long clipboard = nglfwGetClipboardString(m_Window);
                if (clipboard != NULL)
                {
                    nnk_textedit_paste(edit, clipboard, nnk_strlen(clipboard));
                }
            });
    }

    // Native GLFW window handle.
    private long m_Window;
    // Window size, refreshed every frame in processEvents().
    static int m_Width, m_Height;
    // Framebuffer size, refreshed every frame in processEvents().
    static int m_DisplayWidth, m_DisplayHeight;
    private String m_Title;
    GUIRenderer renderer;
}
And here is my GUIRender class
package Engine.Renderer;
import static org.lwjgl.nuklear.Nuklear.*;
import static org.lwjgl.opengl.GL20C.*;
import static org.lwjgl.opengl.GL30.glBindVertexArray;
import static org.lwjgl.opengl.GL30.glGenVertexArrays;
import static org.lwjgl.stb.STBTruetype.*;
import static org.lwjgl.stb.STBTruetype.stbtt_GetCodepointHMetrics;
import static org.lwjgl.system.MemoryStack.stackPush;
import static org.lwjgl.system.MemoryUtil.*;
import static org.lwjgl.system.MemoryUtil.memAddress;
import Engine.IOUtil;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import java.util.Objects;
import org.lwjgl.nuklear.*;
import org.lwjgl.stb.STBTTAlignedQuad;
import org.lwjgl.stb.STBTTFontinfo;
import org.lwjgl.stb.STBTTPackContext;
import org.lwjgl.stb.STBTTPackedchar;
import org.lwjgl.system.MemoryStack;
import org.lwjgl.system.Platform;
/**
 * Renders the Nuklear GUI with OpenGL: owns the shader program, the vertex
 * and element buffers, the null texture and the baked font atlas.
 *
 * Nuklear is an immediate-mode GUI: the window contents are re-declared on
 * every {@link #Render()} call, because {@code nk_clear} at the end of each
 * frame discards everything that was declared for that frame.
 */
public class GUIRenderer extends Renderer
{
    /**
     * Loads the TTF font file and creates all GL resources.
     *
     * @throws RuntimeException if the font file cannot be read
     */
    public GUIRenderer()
    {
        super();
        try
        {
            this.ttf = IOUtil.ioResourceToByteBuffer("C:/Windows/Fonts/Arial.ttf", 512 * 1024);
        } catch (IOException e)
        {
            throw new RuntimeException(e);
        }
        if (!m_Initialized)
        {
            Initialize();
            m_Initialized = true;
        }
    }

    /**
     * Declares the GUI for the current frame. Must run once per frame,
     * before the command queue is converted and drawn.
     */
    private void DeclareUI()
    {
        try (MemoryStack stack = stackPush()) {
            NkRect rect = NkRect.mallocStack(stack);
            if (nk_begin(
                NuklearContainer.ctx,
                "Hello World",
                nk_rect(50, 50, 230, 250, rect),
                NK_WINDOW_BORDER | NK_WINDOW_NO_INPUT | NK_WINDOW_TITLE
            )) {
                nk_layout_row_static(NuklearContainer.ctx, 20, 80, 1);
                nk_label(NuklearContainer.ctx, "background:", NK_TEXT_LEFT);
            }
        }
        // nk_end is called whether or not nk_begin returned true.
        nk_end(NuklearContainer.ctx);
    }

    /**
     * Compiles and links the GUI shader program, creates the VAO/VBO/EBO
     * with the 20-byte vertex layout, creates the 1x1 white null texture
     * and bakes the font.
     *
     * @throws IllegalStateException if a shader fails to compile or the
     *                               program fails to link
     */
    private void Initialize()
    {
        String NK_SHADER_VERSION = Platform.get() == Platform.MACOSX ? "#version 150\n" : "#version 300 es\n";
        String vertex_shader =
            NK_SHADER_VERSION +
            "uniform mat4 ProjMtx;\n" +
            "in vec2 Position;\n" +
            "in vec2 TexCoord;\n" +
            "in vec4 Color;\n" +
            "out vec2 Frag_UV;\n" +
            "out vec4 Frag_Color;\n" +
            "void main() {\n" +
            " Frag_UV = TexCoord;\n" +
            " Frag_Color = Color;\n" +
            " gl_Position = ProjMtx * vec4(Position.xy, 0, 1);\n" +
            "}\n";
        String fragment_shader =
            NK_SHADER_VERSION +
            "precision mediump float;\n" +
            "uniform sampler2D Texture;\n" +
            "in vec2 Frag_UV;\n" +
            "in vec4 Frag_Color;\n" +
            "out vec4 Out_Color;\n" +
            "void main(){\n" +
            " Out_Color = Frag_Color * texture(Texture, Frag_UV.st);\n" +
            "}\n";

        nk_buffer_init(NuklearContainer.cmds, NuklearContainer.ALLOCATOR, BUFFER_INITIAL_SIZE);

        m_Program = glCreateProgram();
        m_Vertex_Shader = glCreateShader(GL_VERTEX_SHADER);
        m_Fragment_Shader = glCreateShader(GL_FRAGMENT_SHADER);
        glShaderSource(m_Vertex_Shader, vertex_shader);
        glShaderSource(m_Fragment_Shader, fragment_shader);
        glCompileShader(m_Vertex_Shader);
        glCompileShader(m_Fragment_Shader);
        if (glGetShaderi(m_Vertex_Shader, GL_COMPILE_STATUS) != GL_TRUE)
        {
            throw new IllegalStateException();
        }
        if (glGetShaderi(m_Fragment_Shader, GL_COMPILE_STATUS) != GL_TRUE)
        {
            throw new IllegalStateException();
        }
        glAttachShader(m_Program, m_Vertex_Shader);
        glAttachShader(m_Program, m_Fragment_Shader);
        glLinkProgram(m_Program);
        if (glGetProgrami(m_Program, GL_LINK_STATUS) != GL_TRUE)
        {
            throw new IllegalStateException();
        }

        m_Uniform_Texture = glGetUniformLocation(m_Program, "Texture");
        m_Uniform_Proj = glGetUniformLocation(m_Program, "ProjMtx");
        int attrib_pos = glGetAttribLocation(m_Program, "Position");
        int attrib_uv = glGetAttribLocation(m_Program, "TexCoord");
        int attrib_col = glGetAttribLocation(m_Program, "Color");

        {
            // buffer setup
            m_Vbo = glGenBuffers();
            m_Ebo = glGenBuffers();
            m_Vao = glGenVertexArrays();
            glBindVertexArray(m_Vao);
            glBindBuffer(GL_ARRAY_BUFFER, m_Vbo);
            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_Ebo);
            glEnableVertexAttribArray(attrib_pos);
            glEnableVertexAttribArray(attrib_uv);
            glEnableVertexAttribArray(attrib_col);
            // 20-byte vertex: 2 floats position, 2 floats uv, 4 bytes colour
            glVertexAttribPointer(attrib_pos, 2, GL_FLOAT, false, 20, 0);
            glVertexAttribPointer(attrib_uv, 2, GL_FLOAT, false, 20, 8);
            glVertexAttribPointer(attrib_col, 4, GL_UNSIGNED_BYTE, true, 20, 16);
        }

        {
            // null texture setup
            int nullTexID = glGenTextures();
            NuklearContainer.null_texture.texture().id(nullTexID);
            NuklearContainer.null_texture.uv().set(0.5f, 0.5f);
            glBindTexture(GL_TEXTURE_2D, nullTexID);
            try (MemoryStack stack = stackPush())
            {
                glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 1, 1, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, stack.ints(0xFFFFFFFF));
            }
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        }

        glBindTexture(GL_TEXTURE_2D, 0);
        glBindBuffer(GL_ARRAY_BUFFER, 0);
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
        glBindVertexArray(0);

        FontInit();
    }

    /**
     * Bakes code points 32..126 of the loaded TTF into a 1024x1024 atlas
     * texture and installs the width/query callbacks Nuklear uses to
     * measure and place glyphs.
     */
    private void FontInit()
    {
        int BITMAP_W = 1024;
        int BITMAP_H = 1024;
        int FONT_HEIGHT = 18;
        int fontTexID = glGenTextures();
        STBTTFontinfo fontInfo = STBTTFontinfo.create();
        STBTTPackedchar.Buffer cdata = STBTTPackedchar.create(95);
        float scale;
        float descent;

        try (MemoryStack stack = stackPush()) {
            stbtt_InitFont(fontInfo, ttf);
            scale = stbtt_ScaleForPixelHeight(fontInfo, FONT_HEIGHT);

            IntBuffer d = stack.mallocInt(1);
            stbtt_GetFontVMetrics(fontInfo, null, d, null);
            descent = d.get(0) * scale;

            ByteBuffer bitmap = memAlloc(BITMAP_W * BITMAP_H);

            STBTTPackContext pc = STBTTPackContext.mallocStack(stack);
            stbtt_PackBegin(pc, bitmap, BITMAP_W, BITMAP_H, 0, 1, NULL);
            stbtt_PackSetOversampling(pc, 4, 4);
            stbtt_PackFontRange(pc, ttf, 0, FONT_HEIGHT, 32, cdata);
            stbtt_PackEnd(pc);

            // Convert R8 to RGBA8
            ByteBuffer texture = memAlloc(BITMAP_W * BITMAP_H * 4);
            for (int i = 0; i < bitmap.capacity(); i++) {
                texture.putInt((bitmap.get(i) << 24) | 0x00FFFFFF);
            }
            texture.flip();

            glBindTexture(GL_TEXTURE_2D, fontTexID);
            glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, BITMAP_W, BITMAP_H, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, texture);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

            memFree(texture);
            memFree(bitmap);
        }

        NuklearContainer.default_font
            .width((handle, h, text, len) -> {
                float text_width = 0;
                try (MemoryStack stack = stackPush()) {
                    IntBuffer unicode = stack.mallocInt(1);

                    int glyph_len = nnk_utf_decode(text, memAddress(unicode), len);
                    int text_len = glyph_len;

                    if (glyph_len == 0) {
                        return 0;
                    }

                    IntBuffer advance = stack.mallocInt(1);
                    while (text_len <= len && glyph_len != 0) {
                        if (unicode.get(0) == NK_UTF_INVALID) {
                            break;
                        }

                        /* query currently drawn glyph information */
                        stbtt_GetCodepointHMetrics(fontInfo, unicode.get(0), advance, null);
                        text_width += advance.get(0) * scale;

                        /* offset next glyph */
                        glyph_len = nnk_utf_decode(text + text_len, memAddress(unicode), len - text_len);
                        text_len += glyph_len;
                    }
                }
                return text_width;
            })
            .height(FONT_HEIGHT)
            .query((handle, font_height, glyph, codepoint, next_codepoint) -> {
                try (MemoryStack stack = stackPush()) {
                    FloatBuffer x = stack.floats(0.0f);
                    FloatBuffer y = stack.floats(0.0f);

                    STBTTAlignedQuad q = STBTTAlignedQuad.mallocStack(stack);
                    IntBuffer advance = stack.mallocInt(1);

                    stbtt_GetPackedQuad(cdata, BITMAP_W, BITMAP_H, codepoint - 32, x, y, q, false);
                    stbtt_GetCodepointHMetrics(fontInfo, codepoint, advance, null);

                    NkUserFontGlyph ufg = NkUserFontGlyph.create(glyph);

                    ufg.width(q.x1() - q.x0());
                    ufg.height(q.y1() - q.y0());
                    ufg.offset().set(q.x0(), q.y0() + (FONT_HEIGHT + descent));
                    ufg.xadvance(advance.get(0) * scale);
                    ufg.uv(0).set(q.s0(), q.t0());
                    ufg.uv(1).set(q.s1(), q.t1());
                }
            })
            .texture(it -> it
                .id(fontTexID));

        nk_style_set_font(NuklearContainer.ctx, NuklearContainer.default_font);
    }

    /**
     * Declares the GUI for this frame, converts Nuklear's command queue
     * into vertex/element data, issues one draw call per draw command and
     * finally clears the Nuklear context for the next frame.
     */
    public void Render()
    {
        // The previous frame's nk_clear() wiped the GUI, so it has to be
        // declared again before converting; otherwise only the very first
        // frame would contain the window.
        DeclareUI();

        try (MemoryStack stack = stackPush())
        {
            // setup global state
            glEnable(GL_BLEND);
            glBlendEquation(GL_FUNC_ADD);
            glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
            glDisable(GL_CULL_FACE);
            glDisable(GL_DEPTH_TEST);
            glEnable(GL_SCISSOR_TEST);
            glActiveTexture(GL_TEXTURE0);

            // setup program
            glUseProgram(m_Program);
            glUniform1i(m_Uniform_Texture, 0);
            glUniformMatrix4fv(m_Uniform_Proj, false, stack.floats(
                2.0f / Window.m_Width, 0.0f, 0.0f, 0.0f,
                0.0f, -2.0f / Window.m_Height, 0.0f, 0.0f,
                0.0f, 0.0f, -1.0f, 0.0f,
                -1.0f, 1.0f, 0.0f, 1.0f
            ));
            glViewport(0, 0, Window.m_DisplayWidth, Window.m_DisplayHeight);
        }

        {
            // convert from command queue into draw list and draw to screen

            // allocate vertex and element buffer
            glBindVertexArray(m_Vao);
            glBindBuffer(GL_ARRAY_BUFFER, m_Vbo);
            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_Ebo);

            glBufferData(GL_ARRAY_BUFFER, max_vertex_buffer, GL_STREAM_DRAW);
            glBufferData(GL_ELEMENT_ARRAY_BUFFER, max_element_buffer, GL_STREAM_DRAW);

            // load draw vertices & elements directly into vertex + element buffer
            ByteBuffer vertices = Objects.requireNonNull(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY, max_vertex_buffer, null));
            ByteBuffer elements = Objects.requireNonNull(glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY, max_element_buffer, null));
            try (MemoryStack stack = stackPush())
            {
                // fill convert configuration
                NkConvertConfig config = NkConvertConfig.callocStack(stack)
                    .vertex_layout(VERTEX_LAYOUT)
                    .vertex_size(20)
                    .vertex_alignment(4)
                    .null_texture(NuklearContainer.null_texture)
                    .circle_segment_count(22)
                    .curve_segment_count(22)
                    .arc_segment_count(22)
                    .global_alpha(1.0f)
                    .shape_AA(NK_ANTI_ALIASING_ON)
                    .line_AA(NK_ANTI_ALIASING_ON);

                // setup buffers to load vertices and elements
                NkBuffer vbuf = NkBuffer.mallocStack(stack);
                NkBuffer ebuf = NkBuffer.mallocStack(stack);

                nk_buffer_init_fixed(vbuf, vertices/*, max_vertex_buffer*/);
                nk_buffer_init_fixed(ebuf, elements/*, max_element_buffer*/);
                nk_convert(NuklearContainer.ctx, NuklearContainer.cmds, vbuf, ebuf, config);
            }
            glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
            glUnmapBuffer(GL_ARRAY_BUFFER);

            // iterate over and execute each draw command
            float fb_scale_x = (float) Window.m_DisplayWidth / (float) Window.m_Width;
            float fb_scale_y = (float) Window.m_DisplayHeight / (float) Window.m_Height;

            long offset = NULL;
            for (NkDrawCommand cmd = nk__draw_begin(NuklearContainer.ctx, NuklearContainer.cmds); cmd != null; cmd = nk__draw_next(cmd, NuklearContainer.cmds, NuklearContainer.ctx))
            {
                if (cmd.elem_count() == 0)
                {
                    continue;
                }
                glBindTexture(GL_TEXTURE_2D, cmd.texture().id());
                // Nuklear's clip rectangle has a top-left origin; glScissor
                // expects a bottom-left origin, hence the vertical flip.
                glScissor(
                    (int) (cmd.clip_rect().x() * fb_scale_x),
                    (int) ((Window.m_Height - (int) (cmd.clip_rect().y() + cmd.clip_rect().h())) * fb_scale_y),
                    (int) (cmd.clip_rect().w() * fb_scale_x),
                    (int) (cmd.clip_rect().h() * fb_scale_y)
                );
                glDrawElements(GL_TRIANGLES, cmd.elem_count(), GL_UNSIGNED_SHORT, offset);
                // elements are 16-bit indices: 2 bytes each
                offset += cmd.elem_count() * 2;
            }
            nk_clear(NuklearContainer.ctx);
        }

        glUseProgram(0);
        glBindBuffer(GL_ARRAY_BUFFER, 0);
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
        glBindVertexArray(0);
        glDisable(GL_BLEND);
        glDisable(GL_SCISSOR_TEST);
    }

    // Set once the GL resources have been created by Initialize().
    private boolean m_Initialized;
    public boolean IsInitialized() { return m_Initialized; }

    // Initial size, in bytes, of the Nuklear draw command buffer.
    private int BUFFER_INITIAL_SIZE = 4 * 1024;
    // Sizes, in bytes, of the streamed vertex and element buffers.
    long max_vertex_buffer = 512 * 1024;
    long max_element_buffer = 128 * 1024;

    // Vertex layout handed to nk_convert; matches the attribute pointers
    // configured in Initialize().
    private NkDrawVertexLayoutElement.Buffer VERTEX_LAYOUT = NkDrawVertexLayoutElement.create(4)
        .position(0).attribute(NK_VERTEX_POSITION).format(NK_FORMAT_FLOAT).offset(0)
        .position(1).attribute(NK_VERTEX_TEXCOORD).format(NK_FORMAT_FLOAT).offset(8)
        .position(2).attribute(NK_VERTEX_COLOR).format(NK_FORMAT_R8G8B8A8).offset(16)
        .position(3).attribute(NK_VERTEX_ATTRIBUTE_COUNT).format(NK_FORMAT_COUNT).offset(0)
        .flip();

    // Raw TTF file contents; read while baking the atlas in FontInit().
    private ByteBuffer ttf;
}
I'm really sorry if the code is huge, but even just the GLFWDemo is 600+ lines long.

FFMPEG RGB to YUV420P : Warning: data is not aligned! This can lead to a speedloss [duplicate]

This question already has answers here:
Pixel format conversion issue [FFMPEG]
(2 answers)
Closed 4 years ago.
I am trying to convert a standard RGB color space to YUV420P. I am struggling to figure out why I keep getting 'Warning: data is not aligned! This can lead to a speedloss' when executing the code. I have looked at a multitude of examples.
int ImageDecoder::rgb2yuv(uint8_t *src,
uint8_t *dest,
uint32_t width,
uint32_t height)
{
struct SwsContext *imgCtx = NULL;
AVFrame *pFrameYUV;
enum AVPixelFormat src_pix_fmt = AV_PIX_FMT_RGB24;
enum AVPixelFormat dst_pix_fmt = AV_PIX_FMT_YUV420P;
int ret;
int size;
const int RGBLinesize[1] = { 3 * (int)width };
pFrameYUV = av_frame_alloc();
pFrameYUV->width = width;
pFrameYUV->height = height;
pFrameYUV->format = dst_pix_fmt;
// Initialize pFrameYUV linesize
ret = av_image_alloc(pFrameYUV->data, pFrameYUV->linesize, pFrameYUV->width, pFrameYUV->height, AV_PIX_FMT_YUV420P, 1);
getLogger()->info("ImageDecoder:{} width={} height={} linesize[0]={} linesize[1]={} linesize[2]={}",
__func__, pFrameYUV->width, pFrameYUV->height, pFrameYUV->linesize[0], pFrameYUV->linesize[1], pFrameYUV->linesize[2]);
size = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, pFrameYUV->width, pFrameYUV->height, 1);
imgCtx = sws_getCachedContext(imgCtx,
width,
height,
AV_PIX_FMT_RGB24,
pFrameYUV->width,
pFrameYUV->height,
AV_PIX_FMT_YUV420P,
SWS_BICUBIC, 0, 0, 0);
if( imgCtx == NULL)
{
getLogger()->error("ERROR: ImageDecoder: {} Cannot initialize the conversion context", __func__);
}
sws_scale(imgCtx,
(const uint8_t* const*)&src,
RGBLinesize,
0,
height,
pFrameYUV->data,
pFrameYUV->linesize);
memcpy(dest, &pFrameYUV->data[0], size);
sws_freeContext(imgCtx);
av_free(pFrameYUV);
}
I hope it helps you.
I am converting YUV444 decoded frames to RGBA format.
// Convert the decoded frame (YUV_frame) to RGBA in a newly allocated frame.
// Every failure path releases whatever was allocated so far before
// returning false.
AVFrame* RGBFrame = av_frame_alloc();
if (RGBFrame == NULL)
    return false;
RGBFrame->width = YUV_frame->width;
RGBFrame->height = YUV_frame->height;
RGBFrame->format = AV_PIX_FMT_RGBA;
// The last argument is the buffer alignment, not a picture type; 1 gives
// tightly packed rows (linesize[0] == 4 * width).
int ret = av_image_alloc(RGBFrame->data, RGBFrame->linesize, RGBFrame->width, RGBFrame->height, AV_PIX_FMT_RGBA, 1);
if (ret < 0) {
    av_frame_free(&RGBFrame);
    return false;
}
SwsContext* sws_Context = NULL;
sws_Context = sws_getCachedContext(sws_Context, YUV_frame->width, YUV_frame->height, pVideoCodecCtx->pix_fmt,
                                   YUV_frame->width, YUV_frame->height, AV_PIX_FMT_RGBA, SWS_BILINEAR, NULL, NULL, NULL);
if (sws_Context == NULL) {
    // The image came from av_image_alloc and is not reference counted,
    // so it has to be freed explicitly.
    av_freep(&RGBFrame->data[0]);
    av_frame_free(&RGBFrame);
    return false;
}
int result = sws_scale(sws_Context, YUV_frame->data, YUV_frame->linesize, 0, (int)YUV_frame->height, RGBFrame->data, RGBFrame->linesize);
sws_freeContext(sws_Context);
if (result < 0) {
    av_freep(&RGBFrame->data[0]);
    av_frame_free(&RGBFrame);
    return false;
}
int ImageDecoder::rgb2yuv(uint8_t *src,
uint8_t *dest,
uint32_t *outBufferSize,
uint32_t width,
uint32_t height)
{
struct SwsContext *imgCtx = NULL;
uint8_t * RGBData[1] = {src};
const int RGBLinesize[1] = {3 * (int) width};
uint8_t * YUVData[] = {dest,
YUVData[0] + ((int) width * (int) height),
YUVData[1] + (((int) width * (int) height) / 4)};
const int YUVLinesize[] = {(int) width, (int) width / 2, (int) width / 2};
int size;
size = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, width, height, 1);
*outBufferSize = size;
imgCtx = sws_getCachedContext(imgCtx,
width,
height,
AV_PIX_FMT_RGB24,
width,
height,
AV_PIX_FMT_YUV420P,
SWS_BICUBIC, 0, 0, 0);
if (imgCtx == NULL)
{
getLogger()->error("ERROR: ImageDecoder: {} Cannot initialize the conversion context", __func__);
return -1;
}
sws_scale(imgCtx,
RGBData,
RGBLinesize,
0,
height,
YUVData,
YUVLinesize);
sws_freeContext(imgCtx);
return 0;
}

Resources