I'm programming a 2D-Game in OpenGL and I have to output a level which consists of 20x15 fields.
So I'm currently outputting a texture for each field which is quite slow (300 textures/frame).
But since the level never changes, I wondered if it's possible to combine the textures into a single big texture before the game loop starts.
Then I would have to output only one texture with 4 Texture Coordinates (0/0)(0/1)(1/1)(1/0) and 4 glVertex2f() which specifies the position in the Window.
This is my current Code for each of the 300 fields:
// Draw one level field as a textured quad. textArea selects the tile inside the
// bound texture; display gives the corner positions of the quad in the window.
glColor3f(1, 1, 1);
glBindTexture(GL_TEXTURE_2D, textur);
glBegin(GL_QUADS);
    // corner (display.a.x, display.a.y)
    glTexCoord2f(textArea.a.x, textArea.b.y);
    glVertex2f(display.a.x, display.a.y);
    // corner (display.a.x, display.b.y)
    glTexCoord2f(textArea.a.x, textArea.a.y);
    glVertex2f(display.a.x, display.b.y);
    // corner (display.b.x, display.b.y)
    glTexCoord2f(textArea.b.x, textArea.a.y);
    glVertex2f(display.b.x, display.b.y);
    // corner (display.b.x, display.a.y)
    glTexCoord2f(textArea.b.x, textArea.b.y);
    glVertex2f(display.b.x, display.a.y);
glEnd();
Note that I have the images for all possible field-types in one .tga-File. So I'm choosing the right one with glTexCoord2f().
The image-File with all Tiles is loaded into
// OpenGL texture name of the tile image; bound with glBindTexture before each field is drawn.
GLuint textur;
So I bind the same texture for every field.
My target is to decrease CPU-time. Display-Lists didn't work because there is so much data to load onto the graphics card that, in the end, display lists were even slower.
I also wasn't able to use VBOs because I don't use extensions like GLUT.
So my idea was to generate a single texture which should be quite easy and effective.
I hope you can give me feedback how I can combine textures and if this method would be the easiest one to increase performance
EDIT: these are the OpenGL functions I use in my program:
When I start the program, I initialize the window:
// Initialise GLFW and open the game window. Both steps can fail, so both
// results are checked before any further GLFW/OpenGL call is made.
if( !glfwInit() )
    return;
if( !glfwOpenWindow(windowSize.x, windowSize.y, 0, 0, 0, 0, 0, 0, GLFW_WINDOW) )
{
    glfwTerminate();
    return;
}
And that's all the game loop does with OpenGL:
// Game entry point: sets the blending/alpha-test state once, then per frame
// resets viewport and projection, draws the level and prints the time spent
// between the start of drawing and the buffer swap.
int main()
{
    //INIT HERE (see code above)
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
    glEnable(GL_BLEND);
    glAlphaFunc(GL_GREATER, 0.1f);
    glEnable(GL_ALPHA_TEST);
    clock_t loopStart; // clock() reading taken just before the drawing code
    do{
        height = height > 0 ? height : 1; // never pass a zero height to glViewport
        glViewport( 0, 0, width, height ); //set Origin
        glClearColor( 0.0f, 0.0f, 0.0f, 0.0f ); //background-color
        glClear(GL_COLOR_BUFFER_BIT);
        glMatrixMode(GL_PROJECTION);
        glLoadIdentity();
        glOrtho(0, windowSize.x, 0, windowSize.y, 0, 128); //2D-Mode
        glMatrixMode(GL_MODELVIEW);
        loopStart = clock();
        //(...) OUTPUT HERE (code see above)
        glfwSwapBuffers(); // present the rendered frame
        // clock() counts CLOCKS_PER_SEC ticks per second, not milliseconds, and
        // clock_t is not an int: convert explicitly and print with a matching format.
        printf("%4ldms -> ", (long)(1000.0 * (clock() - loopStart) / CLOCKS_PER_SEC));
    }while(...);
    glDisable(GL_ALPHA_TEST);
    glDisable(GL_TEXTURE_2D);
    glfwTerminate();
}
I see you're using GLFW. You can add GLEW and GLM and then you should use OpenGL 3.x or higher.
Here is a FULL example of how you can easily draw 2000 textured quads (with alpha blending) or more at 200 FPS or more on a low-budget laptop. It has only one small texture, but it will also work with a 4096x4096 texture atlas. You get one HUGE performance boost if the sub-texture size in the big texture EXACTLY matches the size of the quad you draw! You should also use 50x50 pixels in the big texture! The following demo code also UPDATES ALL 2000 quads each frame and sends them to the GPU. If you do not have to update them each frame and instead pass the scroll coordinates to the shader, you will gain performance again.
If you need no blending...use Alpha-Tests..you will gain again more speed.
#define GLEW_STATIC
#include "glew.h"
#include "glfw.h"
#include "glm.hpp"
#include "glm/gtc/matrix_transform.hpp"
#include "glm/gtx/transform.hpp"
#include <sstream>
#include <fstream>
#include <vector>
// Turns a byte offset into the pointer-typed value that is passed as the last
// argument of glVertexAttribPointer.
#define BUFFER_OFFSET(i) ((char *)NULL + (i))
// Log file, opened in append mode; receives the start marker and the FPS readings.
std::ofstream logger("Log\\Ausgabe.txt", (std::ios::out | std::ios::app));
// One vertex as laid out in the vertex buffer: a 3-component position
// followed by a 2-component texture coordinate (five floats in total).
struct Vertex
{
    // position
    float x;
    float y;
    float z;
    // texture coordinate
    float tx;
    float ty;
};
// Axis-aligned rectangle described by an anchor point and its extent.
struct Quad
{
    // anchor point
    float x;
    float y;
    // extent
    float width;
    float height;
};
// Returns the GLFW timer value in whole milliseconds; the timer is reset to
// zero first when bFirstRun is true.
int getHighResTimeInMilliSeconds(bool bFirstRun);
// Builds the shader program from the vertex and fragment shader text files
// and returns its OpenGL program name.
GLuint buildShader();
// Writes the six vertices (two triangles) of *quad into ptrVertexArrayLocal,
// starting at index *ptrQuadVerticeCounter, and advances that counter by six.
void addQuadToLocalVerticeArray(Vertex * ptrVertexArrayLocal, Quad *quad, int *ptrQuadVerticeCounter);
int main()
{
logger << "Start" << std::endl;
if(!glfwInit())
exit(EXIT_FAILURE);
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR,3);
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR,3);
glfwOpenWindowHint(GLFW_OPENGL_FORWARD_COMPAT, 1);
glfwOpenWindowHint(GLFW_OPENGL_PROFILE,GLFW_OPENGL_CORE_PROFILE);
if( !glfwOpenWindow(1366, 768,8,8,8,8,32,32,GLFW_FULLSCREEN) )
{
glfwTerminate();
exit( EXIT_FAILURE );
}
if (glewInit() != GLEW_OK)
exit( EXIT_FAILURE );
//Init
GLuint VertexArrayID;
GLuint vertexbuffer;
GLuint MatrixID;
GLuint TextureID;
GLuint Texture;
GLuint programID = buildShader();
//Texture in Video-Speicher erstellen
GLFWimage img;
int iResult = glfwReadImage("Graphics\\gfx.tga", &img, GLFW_NO_RESCALE_BIT);
glEnable(GL_TEXTURE_2D);
glGenTextures(1, &Texture);
glBindTexture(GL_TEXTURE_2D, Texture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,32,32, 0, GL_RGBA, GL_UNSIGNED_BYTE, img.Data);
glfwFreeImage(&img);
Vertex * ptrVertexArrayLocal = new Vertex[12000];
glGenVertexArrays(1, &VertexArrayID);
glBindVertexArray(VertexArrayID);
glGenBuffers(1, &vertexbuffer);
glBindBuffer(GL_ARRAY_BUFFER, VertexArrayID);
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * 12000, NULL, GL_DYNAMIC_DRAW);
glm::mat4 Projection = glm::ortho(0.0f, (float)1366,0.0f, (float)768, 0.0f, 100.0f);
glm::mat4 Model = glm::mat4(1.0f);
glm::mat4 MVP = Projection * Model;
glViewport( 0, 0, 1366, 768 );
MatrixID = glGetUniformLocation(programID, "MVP");
glEnable(GL_CULL_FACE);
glEnable (GL_BLEND);
glBlendFunc (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
TextureID = glGetUniformLocation(programID, "myTextureSampler");
glUseProgram(programID);
glUniformMatrix4fv(MatrixID, 1, GL_FALSE, &MVP[0][0]);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, Texture);
glUniform1i(TextureID, 0);
int iQuadVerticeCounter=0;
int iNumOfQuads = 2000;
Quad * ptrQuads = new Quad[iNumOfQuads];
//LOCAL VERTICES CHANGES EACH LOOP
for (int i=0; i<iNumOfQuads; i++)
{
ptrQuads[i].width = 32;
ptrQuads[i].height = 32;
ptrQuads[i].x = (float)(rand() % (1334));
ptrQuads[i].y = (float)(rand() % (736));
}
int iCurrentTime=0;
int iFPS=0;
int iFrames=0;
int iFrameCounterTimeStart=0;
int running = GL_TRUE;
bool bFirstRun=true;
while( running )
{
iCurrentTime = getHighResTimeInMilliSeconds(bFirstRun);
bFirstRun=false;
//UPDATE ALL QUADS EACH FRAME!
for (int i=0; i<iNumOfQuads; i++)
{
ptrQuads[i].width = 32;
ptrQuads[i].height = 32;
ptrQuads[i].x = ptrQuads[i].x;
ptrQuads[i].y = ptrQuads[i].y;
addQuadToLocalVerticeArray(ptrVertexArrayLocal, &ptrQuads[i], &iQuadVerticeCounter);
}
//DO THE RENDERING
glClear( GL_COLOR_BUFFER_BIT );
glBindBuffer(GL_ARRAY_BUFFER, VertexArrayID);
glBufferSubData(GL_ARRAY_BUFFER, 0,sizeof(Vertex) * iQuadVerticeCounter, ptrVertexArrayLocal);
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,sizeof(Vertex),BUFFER_OFFSET(0));
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
glVertexAttribPointer(1,2,GL_FLOAT,GL_FALSE,sizeof(Vertex),BUFFER_OFFSET(3*sizeof(GL_FLOAT)));
glDrawArrays(GL_TRIANGLES, 0, iQuadVerticeCounter);
glDisableVertexAttribArray(0);
glDisableVertexAttribArray(1);
iQuadVerticeCounter=0;
glfwSwapBuffers();
//END OF DOING THE RENDERING
running = !glfwGetKey( GLFW_KEY_ESC ) &&glfwGetWindowParam( GLFW_OPENED );
iFrames++;
if (iCurrentTime >= iFrameCounterTimeStart + 1000.0f)
{
iFPS = (int)((iCurrentTime - iFrameCounterTimeStart) / 1000.0f * iFrames);
iFrameCounterTimeStart = iCurrentTime;
iFrames = 0;
logger << "FPS: " << iFPS << std::endl;
}
}
glfwTerminate();
exit( EXIT_SUCCESS );
}
// Returns the GLFW timer value in whole milliseconds.
// bFirstRun: when true the timer is reset to zero before it is read.
int getHighResTimeInMilliSeconds(bool bFirstRun)
{
    if (bFirstRun)
        glfwSetTime(0);
    // Stay in double precision: squeezing the seconds value through a float
    // loses millisecond resolution once the program has run for a few hours.
    return static_cast<int>(glfwGetTime() * 1000.0);
}
GLuint buildShader()
{
//Hint: Shader in the TXT-File looks like this
/*std::stringstream ssVertexShader;
ssVertexShader << "#version 330 core"<< std::endl
<< "layout(location = 0) in vec3 vertexPosition_modelspace;"<< std::endl
<< "layout(location = 1) in vec2 vertexUV;"<< std::endl
<< "out vec2 UV;"<< std::endl
<< "uniform mat4 MVP;"<< std::endl
<< "void main(){"<< std::endl
<< "vec4 v = vec4(vertexPosition_modelspace,1);"<< std::endl
<< "gl_Position = MVP * v;"<< std::endl
<< "UV = vertexUV;"<< std::endl
<< "}"<< std::endl;*/
std::string strVertexShaderCode;
std::ifstream VertexShaderStream("Shader\\VertexShader.txt", std::ios::in);
if(VertexShaderStream.is_open())
{
std::string Line = "";
while(getline(VertexShaderStream, Line))
strVertexShaderCode += "\n" + Line;
VertexShaderStream.close();
}
//Hint: Shader in the TXT-File looks like this
/*std::stringstream ssFragmentShader;
ssFragmentShader << "#version 330 core\n"
"in vec2 UV;\n"
"out vec4 color;\n"
"uniform sampler2D myTextureSampler;\n"
"void main(){\n"
"color = texture( myTextureSampler, UV ).rgba;\n"
"}\n";*/
std::string strFragmentShaderCode;
std::ifstream FragmentShaderStream("Shader\\FragmentShader.txt", std::ios::in);
if(FragmentShaderStream.is_open())
{
std::string Line = "";
while(getline(FragmentShaderStream, Line))
strFragmentShaderCode += "\n" + Line;
FragmentShaderStream.close();
}
GLuint gluiVertexShaderId = glCreateShader(GL_VERTEX_SHADER);
char const * VertexSourcePointer = strVertexShaderCode.c_str();
glShaderSource(gluiVertexShaderId, 1, &VertexSourcePointer , NULL);
glCompileShader(gluiVertexShaderId);
GLint Result = GL_FALSE;
int InfoLogLength;
glGetShaderiv(gluiVertexShaderId, GL_COMPILE_STATUS, &Result);
glGetShaderiv(gluiVertexShaderId, GL_INFO_LOG_LENGTH, &InfoLogLength);
std::vector<char> VertexShaderErrorMessage(InfoLogLength);
glGetShaderInfoLog(gluiVertexShaderId, InfoLogLength, NULL, &VertexShaderErrorMessage[0]);
std::string strInfoLog = std::string(&VertexShaderErrorMessage[0]);
GLuint gluiFragmentShaderId = glCreateShader(GL_FRAGMENT_SHADER);
char const * FragmentSourcePointer = strFragmentShaderCode.c_str();
glShaderSource(gluiFragmentShaderId, 1, &FragmentSourcePointer , NULL);
glCompileShader(gluiFragmentShaderId);
Result = GL_FALSE;
glGetShaderiv(gluiFragmentShaderId, GL_COMPILE_STATUS, &Result);
glGetShaderiv(gluiFragmentShaderId, GL_INFO_LOG_LENGTH, &InfoLogLength);
std::vector<char> FragmentShaderErrorMessage(InfoLogLength);
glGetShaderInfoLog(gluiFragmentShaderId, InfoLogLength, NULL, &FragmentShaderErrorMessage[0]);
strInfoLog = std::string(&FragmentShaderErrorMessage[0]);
GLuint gluiProgramId = glCreateProgram();
glAttachShader(gluiProgramId, gluiVertexShaderId);
glAttachShader(gluiProgramId, gluiFragmentShaderId);
glLinkProgram(gluiProgramId);
Result = GL_FALSE;
glGetProgramiv(gluiProgramId, GL_LINK_STATUS, &Result);
glGetProgramiv(gluiProgramId, GL_INFO_LOG_LENGTH, &InfoLogLength);
std::vector<char> ProgramErrorMessage( std::max(InfoLogLength, int(1)) );
glGetProgramInfoLog(gluiProgramId, InfoLogLength, NULL, &ProgramErrorMessage[0]);
strInfoLog = std::string(&ProgramErrorMessage[0]);
glDeleteShader(gluiVertexShaderId);
glDeleteShader(gluiFragmentShaderId);
return gluiProgramId;
}
// Appends the two triangles that make up *quad to ptrVertexArrayLocal.
// Six vertices are written starting at index *ptrQuadVerticeCounter, and the
// counter is advanced by six. (quad->x, quad->y) is the top-left corner; the
// quad extends to the right by width and downwards (towards smaller y) by
// height. Texture coordinates span the full 0..1 range and z is always 0.
void addQuadToLocalVerticeArray(Vertex * ptrVertexArrayLocal, Quad *quad, int *ptrQuadVerticeCounter)
{
    const float left   = quad->x;
    const float right  = quad->x + quad->width;
    const float top    = quad->y;
    const float bottom = quad->y - quad->height;

    // { x, y, tx, ty } of every emitted vertex, in output order.
    const float corners[6][4] = {
        { left,  top,    0.0f, 1.0f },   // top left
        { left,  bottom, 0.0f, 0.0f },   // bottom left
        { right, bottom, 1.0f, 0.0f },   // bottom right
        { right, bottom, 1.0f, 0.0f },   // bottom right (second triangle)
        { right, top,    1.0f, 1.0f },   // top right
        { left,  top,    0.0f, 1.0f },   // top left
    };

    for (int k = 0; k < 6; ++k)
    {
        Vertex & target = ptrVertexArrayLocal[*ptrQuadVerticeCounter];
        target.x  = corners[k][0];
        target.y  = corners[k][1];
        target.z  = 0.0f;
        target.tx = corners[k][2];
        target.ty = corners[k][3];
        ++(*ptrQuadVerticeCounter);
    }
}
I identified a huge time-killer now. The textures I was using were too large, and the resolution was very inefficient.
The main-texture which included the level sprites had a resolution of 2200x2200 Pixels. So the GPU increased the size to 4096x4096 and calculated it with a huge amount of data.
The image contains 10x10 different level tiles which are drawn on the screen with a resolution of 50x50 pixels each.
So I saved the Tiles-File with a lower resolution (1020 x 1020 Pixels -> each tile=102x102px) and now I have a loop-cycle time of <=15ms.
This isn't perfect, but in comparison with my previous 30-60ms it was a huge progress.
Related
I noticed that my 3D engine runs very slow on AMD hardware. After some investigation the slow code boiled down to creating FBO with several attachments and writing to any nonzero attachment. In all tests I compared AMD performance with the same AMD GPU, but writing to unaffected GL_COLOR_ATTACHMENT0, and with Nvidia hardware whose performance difference to my AMD device is well known.
Writing fragments to nonzero attachments is 2-3 times slower than expected.
This code is equivalent to how I create a framebuffer and measure performance in my test apps:
// Create a framebuffer
static const auto attachmentCount = 6;
GLuint fb, att[attachmentCount];
glGenTextures(attachmentCount, att);
glGenFramebuffers(1, &fb);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fb);
for (auto i = 0; i < attachmentCount; ++i) {
glBindTexture(GL_TEXTURE_2D, att[i]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, GL_TEXTURE_2D, att[i], 0);
}
GLuint dbs[] = {
GL_NONE,
GL_COLOR_ATTACHMENT1,
GL_NONE,
GL_NONE,
GL_NONE,
GL_NONE};
glDrawBuffers(attachmentCount, dbs);
// Main loop
while (shouldWork) {
glClear(GL_COLOR_BUFFER_BIT);
for (int i = 0; i < 100; ++i) glDrawArrays(GL_TRIANGLES, 0, 6);
glfwSwapBuffers(window);
glfwPollEvents();
showFps();
}
Is anything wrong with it?
Fully reproducible minimal tests can be found here. I tried many other writing patterns or OpenGL states and described some of them in AMD Community.
I suppose the problem is in AMD's OpenGL driver, but if it's not, or you faced the same problem and found a workaround (a vendor extension?), please share.
UPD: moving problem detail here.
I prepared a minimal test pack, where the application creates an FBO with six RGBA UNSIGNED_BYTE attachments and renders 100 fullscreen rects per frame to it. There are four executables with four patterns of writing:
Writing shader output 0 to attachment 0. Only output 0 is routed to the framebuffer with glDrawBuffers. All other outputs are set to GL_NONE.
Same as 1, but with output and attachment 1.
Writing output 0 to attachment 0, but all six shader outputs are routed to attachments 0..5 respectively, and all drawbuffers except 0 are masked with glColorMaski.
Same as 3, but for attachment 1.
I run all tests on two machines with almost similar CPUs and following GPUs:
AMD Radeon RX550, driver version 19.30.01.16
Nvidia Geforce GTX 650 Ti, which is ~2x less powerful than RX550
and got these results:
Geforce GTX 650 Ti:
attachment0: 195 FPS
attachment1: 195 FPS
attachment0 masked: 195 FPS
attachment1 masked: 235 FPS
Radeon RX550:
attachment0: 350 FPS
attachment1: 185 FPS
attachment0 masked: 330 FPS
attachment1 masked: 175 FPS
Pre-built test executables are attached to the post or can be downloaded from Google drive.
Test sources (with MSVS-friendly cmake buildsystem) are available here on Github
All four programs show a black window and console with FPS counter.
We see that when writing to nonzero attachment, AMD is much slower than less powerful nvidia GPU and than itself. Also global masking of drawbuffer output drops some fps.
I also tried to use renderbuffers instead of textures, use other image formats (while the formats in tests are the most compatible ones), render to power-of-two sized framebuffer. Results were the same.
Explicitly turning off scissor, stencil and depth tests does not help.
If I decrease the number of attachments or reduce framebuffer coverage by multiplying vertex coords by a value less than 1, test performance increases proportionally, and finally RX550 outperforms GTX 650 Ti.
glClear calls are also affected, and their performance under various conditions fits the above observations.
My teammate launched tests on Radeon HD 3000 with Linux natively and using Wine. Both test runs exposed the same huge difference between attachment0 and attachment1 tests. I can't tell exactly what is his driver version, but it's provided by Ubuntu 19.04 repos.
Another teammate tried the tests on Radeon RX590 and got the same 2 times difference.
Finally, let me copy-paste two almost equivalent test examples here. This one works fast:
#include <iostream>
#include <cassert>
#include <string>
#include <sstream>
#include <chrono>
#include "GL/glew.h"
#include "GLFW/glfw3.h"
#include <vector>
static std::string getErrorDescr(const GLenum errCode)
{
// English descriptions are from
// https://www.opengl.org/sdk/docs/man/docbook4/xhtml/glGetError.xml
switch (errCode) {
case GL_NO_ERROR: return "No error has been recorded. THIS message is the error itself.";
case GL_INVALID_ENUM: return "An unacceptable value is specified for an enumerated argument.";
case GL_INVALID_VALUE: return "A numeric argument is out of range.";
case GL_INVALID_OPERATION: return "The specified operation is not allowed in the current state.";
case GL_INVALID_FRAMEBUFFER_OPERATION: return "The framebuffer object is not complete.";
case GL_OUT_OF_MEMORY: return "There is not enough memory left to execute the command.";
case GL_STACK_UNDERFLOW: return "An attempt has been made to perform an operation that would cause an internal stack to underflow.";
case GL_STACK_OVERFLOW: return "An attempt has been made to perform an operation that would cause an internal stack to overflow.";
default:;
}
return "No description available.";
}
static std::string getErrorMessage()
{
const GLenum error = glGetError();
if (GL_NO_ERROR == error) return "";
std::stringstream ss;
ss << "OpenGL error: " << static_cast<int>(error) << std::endl;
ss << "Error string: ";
ss << getErrorDescr(error);
ss << std::endl;
return ss.str();
}
// Prints the message from getErrorMessage() to std::cerr when it is not empty.
// Returns true when a message was printed, false otherwise.
[[maybe_unused]] static bool error()
{
    const std::string report = getErrorMessage();
    const bool failed = !report.empty();
    if (failed) {
        std::cerr << report;
    }
    return failed;
}
static bool compileShader(const GLuint shader, const std::string& source)
{
unsigned int linesCount = 0;
for (const auto c: source) linesCount += static_cast<unsigned int>(c == '\n');
const char** sourceLines = new const char*[linesCount];
int* lengths = new int[linesCount];
int idx = 0;
const char* lineStart = source.data();
int lineLength = 1;
const auto len = source.length();
for (unsigned int i = 0; i < len; ++i) {
if (source[i] == '\n') {
sourceLines[idx] = lineStart;
lengths[idx] = lineLength;
lineLength = 1;
lineStart = source.data() + i + 1;
++idx;
}
else ++lineLength;
}
glShaderSource(shader, linesCount, sourceLines, lengths);
glCompileShader(shader);
GLint logLength;
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &logLength);
if (logLength > 0) {
auto* const log = new GLchar[logLength + 1];
glGetShaderInfoLog(shader, logLength, nullptr, log);
std::cout << "Log: " << std::endl;
std::cout << log;
delete[] log;
}
GLint compileStatus;
glGetShaderiv(shader, GL_COMPILE_STATUS, &compileStatus);
delete[] sourceLines;
delete[] lengths;
return bool(compileStatus);
}
// Compiles the given vertex and fragment shader sources and links them into a
// program object. The linker's info log, if any, is printed to std::cout.
// Returns the program name on success and 0 on any failure; 0 is never a valid
// program name, so a failure can no longer be mistaken for a program. Shader
// and program objects created along the way are released on every path.
static GLuint createProgram(const std::string& vertSource, const std::string& fragSource)
{
    const auto vs = glCreateShader(GL_VERTEX_SHADER);
    if (vs == 0) {
        std::cerr << "Error: vertex shader is 0." << std::endl;
        return 0;
    }
    const auto fs = glCreateShader(GL_FRAGMENT_SHADER);
    if (fs == 0) {
        std::cerr << "Error: fragment shader is 0." << std::endl;
        glDeleteShader(vs);
        return 0;
    }

    // Compile shaders
    if (!compileShader(vs, vertSource)) {
        std::cerr << "Error: could not compile vertex shader." << std::endl;
        glDeleteShader(vs);
        glDeleteShader(fs);
        return 0;
    }
    if (!compileShader(fs, fragSource)) {
        std::cerr << "Error: could not compile fragment shader." << std::endl;
        glDeleteShader(vs);
        glDeleteShader(fs);
        return 0;
    }

    // Link program
    const auto program = glCreateProgram();
    if (program == 0) {
        std::cerr << "Error: program is 0." << std::endl;
        glDeleteShader(vs);
        glDeleteShader(fs);
        return 0;
    }
    glAttachShader(program, vs);
    glAttachShader(program, fs);
    glLinkProgram(program);
    // The shader objects are not needed after linking, whatever the outcome.
    glDeleteShader(vs);
    glDeleteShader(fs);

    // Get log
    GLint logLength = 0;
    glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
    if (logLength > 0) {
        std::vector<GLchar> log(logLength + 1);
        glGetProgramInfoLog(program, logLength, nullptr, log.data());
        std::cout << "Log: " << std::endl;
        std::cout << log.data();
    }

    GLint linkStatus = GL_FALSE;
    glGetProgramiv(program, GL_LINK_STATUS, &linkStatus);
    if (!linkStatus) {
        std::cerr << "Error: could not link." << std::endl;
        glDeleteProgram(program);
        return 0;
    }
    return program;
}
// GLSL 3.30 vertex shader: forwards the 2D input position (attribute
// location 0) unchanged as the clip-space position.
static const std::string vertSource = R"(
#version 330
layout(location = 0) in vec2 v;
void main()
{
gl_Position = vec4(v, 0.0, 1.0);
}
)";
// GLSL 3.30 fragment shader: writes a constant grey to fragment output
// location 0.
static const std::string fragSource = R"(
#version 330
layout(location = 0) out vec4 outColor0;
void main()
{
outColor0 = vec4(0.5, 0.5, 0.5, 1.0);
}
)";
// Benchmark entry point: creates an 800x600 OpenGL 3.3 core window, a
// framebuffer object with six RGBA texture attachments of which only colour
// attachment 0 is enabled as a draw buffer, and then draws 100 full-screen
// rectangles per frame into it, printing the frame rate every 50 frames.
// Returns 0 on normal exit and a non-zero code when initialisation fails.
int main()
{
    // Init
    if (!glfwInit()) {
        std::cerr << "Error: glfw init failed." << std::endl;
        return 3;
    }
    static const int width = 800;
    static const int height= 600;
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
    GLFWwindow* window = glfwCreateWindow(width, height, "Shader test", nullptr, nullptr);
    if (window == nullptr) {
        std::cerr << "Error: window is null." << std::endl;
        glfwTerminate();
        return 1;
    }
    glfwMakeContextCurrent(window);
    if (glewInit() != GLEW_OK) {
        std::cerr << "Error: glew not OK." << std::endl;
        glfwTerminate();
        return 2;
    }

    // Shader program
    const auto shaderProgram = createProgram(vertSource, fragSource);
    glUseProgram(shaderProgram);

    // Vertex buffer: two triangles covering the whole clip-space square
    GLuint vao;
    glGenVertexArrays(1, &vao);
    glBindVertexArray(vao);
    GLuint buffer;
    glGenBuffers(1, &buffer);
    glBindBuffer(GL_ARRAY_BUFFER, buffer);
    const float bufferData[] = {
        -1.0f, -1.0f,
         1.0f, -1.0f,
         1.0f,  1.0f,
        -1.0f, -1.0f,
         1.0f,  1.0f,
        -1.0f,  1.0f
    };
    glBufferData(GL_ARRAY_BUFFER, sizeof(bufferData), bufferData, GL_STATIC_DRAW);
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, (GLvoid*)(0));
    glClearColor(0.0f, 0.0f, 0.0f, 0.0f);

    // Framebuffer
    static const int attachmentCount = 6;
    GLuint fb, att[attachmentCount];
    glGenTextures(attachmentCount, att);
    glGenFramebuffers(1, &fb);
    // Allocate identical storage for every attachment texture.
    for (int i = 0; i < attachmentCount; ++i) {
        glBindTexture(GL_TEXTURE_2D, att[i]);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    }
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fb);
    for (int i = 0; i < attachmentCount; ++i) {
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, GL_TEXTURE_2D, att[i], 0);
    }
    // Only fragment output 0 is routed (to colour attachment 0).
    const GLenum dbs[attachmentCount] = {
        GL_COLOR_ATTACHMENT0,
        GL_NONE,
        GL_NONE,
        GL_NONE,
        GL_NONE,
        GL_NONE};
    glDrawBuffers(attachmentCount, dbs);
    if (GL_FRAMEBUFFER_COMPLETE != glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER)) {
        std::cerr << "Error: framebuffer is incomplete." << std::endl;
        glfwTerminate(); // also destroys the window and its context
        return 1;
    }
    if (error()) {
        std::cerr << "OpenGL error occured." << std::endl;
        glfwTerminate();
        return 2;
    }

    // Fpsmeter
    static const uint32_t framesMax = 50;
    uint32_t framesCount = 0;
    auto start = std::chrono::steady_clock::now();

    // Main loop
    while (!glfwWindowShouldClose(window)) {
        if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS) glfwSetWindowShouldClose(window, GLFW_TRUE);
        glClear(GL_COLOR_BUFFER_BIT);
        for (int i = 0; i < 100; ++i) glDrawArrays(GL_TRIANGLES, 0, 6);
        glfwSwapBuffers(window);
        glfwPollEvents();
        if (++framesCount == framesMax) {
            framesCount = 0;
            const auto now = std::chrono::steady_clock::now();
            const auto duration = now - start;
            start = now;
            const float secsPerFrame = (std::chrono::duration_cast<std::chrono::microseconds>(duration).count() / 1000000.0f) / framesMax;
            std::cout << "FPS: " << 1.0f / secsPerFrame << std::endl;
        }
    }

    // Shutdown: unbind everything before deleting it
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindVertexArray(0);
    glUseProgram(0);
    glDeleteProgram(shaderProgram);
    glDeleteBuffers(1, &buffer);
    glDeleteVertexArrays(1, &vao);
    glDeleteFramebuffers(1, &fb);
    glDeleteTextures(attachmentCount, att);
    glfwMakeContextCurrent(nullptr);
    glfwDestroyWindow(window);
    glfwTerminate();
    return 0;
}
And this one works equivalently fast on Nvidia and Intel GPUs, but 2-3 times slower than the first example on AMD GPUs:
#include <iostream>
#include <cassert>
#include <string>
#include <sstream>
#include <chrono>
#include "GL/glew.h"
#include "GLFW/glfw3.h"
#include <vector>
static std::string getErrorDescr(const GLenum errCode)
{
// English descriptions are from
// https://www.opengl.org/sdk/docs/man/docbook4/xhtml/glGetError.xml
switch (errCode) {
case GL_NO_ERROR: return "No error has been recorded. THIS message is the error itself.";
case GL_INVALID_ENUM: return "An unacceptable value is specified for an enumerated argument.";
case GL_INVALID_VALUE: return "A numeric argument is out of range.";
case GL_INVALID_OPERATION: return "The specified operation is not allowed in the current state.";
case GL_INVALID_FRAMEBUFFER_OPERATION: return "The framebuffer object is not complete.";
case GL_OUT_OF_MEMORY: return "There is not enough memory left to execute the command.";
case GL_STACK_UNDERFLOW: return "An attempt has been made to perform an operation that would cause an internal stack to underflow.";
case GL_STACK_OVERFLOW: return "An attempt has been made to perform an operation that would cause an internal stack to overflow.";
default:;
}
return "No description available.";
}
static std::string getErrorMessage()
{
const GLenum error = glGetError();
if (GL_NO_ERROR == error) return "";
std::stringstream ss;
ss << "OpenGL error: " << static_cast<int>(error) << std::endl;
ss << "Error string: ";
ss << getErrorDescr(error);
ss << std::endl;
return ss.str();
}
// Prints the message from getErrorMessage() to std::cerr when it is not empty.
// Returns true when a message was printed, false otherwise.
[[maybe_unused]] static bool error()
{
    const std::string report = getErrorMessage();
    const bool failed = !report.empty();
    if (failed) {
        std::cerr << report;
    }
    return failed;
}
static bool compileShader(const GLuint shader, const std::string& source)
{
unsigned int linesCount = 0;
for (const auto c: source) linesCount += static_cast<unsigned int>(c == '\n');
const char** sourceLines = new const char*[linesCount];
int* lengths = new int[linesCount];
int idx = 0;
const char* lineStart = source.data();
int lineLength = 1;
const auto len = source.length();
for (unsigned int i = 0; i < len; ++i) {
if (source[i] == '\n') {
sourceLines[idx] = lineStart;
lengths[idx] = lineLength;
lineLength = 1;
lineStart = source.data() + i + 1;
++idx;
}
else ++lineLength;
}
glShaderSource(shader, linesCount, sourceLines, lengths);
glCompileShader(shader);
GLint logLength;
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &logLength);
if (logLength > 0) {
auto* const log = new GLchar[logLength + 1];
glGetShaderInfoLog(shader, logLength, nullptr, log);
std::cout << "Log: " << std::endl;
std::cout << log;
delete[] log;
}
GLint compileStatus;
glGetShaderiv(shader, GL_COMPILE_STATUS, &compileStatus);
delete[] sourceLines;
delete[] lengths;
return bool(compileStatus);
}
// Compiles the given vertex and fragment shader sources and links them into a
// program object. The linker's info log, if any, is printed to std::cout.
// Returns the program name on success and 0 on any failure; 0 is never a valid
// program name, so a failure can no longer be mistaken for a program. Shader
// and program objects created along the way are released on every path.
static GLuint createProgram(const std::string& vertSource, const std::string& fragSource)
{
    const auto vs = glCreateShader(GL_VERTEX_SHADER);
    if (vs == 0) {
        std::cerr << "Error: vertex shader is 0." << std::endl;
        return 0;
    }
    const auto fs = glCreateShader(GL_FRAGMENT_SHADER);
    if (fs == 0) {
        std::cerr << "Error: fragment shader is 0." << std::endl;
        glDeleteShader(vs);
        return 0;
    }

    // Compile shaders
    if (!compileShader(vs, vertSource)) {
        std::cerr << "Error: could not compile vertex shader." << std::endl;
        glDeleteShader(vs);
        glDeleteShader(fs);
        return 0;
    }
    if (!compileShader(fs, fragSource)) {
        std::cerr << "Error: could not compile fragment shader." << std::endl;
        glDeleteShader(vs);
        glDeleteShader(fs);
        return 0;
    }

    // Link program
    const auto program = glCreateProgram();
    if (program == 0) {
        std::cerr << "Error: program is 0." << std::endl;
        glDeleteShader(vs);
        glDeleteShader(fs);
        return 0;
    }
    glAttachShader(program, vs);
    glAttachShader(program, fs);
    glLinkProgram(program);
    // The shader objects are not needed after linking, whatever the outcome.
    glDeleteShader(vs);
    glDeleteShader(fs);

    // Get log
    GLint logLength = 0;
    glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
    if (logLength > 0) {
        std::vector<GLchar> log(logLength + 1);
        glGetProgramInfoLog(program, logLength, nullptr, log.data());
        std::cout << "Log: " << std::endl;
        std::cout << log.data();
    }

    GLint linkStatus = GL_FALSE;
    glGetProgramiv(program, GL_LINK_STATUS, &linkStatus);
    if (!linkStatus) {
        std::cerr << "Error: could not link." << std::endl;
        glDeleteProgram(program);
        return 0;
    }
    return program;
}
// GLSL 3.30 vertex shader: forwards the 2D input position (attribute
// location 0) unchanged as the clip-space position.
static const std::string vertSource = R"(
#version 330
layout(location = 0) in vec2 v;
void main()
{
gl_Position = vec4(v, 0.0, 1.0);
}
)";
// GLSL 3.30 fragment shader: writes a constant grey to fragment output
// location 1.
static const std::string fragSource = R"(
#version 330
layout(location = 1) out vec4 outColor1;
void main()
{
outColor1 = vec4(0.5, 0.5, 0.5, 1.0);
}
)";
int main()
{
// Init
if (!glfwInit()) {
std::cerr << "Error: glfw init failed." << std::endl;
return 3;
}
static const int width = 800;
static const int height= 600;
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
GLFWwindow* window = nullptr;
window = glfwCreateWindow(width, height, "Shader test", nullptr, nullptr);
if (window == nullptr) {
std::cerr << "Error: window is null." << std::endl;
glfwTerminate();
return 1;
}
glfwMakeContextCurrent(window);
if (glewInit() != GLEW_OK) {
std::cerr << "Error: glew not OK." << std::endl;
glfwTerminate();
return 2;
}
// Shader program
const auto shaderProgram = createProgram(vertSource, fragSource);
glUseProgram(shaderProgram);
// Vertex buffer
GLuint vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
GLuint buffer;
glGenBuffers(1, &buffer);
glBindBuffer(GL_ARRAY_BUFFER, buffer);
float bufferData[] = {
-1.0f, -1.0f,
1.0f, -1.0f,
1.0f, 1.0f,
-1.0f, -1.0f,
1.0f, 1.0f,
-1.0f, 1.0f
};
glBufferData(GL_ARRAY_BUFFER, std::size(bufferData) * sizeof(float), bufferData, GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, (GLvoid*)(0));
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
// Framebuffer
GLuint fb, att[6];
glGenTextures(6, att);
glGenFramebuffers(1, &fb);
glBindTexture(GL_TEXTURE_2D, att[0]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glBindTexture(GL_TEXTURE_2D, att[1]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glBindTexture(GL_TEXTURE_2D, att[2]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glBindTexture(GL_TEXTURE_2D, att[3]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glBindTexture(GL_TEXTURE_2D, att[4]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glBindTexture(GL_TEXTURE_2D, att[5]);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fb);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, att[0], 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, att[1], 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT2, GL_TEXTURE_2D, att[2], 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT3, GL_TEXTURE_2D, att[3], 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT4, GL_TEXTURE_2D, att[4], 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT5, GL_TEXTURE_2D, att[5], 0);
GLuint dbs[] = {
GL_NONE,
GL_COLOR_ATTACHMENT1,
GL_NONE,
GL_NONE,
GL_NONE,
GL_NONE};
glDrawBuffers(6, dbs);
if (GL_FRAMEBUFFER_COMPLETE != glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER)) {
std::cerr << "Error: framebuffer is incomplete." << std::endl;
return 1;
}
if (error()) {
std::cerr << "OpenGL error occured." << std::endl;
return 2;
}
// Fpsmeter
static const uint32_t framesMax = 50;
uint32_t framesCount = 0;
auto start = std::chrono::steady_clock::now();
// Main loop
while (!glfwWindowShouldClose(window)) {
if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS) glfwSetWindowShouldClose(window, GLFW_TRUE);
glClear(GL_COLOR_BUFFER_BIT);
for (int i = 0; i < 100; ++i) glDrawArrays(GL_TRIANGLES, 0, 6);
glfwSwapBuffers(window);
glfwPollEvents();
if (++framesCount == framesMax) {
framesCount = 0;
const auto now = std::chrono::steady_clock::now();
const auto duration = now - start;
start = now;
const float secsPerFrame = (std::chrono::duration_cast<std::chrono::microseconds>(duration).count() / 1000000.0f) / framesMax;
std::cout << "FPS: " << 1.0f / secsPerFrame << std::endl;
}
}
// Shutdown
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(vao);
glUseProgram(0);
glDeleteProgram(shaderProgram);
glDeleteBuffers(1, &buffer);
glDeleteVertexArrays(1, &vao);
glDeleteFramebuffers(1, &fb);
glDeleteTextures(6, att);
glfwMakeContextCurrent(nullptr);
glfwDestroyWindow(window);
glfwTerminate();
return 0;
}
The only difference between these examples is the color attachment used.
I deliberately wrote two nearly identical, copy-pasted programs to rule out possible side effects of deleting and recreating the framebuffer.
UPD2: Also tried OpenGL 4.6 debug context on my test examples on both Nvidia and AMD. Got no performance warnings.
UPD3: RX470 results:
attachment0: 775 FPS
attachment1: 396 FPS
UPD4: I built attachment0 and attachment1 tests for webgl via emscripten and ran them on Radeon RX550. Full source is in problem's Github repo, build command lines are
emcc --std=c++17 -O3 -s WASM=1 -s USE_GLFW=3 -s USE_WEBGL2=1 ./FillRate_attachment0_webgl.cpp -o attachment0.html
emcc --std=c++17 -O3 -s WASM=1 -s USE_GLFW=3 -s USE_WEBGL2=1 ./FillRate_attachment1_webgl.cpp -o attachment1.html
Both test programs issue a single drawcall: glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 1000);
First test: Firefox with default config, i.e. DirectX-backed ANGLE.
Unmasked Vendor: Google Inc.
Unmasked Renderer: ANGLE (Radeon RX550/550 Series Direct3D11 vs_5_0 ps_5_0)
attachment0: 38 FPS
attachment1: 38 FPS
Second test: Firefox with disabled ANGLE, (about:config -> webgl.disable-angle = true), using native OpenGL:
Unmasked Vendor: ATI Technologies Inc.
Unmasked Renderer: Radeon RX550/550 Series
attachment0: 38 FPS
attachment1: 19 FPS
We see that DirectX is not affected by the problem, and that the OpenGL issue is reproducible in WebGL. This is the expected result, as gamers and developers have complained only about OpenGL performance.
P.S. Probably my issue is the root of this and this performance drops.
AMD has fixed the problem as of (at least) the December 2019 driver. The fix is confirmed by the above-mentioned test programs and by our game engine's FPS rate.
See also this thread.
Dear AMD OpenGL driver team, thank you very much!
The following code compiles and runs without errors on Linux, but gives the error
"Error validating program: 'Validation Failed: No vertex array object bound.'"
on macOS 10.14.2 (Mojave). Note that the program compiles successfully; the problem occurs at runtime.
MacBook Pro (Retina, 15-inch, Mid 2015)
I am compiling using g++ -std=c++11 test.cpp -w -framework OpenGL -lglfw -lGLEW -o p
test.cpp
#include <bits/stdc++.h>
#include <GL/glew.h>
#include <GLFW/glfw3.h>
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
using namespace std;
// Function-like macro: cout(x) streams x followed by endl to cout. Only uses
// of `cout` that are followed by a parenthesis are expanded.
#define cout(a) cout<<a<<endl
// GL object names shared between the setup functions and main():
// VAO/VBO are filled by createTriangle(), VAO2/VBO2 by createSierpinskiGasket(),
// shaderID is the program created by compileShader(), and uniformModel is the
// location of the "model" uniform queried in main().
GLuint VAO, VBO, VAO2, VBO2, shaderID, uniformModel;
// Model transform state, changed by handleKeys() and applied in main().
float scale = 1.0, x = 0.0, y = 0.0;
// Number of points generated by createSierpinskiGasket().
const int numPoints = 50000;
// Shader source file paths passed to createShader().
const char* vShader = "shader.vert";
const char* fShader = "shader.frag";
void createSierpinskiGasket()
{
GLfloat points[3 * numPoints];
GLfloat vertices[] = {
-1.0f, -1.0f, 0.0f,
0.0f, 1.0f, 0.0f,
1.0f, -1.0f, 0.0f
};
points[0] = 0.25f; points[1] = 0.50f; points[2] = 0.0f;
for(int i = 3; i < numPoints * 3; i += 3)
{
int j = rand() % 3;
points[i] = (points[i - 3] + vertices[j * 3]) / 2.0;
points[i + 1] = (points[i - 2] + vertices[j * 3 + 1]) / 2.0;
points[i + 2] = (points[i - 1] + vertices[j * 3 + 2]) / 2.0;
}
glGenVertexArrays(1, &VAO2);
glBindVertexArray(VAO2);
glGenBuffers(1, &VBO2);
glBindBuffer(GL_ARRAY_BUFFER, VBO2);
glBufferData(GL_ARRAY_BUFFER, sizeof(points), points, GL_STATIC_DRAW);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, 0);
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
}
void createTriangle()
{
GLfloat vertices[] = {
-1.0f, -1.0f, 0.0f,
1.0f, -1.0f, 0.0f,
0.0f, 1.0f, 0.0f
};
glGenVertexArrays(1, &VAO);
// Subsequent code will be associated with this VAO
glBindVertexArray(VAO);
glGenBuffers(1, &VBO);
// GL_ARRAY_BUFFER = Vertex data
glBindBuffer(GL_ARRAY_BUFFER, VBO);
// GL_STATIC_DRAW = Not going to change the data (transforms are OK)
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
// Location, number, type, normalize, stride, offset
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, 0);
// Enable location 0
glEnableVertexAttribArray(0);
// Unbinding
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
}
void addShader(const char* shaderIDCode, GLenum shaderIDType)
{
GLuint theShader = glCreateShader(shaderIDType);
const GLchar* theCode[1];
theCode[0] = shaderIDCode;
GLint codeLength[1];
codeLength[0] = strlen(shaderIDCode);
glShaderSource(theShader, 1, theCode, codeLength);
glCompileShader(theShader);
GLint result = 0;
GLchar eLog[1024] = { 0 };
glGetShaderiv(theShader, GL_COMPILE_STATUS, &result);
if (!result)
{
glGetShaderInfoLog(theShader, sizeof(eLog), NULL, eLog);
printf("Error compiling the %d shaderID: '%s'\n", shaderIDType, eLog);
return;
}
glAttachShader(shaderID, theShader);
}
void compileShader(const char* vertexCode, const char* fragmentCode)
{
// Creating shaderID program
shaderID = glCreateProgram();
if(!shaderID)
{
cout("Error creating shaderID.");
return;
}
addShader(vertexCode, GL_VERTEX_SHADER);
addShader(fragmentCode, GL_FRAGMENT_SHADER);
GLint result = 0;
GLchar eLog[1024] = { 0 };
glLinkProgram(shaderID);
glGetProgramiv(shaderID, GL_LINK_STATUS, &result);
if (!result)
{
glGetProgramInfoLog(shaderID, sizeof(eLog), NULL, eLog);
printf("Error linking program: '%s'\n", eLog);
return;
}
glValidateProgram(shaderID);
glGetProgramiv(shaderID, GL_VALIDATE_STATUS, &result);
if (!result)
{
glGetProgramInfoLog(shaderID, sizeof(eLog), NULL, eLog);
printf("Error validating program: '%s'\n", eLog);
return;
}
}
// Reads a text file and returns its contents with every line terminated by
// '\n'.
//
// fileLocation  path of the file to read.
//
// Returns the file contents, or an empty string (after printing a message) if
// the file cannot be opened.
std::string readFile(const char* fileLocation)
{
    std::ifstream fileStream(fileLocation, std::ios::in);
    if (!fileStream.is_open()) {
        printf("Failed to read %s! File doesn't exist.", fileLocation);
        return "";
    }

    std::string content;
    std::string line;
    // Test the read itself rather than eof(): looping on !eof() runs one extra
    // iteration after the last line and appends a spurious empty line.
    while (std::getline(fileStream, line))
    {
        content += line;
        content += '\n';
    }
    return content;
}
void createShader(const char* vertexLocation, const char* fragmentLocation)
{
string vertexString = readFile(vertexLocation);
string fragmentString = readFile(fragmentLocation);
const char* vertexCode = vertexString.c_str();
const char* fragmentCode = fragmentString.c_str();
compileShader(vertexCode, fragmentCode);
}
// GLFW key callback. Reacts to key presses only:
//   Escape       requests window close
//   '=' / '-'    grows / shrinks the global scale by 0.05
//   arrow keys   move the global x / y offset by 0.05
void handleKeys(GLFWwindow* window, int key, int code, int action, int mode)
{
    if (action != GLFW_PRESS)
    {
        return;
    }

    switch (key)
    {
    case GLFW_KEY_ESCAPE:
        glfwSetWindowShouldClose(window, GL_TRUE);
        break;
    case GLFW_KEY_EQUAL:
        scale += 0.05;
        break;
    case GLFW_KEY_MINUS:
        scale -= 0.05;
        break;
    case GLFW_KEY_LEFT:
        x -= 0.05;
        break;
    case GLFW_KEY_RIGHT:
        x += 0.05;
        break;
    case GLFW_KEY_UP:
        y += 0.05;
        break;
    case GLFW_KEY_DOWN:
        y -= 0.05;
        break;
    default:
        break;
    }
}
int main(void)
{
const GLint WIDTH = 800, HEIGHT = 600;
// Initializing GLFW
if(!glfwInit())
{
cout("GLFW initialization failed.");
glfwTerminate();
return 1;
}
// Setup GLFW window properties
// OpenGL version
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
// Not backwards compatible
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
// Allow forward compatibility
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
GLFWwindow* mainWindow = glfwCreateWindow(WIDTH, HEIGHT, "Test Window", NULL, NULL);
if(!mainWindow)
{
cout("GLFW window creation failed.");
glfwTerminate();
return 1;
}
// Get buffer size information
int bufferWidth, bufferHeight;
glfwGetFramebufferSize(mainWindow, &bufferWidth, &bufferHeight);
// Set context for GLEW to use
glfwMakeContextCurrent(mainWindow);
// Allow modern extension features
glewExperimental = GL_TRUE;
if(glewInit() != GLEW_OK)
{
cout("GLEW initialization failed.");
glfwDestroyWindow(mainWindow);
glfwTerminate();
return 1;
}
// Setup viewport size
glViewport(0, 0, bufferWidth, bufferHeight);
createTriangle();
createShader(vShader, fShader);
createSierpinskiGasket();
uniformModel = glGetUniformLocation(shaderID, "model");
// Loop until window is closed
while(!glfwWindowShouldClose(mainWindow))
{
// Get and handle user input
glfwPollEvents();
glfwSetKeyCallback(mainWindow, handleKeys);
// Clear window
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
// Clear colour buffer before next frame
glClear(GL_COLOR_BUFFER_BIT);
glUseProgram(shaderID);
glm::mat4 model = glm::mat4();
model = glm::translate(model, glm::vec3(x, y, 0));
//model = glm::rotate(model, rotX * toRadians, glm::vec3(1, 0, 0));
//model = glm::rotate(model, rotY * toRadians, glm::vec3(0, 1, 0));
//model = glm::rotate(model, rotZ * toRadians, glm::vec3(0, 0, 1));
model = glm::scale(model, glm::vec3(scale, scale, scale));
glUniformMatrix4fv(uniformModel, 1, GL_FALSE, glm::value_ptr(model));
glBindVertexArray(VAO);
glDrawArrays(GL_TRIANGLES, 0, 3);
glBindVertexArray(0);
/*glBindVertexArray(VAO2);
glDrawArrays(GL_POINTS, 0, numPoints);
glBindVertexArray(0);*/
glUseProgram(0);
glfwSwapBuffers(mainWindow);
}
return 0;
}
shader.frag
#version 330

// Colour interpolated from the vertex shader.
in vec4 vCol;

// Declared here as well, but not used by this stage.
uniform mat4 model;

out vec4 color;

void main()
{
    // Red and green come from the interpolated vertex colour; blue and alpha
    // are constant.
    //color = vec4(1.0f, 1.0f, 0.0f, 1.0f);
    color = vec4(vCol.xy, 0.5, 1.0);
}
shader.vert
#version 330

// Object-space vertex position.
layout (location = 0) in vec3 pos;

// Model transform supplied by the application.
uniform mat4 model;

// Per-vertex colour handed to the fragment shader.
out vec4 vCol;

void main()
{
    gl_Position = model * vec4(pos, 1.0);
    // Derive the colour from the position, clamped into [0, 1].
    vCol = vec4(clamp(pos, 0.0, 1.0), 1.0);
}
The message
Validation Failed: No vertex array object bound.
means that the program could not be validated because no Vertex Array Object was bound when glValidateProgram was called.
See OpenGL 4.6 API Core Profile Specification; 11.1. VERTEX SHADERS; page 402
[...] As a development aid, use the command
void ValidateProgram( uint program );
to validate the program object program against the current GL state.
This means that the VAO which is going to be drawn by the shader program has to be bound before glValidateProgram is called.
Bind the "triangle" VAO, before the shader program is validated:
createTriangle();
glBindVertexArray(VAO);
createShader(vShader, fShader);
These days I am learning OpenGL from the LearnOpenGL website (https://learnopengl.com/Getting-started/Hello-Triangle), and I use Xcode to run the code. My Xcode version is 10.0 (10A255). A few days ago, after the command line tools were updated, some code could no longer be built. The error message is "linker command failed with exit code 1 (use -v to see invocation)". I have already added the libraries to link against.
#include <glad/glad.h>
#include <GLFW/glfw3.h>
#include <iostream>
// Forward declarations of the GLFW resize callback and the per-frame input
// handler defined after main().
void framebuffer_size_callback(GLFWwindow* window, int width, int
height);
void processInput(GLFWwindow *window);
// Initial window size in screen coordinates.
const unsigned int SCR_WIDTH = 800;
const unsigned int SCR_HEIGHT = 600;
// Vertex shader: passes the vec3 attribute at location 0 through as the
// clip-space position with w = 1. The explicit "\0" is redundant because a
// string literal is already null-terminated.
const char *vertexShaderSource = "#version 330 core\n"
"layout (location = 0) in vec3 aPos;\n"
"void main()\n"
"{\n"
" gl_Position = vec4(aPos.x, aPos.y, aPos.z, 1.0);\n"
"}\0";
// Fragment shader: writes a constant colour (1.0, 0.5, 0.2, 1.0).
const char *fragmentShaderSource = "#version 330 core\n"
"out vec4 FragColor;\n"
"void main()\n"
"{\n"
" FragColor = vec4(1.0f, 0.5f, 0.2f, 1.0f);\n"
"}\n\0";
// Hello-triangle program: creates an OpenGL 3.3 core window, builds a shader
// program from the two source strings above, uploads one triangle and draws
// it every frame until the window is closed.
// Returns 0 on normal exit and -1 if GLFW, the window or GLAD cannot be set up.
int main()
{
    if (!glfwInit())
    {
        std::cout << "Failed to initialize GLFW" << std::endl;
        return -1;
    }
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
#ifdef __APPLE__
    glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // needed on macOS to get a core-profile context
#endif

    GLFWwindow* window = glfwCreateWindow(SCR_WIDTH, SCR_HEIGHT, "LearnOpenGL", NULL, NULL);
    if (window == NULL)
    {
        std::cout << "Failed to create GLFW window" << std::endl;
        glfwTerminate();
        return -1;
    }
    glfwMakeContextCurrent(window);
    glfwSetFramebufferSizeCallback(window, framebuffer_size_callback);

    if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress))
    {
        std::cout << "Failed to initialize GLAD" << std::endl;
        glfwTerminate();
        return -1;
    }

    // Build and compile the shader program. GL object names are unsigned.
    int success;
    char infoLog[512];

    unsigned int vertexShader = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(vertexShader, 1, &vertexShaderSource, NULL);
    glCompileShader(vertexShader);
    glGetShaderiv(vertexShader, GL_COMPILE_STATUS, &success);
    if (!success)
    {
        glGetShaderInfoLog(vertexShader, 512, NULL, infoLog);
        std::cout << "ERROR::SHADER::VERTEX::COMPILATION_FAILED\n" << infoLog << std::endl;
    }

    unsigned int fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
    glShaderSource(fragmentShader, 1, &fragmentShaderSource, NULL);
    glCompileShader(fragmentShader);
    glGetShaderiv(fragmentShader, GL_COMPILE_STATUS, &success);
    if (!success)
    {
        glGetShaderInfoLog(fragmentShader, 512, NULL, infoLog);
        std::cout << "ERROR::SHADER::FRAGMENT::COMPILATION_FAILED\n" << infoLog << std::endl;
    }

    unsigned int shaderProgram = glCreateProgram();
    glAttachShader(shaderProgram, vertexShader);
    glAttachShader(shaderProgram, fragmentShader);
    glLinkProgram(shaderProgram);
    glGetProgramiv(shaderProgram, GL_LINK_STATUS, &success);
    if (!success) {
        glGetProgramInfoLog(shaderProgram, 512, NULL, infoLog);
        std::cout << "ERROR::SHADER::PROGRAM::LINKING_FAILED\n" << infoLog << std::endl;
    }
    // The shader objects are no longer needed once the program is linked.
    glDeleteShader(vertexShader);
    glDeleteShader(fragmentShader);

    // Vertex data: one triangle, xyz per vertex.
    float vertices[] = {
        -0.5f, -0.5f, 0.0f, // left
         0.5f, -0.5f, 0.0f, // right
         0.0f,  0.5f, 0.0f  // top
    };

    unsigned int VBO, VAO;
    glGenVertexArrays(1, &VAO);
    glGenBuffers(1, &VBO);
    // Bind the VAO first so the buffer binding and attribute setup are
    // recorded in it.
    glBindVertexArray(VAO);
    glBindBuffer(GL_ARRAY_BUFFER, VBO);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)0);
    glEnableVertexAttribArray(0);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindVertexArray(0);

    // Render loop
    while (!glfwWindowShouldClose(window))
    {
        processInput(window);

        glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
        glClear(GL_COLOR_BUFFER_BIT);

        // draw our first triangle
        glUseProgram(shaderProgram);
        glBindVertexArray(VAO);
        glDrawArrays(GL_TRIANGLES, 0, 3);

        glfwSwapBuffers(window);
        glfwPollEvents();
    }

    // Release all GL resources before shutting GLFW down.
    glDeleteVertexArrays(1, &VAO);
    glDeleteBuffers(1, &VBO);
    glDeleteProgram(shaderProgram);
    glfwTerminate();
    return 0;
}
// Polls the keyboard once per frame and requests window close while Escape is
// held down.
void processInput(GLFWwindow *window)
{
    const bool escapeDown = (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS);
    if (!escapeDown)
    {
        return;
    }
    glfwSetWindowShouldClose(window, true);
}
// GLFW framebuffer-size callback: makes the viewport cover the whole resized
// framebuffer, starting at its lower-left corner.
void framebuffer_size_callback(GLFWwindow* window, int width, int height)
{
    const int originX = 0;
    const int originY = 0;
    glViewport(originX, originY, width, height);
}
I'm writing a movie player that uses FFmpeg and OpenGL ES. The movie is decoded successfully, but when I use the AVFrame as a texture and draw it on screen, the picture is very fuzzy. I don't know what is wrong in my code. If I convert the AVFrame from YUV to an RGB image, the picture is clear.
Does anyone know why drawing with YUV textures gives an unclear picture?
My render code:
#import "SJGLView.h"
#import <GLKit/GLKit.h>
#import "SJDecoder.h"
#include "libavutil/pixfmt.h"
// MARK: - C Function
// Logs the info log of `shader` via NSLog, or "Empty info" when the shader
// has no log.
static void sj_logShaderError(GLuint shader) {
    GLint info_len = 0;
    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &info_len);
    if (info_len <= 0) {
        NSLog(@"Empty info");
        return;
    }
    GLchar *log = (GLchar *)malloc((size_t)info_len);
    if (log == NULL) return;
    glGetShaderInfoLog(shader, info_len, NULL, log);
    NSLog(@"Shader compile log: %s", log);
    free(log);  // the buffer was previously leaked on every call
}
// Logs the info log of `program` via NSLog; logs nothing when the log is empty.
static void sj_logProgramError(GLuint program) {
    // Initialised so a failed query cannot leave garbage in the length.
    GLint info_length = 0;
    glGetProgramiv(program, GL_INFO_LOG_LENGTH, &info_length);
    if (info_length <= 0) return;
    GLchar *log = (GLchar *)malloc((size_t)info_length);
    if (log == NULL) return;
    glGetProgramInfoLog(program, info_length, NULL, log);
    NSLog(@"Program link log: %s", log);
    free(log);  // the buffer was previously leaked on every call
}
// Creates and compiles a shader of `shader_type` from `shader_source`.
//
// Returns the shader name, or 0 if the source is NULL, the shader object
// cannot be created, or compilation fails (the compile log is then written
// via sj_logShaderError and the shader object is deleted).
GLuint sj_loadShader(GLenum shader_type, const char* shader_source) {
    // A missing shader file reaches this function as a NULL source pointer.
    if (shader_source == NULL) return 0;

    GLuint shader = glCreateShader(shader_type);
    if (shader == 0) return 0;

    glShaderSource(shader, 1, &shader_source, NULL);
    glCompileShader(shader);

    GLint compile_status = 0;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status);
    if (compile_status) return shader;

    sj_logShaderError(shader);
    glDeleteShader(shader);
    return 0;
}
// Fills `matrix` (16 floats, column-major) with an orthographic projection
// that maps the box [left,right] x [bottom,top] x [-near,-far] onto the
// [-1,1] cube, matching the matrix defined for glOrtho.
//
// The translation terms are negated: -(right+left)/(right-left) and so on.
// They used to be stored with a positive sign, which is only correct for a
// volume centred on the origin, where they are zero.
// The caller must pass left != right, bottom != top and near != far.
void loadOrtho(float *matrix, float left, float right, float bottom, float top, float near, float far) {
    float r_l = right - left;
    float t_b = top - bottom;
    float f_n = far - near;
    float tx = -(right + left) / r_l;
    float ty = -(top + bottom) / t_b;
    float tz = -(far + near) / f_n;

    // Column 0
    matrix[0] = 2.0f / r_l;
    matrix[1] = 0.0f;
    matrix[2] = 0.0f;
    matrix[3] = 0.0f;
    // Column 1
    matrix[4] = 0.0f;
    matrix[5] = 2.0f / t_b;
    matrix[6] = 0.0f;
    matrix[7] = 0.0f;
    // Column 2
    matrix[8] = 0.0f;
    matrix[9] = 0.0f;
    matrix[10] = -2.0f / f_n;
    matrix[11] = 0.0f;
    // Column 3: translation
    matrix[12] = tx;
    matrix[13] = ty;
    matrix[14] = tz;
    matrix[15] = 1.0f;
}
// BT.709, standard for HDTV.
// 3x3 YUV -> RGB conversion matrix. It is uploaded with
// glUniformMatrix3fv(..., GL_FALSE, ...), i.e. read column by column: each row
// of three values below is one column of the matrix the shader sees.
static const GLfloat g_bt709[] = {
1.164, 1.164, 1.164,
0.0, -0.213, 2.112,
1.793, -0.533, 0.0,
};
// Returns a pointer to the 9 floats of the BT.709 conversion matrix above.
const GLfloat *getColorMatrix_bt709() {
return g_bt709;
}
// Vertex attribute indices (0 = position, 1 = texture coordinate).
// NOTE(review): not referenced in the code shown here, which takes attribute
// locations from glGetAttribLocation instead. Confirm whether this is unused.
enum {
ATTRIBUTE_VERTEX,
ATTRIBUTE_TEXCOORD,
};
// OpenGL ES 2 view that draws decoded video frames as a textured quad.
@implementation SJGLView {
    EAGLContext *_context;        // GL context created in -setupGL
    GLuint _framebuffer;          // framebuffer object the view renders into
    GLuint _renderbuffer;         // colour renderbuffer backed by the CAEAGLLayer
    GLint _backingWidth;          // renderbuffer size in pixels
    GLint _backingHeight;
    GLfloat _vertices[8];         // quad corners, 4 x (x, y)
    GLuint _program;              // linked shader program
    GLuint _av4Position;          // location of attribute "av4_Position"
    GLuint _av2Texcoord;          // location of attribute "av2_Texcoord"
    GLuint _um4Mvp;               // location of uniform "um4_ModelViewProjection"
    GLfloat _texcoords[8];        // texture coordinates, 4 x (s, t)
    GLuint _us2Sampler[3];        // locations of the three sampler uniforms
    GLuint _um3ColorConversion;   // location of uniform "um3_ColorConversion"
    GLuint _textures[3];          // one texture per video plane
    SJDecoder *_decoder;          // supplies the frame format and dimensions
}
// Back the view with a CAEAGLLayer so OpenGL ES can render into it.
+ (Class)layerClass {
    return CAEAGLLayer.class;
}
// Initialiser: stores the decoder and sets up the GL state.
- (instancetype)initWithFrame:(CGRect)frame decoder:(SJDecoder *)decoder {
    self = [super initWithFrame:frame];
    if (!self) {
        return self;
    }
    _decoder = decoder;
    [self setupGL];
    return self;
}
// Re-allocates the renderbuffer storage for the layer's current size, reads
// back the new pixel dimensions and redraws.
- (void)layoutSubviews {
    [super layoutSubviews];
    // The GL calls below act on whichever context is current on this thread,
    // so make sure it is this view's context.
    [EAGLContext setCurrentContext:_context];
    glBindRenderbuffer(GL_RENDERBUFFER, _renderbuffer);
    [_context renderbufferStorage:GL_RENDERBUFFER fromDrawable:(CAEAGLLayer *)self.layer];
    glGetRenderbufferParameteriv(GL_RENDERBUFFER, GL_RENDERBUFFER_WIDTH, &_backingWidth);
    glGetRenderbufferParameteriv(GL_RENDERBUFFER, GL_RENDERBUFFER_HEIGHT, &_backingHeight);
    [self updateVertices];
    [self render:nil];
}
// Changing the content mode changes how the quad is scaled, so recompute the
// vertices and redraw.
- (void)setContentMode:(UIViewContentMode)contentMode
{
    super.contentMode = contentMode;
    [self updateVertices];
    [self render:nil];
}
// Creates the OpenGL ES 2 context, configures the backing layer, builds the
// framebuffer and shader program, and looks up the sampler and
// colour-conversion uniform locations.
- (void)setupGL {
    _context = [[EAGLContext alloc] initWithAPI:kEAGLRenderingAPIOpenGLES2];
    NSAssert(_context != nil, @"Failed to init EAGLContext");

    CAEAGLLayer *eaglLayer = (CAEAGLLayer *)self.layer;
    eaglLayer.opaque = YES;
    eaglLayer.drawableProperties = @{
        kEAGLDrawablePropertyRetainedBacking: [NSNumber numberWithBool:YES],
        kEAGLDrawablePropertyColorFormat: kEAGLColorFormatRGBA8
    };

    [EAGLContext setCurrentContext:_context];

    if (![self setupEAGLContext]) {
        return;
    }
    NSLog(@"Success to setup EAGLContext");

    if (![self loadShaders]) {
        return;
    }
    NSLog(@"Success to load shader");

    _us2Sampler[0] = glGetUniformLocation(_program, "us2_SamplerX");
    _us2Sampler[1] = glGetUniformLocation(_program, "us2_SamplerY");
    _us2Sampler[2] = glGetUniformLocation(_program, "us2_SamplerZ");
    _um3ColorConversion = glGetUniformLocation(_program, "um3_ColorConversion");
}
// Creates the framebuffer and the layer-backed colour renderbuffer and
// attaches one to the other.
// Returns NO if the framebuffer is incomplete or a GL error was raised.
- (BOOL)setupEAGLContext {
    glGenFramebuffers(1, &_framebuffer);
    glGenRenderbuffers(1, &_renderbuffer);
    glBindFramebuffer(GL_FRAMEBUFFER, _framebuffer);
    glBindRenderbuffer(GL_RENDERBUFFER, _renderbuffer);

    // The renderbuffer's storage is provided by the view's CAEAGLLayer.
    [_context renderbufferStorage:GL_RENDERBUFFER fromDrawable:(CAEAGLLayer *)self.layer];
    glGetRenderbufferParameteriv(GL_RENDERBUFFER, GL_RENDERBUFFER_WIDTH, &_backingWidth);
    glGetRenderbufferParameteriv(GL_RENDERBUFFER, GL_RENDERBUFFER_HEIGHT, &_backingHeight);
    glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, _renderbuffer);

    GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
    if (status != GL_FRAMEBUFFER_COMPLETE) {
        NSLog(@"Failed to make complete framebuffer object: %x", status);
        return NO;
    }

    GLenum glError = glGetError();
    if (glError != GL_NO_ERROR) {
        NSLog(@"Failed to setup EAGLContext: %x", glError);
        return NO;
    }
    return YES;
}
// Loads "vertex.vsh" and, depending on the decoder's frame format, either
// "yuv420p.fsh" or "rgb.fsh" from the main bundle, then compiles and links
// them into _program and looks up the attribute and MVP uniform locations.
// Returns NO (with _program reset to 0 if it was created) when a source
// cannot be read, a shader fails to compile or the program fails to link.
- (BOOL)loadShaders {
    NSBundle *bundle = [NSBundle mainBundle];
    NSString *vertexPath = [bundle pathForResource:@"vertex" ofType:@"vsh"];
    NSString *fragmentPath = _decoder.format == SJVideoFrameFormatYUV
        ? [bundle pathForResource:@"yuv420p" ofType:@"fsh"]
        : [bundle pathForResource:@"rgb" ofType:@"fsh"];

    // A missing resource yields a nil path; never hand nil on to the loader.
    NSString *vertexSource = vertexPath
        ? [NSString stringWithContentsOfFile:vertexPath encoding:NSUTF8StringEncoding error:nil]
        : nil;
    NSString *fragmentSource = fragmentPath
        ? [NSString stringWithContentsOfFile:fragmentPath encoding:NSUTF8StringEncoding error:nil]
        : nil;
    if (vertexSource == nil || fragmentSource == nil) {
        NSLog(@"Failed to read shader sources");
        return NO;
    }

    GLuint vertexShader = sj_loadShader(GL_VERTEX_SHADER, [vertexSource UTF8String]);
    GLuint fragmentShader = sj_loadShader(GL_FRAGMENT_SHADER, [fragmentSource UTF8String]);
    if (vertexShader == 0 || fragmentShader == 0) {
        // glDeleteShader silently ignores the value 0.
        glDeleteShader(vertexShader);
        glDeleteShader(fragmentShader);
        return NO;
    }

    _program = glCreateProgram();
    glAttachShader(_program, vertexShader);
    glAttachShader(_program, fragmentShader);
    glLinkProgram(_program);

    // The shaders are only flagged for deletion here; GL frees them together
    // with the program. They were previously leaked on the success path.
    glDeleteShader(vertexShader);
    glDeleteShader(fragmentShader);

    GLint link_status = GL_FALSE;
    glGetProgramiv(_program, GL_LINK_STATUS, &link_status);
    if (!link_status) {
        sj_logProgramError(_program);
        glDeleteProgram(_program);
        _program = 0;
        return NO;
    }

    _av4Position = glGetAttribLocation(_program, "av4_Position");
    _av2Texcoord = glGetAttribLocation(_program, "av2_Texcoord");
    _um4Mvp = glGetUniformLocation(_program, "um4_ModelViewProjection");
    return YES;
}
// Activates the program, lazily creates the three plane textures, binds
// texture i to texture unit i with linear filtering and edge clamping, points
// sampler i at unit i, and uploads the BT.709 colour matrix.
- (void)useRenderer {
    // Plane rows are tightly packed, so no row alignment padding on upload.
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
    glUseProgram(_program);

    if (_textures[0] == 0) {
        glGenTextures(3, _textures);
    }

    for (int unit = 0; unit < 3; ++unit) {
        glActiveTexture(GL_TEXTURE0 + unit);
        glBindTexture(GL_TEXTURE_2D, _textures[unit]);

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

        glUniform1i(_us2Sampler[unit], unit);
    }

    const GLfloat *colorMatrix = getColorMatrix_bt709();
    glUniformMatrix3fv(_um3ColorConversion, 1, GL_FALSE, colorMatrix);
}
// Uploads the three planes of a YUV frame as single-channel luminance
// textures: full size for luma, half width and half height for each chroma
// plane. Frames of any other format are ignored.
- (void)uploadTexture:(SJVideoFrame *)frame {
    if (frame.format != SJVideoFrameFormatYUV) {
        return;
    }

    SJVideoYUVFrame *yuv = (SJVideoYUVFrame *)frame;
    const GLubyte *planeBytes[3] = { yuv.luma.bytes, yuv.chromaB.bytes, yuv.chromaR.bytes };
    const GLsizei planeWidths[3] = { yuv.width, yuv.width/2, yuv.width/2 };
    const GLsizei planeHeights[3] = { yuv.height, yuv.height/2, yuv.height/2 };

    for (int plane = 0; plane < 3; ++plane) {
        glBindTexture(GL_TEXTURE_2D, _textures[plane]);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,
                     planeWidths[plane], planeHeights[plane], 0,
                     GL_LUMINANCE, GL_UNSIGNED_BYTE, planeBytes[plane]);
    }
}
// Draws one frame: sets up the program and textures, uploads the frame's
// planes, draws the quad into the view's framebuffer and presents it.
// `frame` may be nil (layoutSubviews and setContentMode: pass nil).
- (void)render:(SJVideoFrame *)frame {
[EAGLContext setCurrentContext:_context];
glUseProgram(_program);
// Binds the program again and leaves texture unit 2 active.
[self useRenderer];
GLfloat modelviewProj[16];
// Orthographic projection over the [-1, 1] cube.
loadOrtho(modelviewProj, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f);
glUniformMatrix4fv(_um4Mvp, 1, GL_FALSE, modelviewProj);
// Both calls re-specify client-side attribute pointers into the ivar arrays.
[self updateVertices];
[self updateTexcoords];
glBindFramebuffer(GL_FRAMEBUFFER, _framebuffer);
glViewport(0, 0, _backingWidth, _backingHeight);
// NOTE(review): glBindTexture inside uploadTexture: targets the texture unit
// left active by useRenderer (GL_TEXTURE0 + 2), so all three textures get
// bound to that one unit in turn. Confirm this is intended.
[self uploadTexture:frame];
glClear(GL_COLOR_BUFFER_BIT);
// Four vertices as a triangle strip form the quad.
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glBindRenderbuffer(GL_RENDERBUFFER, _renderbuffer);
[_context presentRenderbuffer:GL_RENDERBUFFER];
}
// Recomputes the quad corners so the video keeps its aspect ratio inside the
// backing store (the smaller scale factor for aspect-fit, otherwise the
// larger one) and re-specifies the position attribute pointer.
- (void)updateVertices {
    [self resetVertices];

    const BOOL aspectFit = (self.contentMode == UIViewContentModeScaleAspectFit);
    float frameW = _decoder.frameWidth;
    float frameH = _decoder.frameHeight;

    // Scale factors that would make the frame exactly fill each axis.
    const float scaleX = (float)_backingWidth / frameW;
    const float scaleY = (float)_backingHeight / frameH;
    float chosen;
    if (aspectFit) {
        chosen = MIN(scaleY, scaleX);
    } else {
        chosen = MAX(scaleY, scaleX);
    }

    // Half extents of the quad in normalised device coordinates.
    const float halfW = (frameW * chosen / (float)_backingWidth);
    const float halfH = (frameH * chosen / (float)_backingHeight);

    const GLfloat corners[8] = {
        -halfW, -halfH,
         halfW, -halfH,
        -halfW,  halfH,
         halfW,  halfH,
    };
    for (int i = 0; i < 8; ++i) {
        _vertices[i] = corners[i];
    }

    glVertexAttribPointer(_av4Position, 2, GL_FLOAT, GL_FALSE, 0, _vertices);
    glEnableVertexAttribArray(_av4Position);
}
// Resets the quad to cover the whole [-1, 1] square, in triangle-strip order:
// bottom-left, bottom-right, top-left, top-right.
- (void)resetVertices {
    static const GLfloat kFullQuad[8] = {
        -1.0f, -1.0f,
         1.0f, -1.0f,
        -1.0f,  1.0f,
         1.0f,  1.0f,
    };
    for (int i = 0; i < 8; ++i) {
        _vertices[i] = kFullQuad[i];
    }
}
// Restores the default texture coordinates and re-specifies the texcoord
// attribute pointer (two floats per vertex, tightly packed).
- (void)updateTexcoords {
    [self resetTexcoords];

    const GLint componentsPerCoord = 2;
    const GLsizei tightlyPacked = 0;
    glVertexAttribPointer(_av2Texcoord, componentsPerCoord, GL_FLOAT, GL_FALSE, tightlyPacked, _texcoords);
    glEnableVertexAttribArray(_av2Texcoord);
}
// Default texture coordinates, one (s, t) pair per quad vertex. t is 1 for
// the two bottom vertices and 0 for the two top ones, so the image is
// flipped vertically relative to the vertex order.
- (void)resetTexcoords {
    static const GLfloat kDefaultTexcoords[8] = {
        0.0f, 1.0f,
        1.0f, 1.0f,
        0.0f, 0.0f,
        1.0f, 0.0f,
    };
    for (int i = 0; i < 8; ++i) {
        _texcoords[i] = kDefaultTexcoords[i];
    }
}
.fsh :
// Fragment shader for planar YUV frames: samples one value per plane,
// removes the offsets and converts to RGB with the supplied 3x3 matrix.
precision highp float;
varying highp vec2 vv2_Texcoord;
uniform mat3 um3_ColorConversion;
// The application binds the luma plane to X and the two chroma planes to Y and Z.
uniform lowp sampler2D us2_SamplerX;
uniform lowp sampler2D us2_SamplerY;
uniform lowp sampler2D us2_SamplerZ;

void main() {
    mediump vec3 yuv;
    lowp vec3 rgb;

    // Luma is offset by 16/255; the chroma values are centred on 0.5.
    yuv.x = (texture2D(us2_SamplerX, vv2_Texcoord).r - (16.0/255.0));
    yuv.y = (texture2D(us2_SamplerY, vv2_Texcoord).r - 0.5);
    yuv.z = (texture2D(us2_SamplerZ, vv2_Texcoord).r - 0.5);

    // Was "yuva", an undeclared identifier that cannot compile.
    rgb = um3_ColorConversion * yuv;
    gl_FragColor = vec4(rgb, 1.0);
}
.vsh file:
// Samples an RGB texture and outputs it with full opacity.
// NOTE(review): the post labels this ".vsh", but it writes gl_FragColor, so
// it is a fragment shader (presumably the "rgb" .fsh). Confirm.
precision highp float;
varying highp vec2 vv2_Texcoord;
uniform lowp sampler2D us2_SamplerX;

void main() {
    // The terminating semicolon was missing.
    gl_FragColor = vec4(texture2D(us2_SamplerX, vv2_Texcoord).rgb, 1.0);
}
The rgb image:
RGB image
Update to add a GL_NEAREST image:
GL_NEAREST
I want to do EGL offscreen rendering with a pbuffer surface, but all I can read back is the background color; the things I draw cannot be seen.
For example, if I clear the screen with blue, the image read back via glReadPixels is just a blue image with nothing else in it.
I have really run out of ideas.
#include <QCoreApplication>
#include <QDebug>
#include <QImage>
#include <GLES2/gl2.h>
#include <EGL/egl.h>
#include <QElapsedTimer>
// Creates a shader object of the given type, loads `shaderSrc` into it and
// compiles it.
// Returns the shader handle, or 0 if the object cannot be created or the
// compilation fails (the compile log, if any, is written via qDebug and the
// shader object is deleted).
GLuint LoadShader(const char *shaderSrc, GLenum type)
{
    const GLuint handle = glCreateShader(type);
    if (handle == 0)
        return 0;

    // Load and compile the source
    glShaderSource(handle, 1, &shaderSrc, NULL);
    glCompileShader(handle);

    // Success path: hand the compiled shader back
    GLint status;
    glGetShaderiv(handle, GL_COMPILE_STATUS, &status);
    if (status)
        return handle;

    // Failure path: report the log (a length of 1 is just the terminator)
    GLint logBytes = 0;
    glGetShaderiv(handle, GL_INFO_LOG_LENGTH, &logBytes);
    if (logBytes > 1)
    {
        char *text = (char *)malloc(sizeof(char) * logBytes);
        glGetShaderInfoLog(handle, logBytes, NULL, text);
        qDebug() << "Error compiling shader:" << text;
        free(text);
    }
    glDeleteShader(handle);
    return 0;
}
int main(int argc, char *argv[])
{
GLuint renderBufferWidth = 1920;
GLuint renderBufferHeight = 1080;
EGLint ai32ContextAttribs[] = { EGL_CONTEXT_CLIENT_VERSION, 2,
EGL_NONE };
// Step 1 - Get the default display.
EGLDisplay eglDisplay = eglGetDisplay((EGLNativeDisplayType)0);
// Step 2 - Initialize EGL.
eglInitialize(eglDisplay, 0, 0);
// Step 3 - Make OpenGL ES the current API.
eglBindAPI(EGL_OPENGL_ES_API);
// Step 4 - Specify the required configuration attributes.
EGLint pi32ConfigAttribs[23];
pi32ConfigAttribs[0] = EGL_SURFACE_TYPE;
pi32ConfigAttribs[1] = EGL_PBUFFER_BIT;
pi32ConfigAttribs[2] = EGL_RENDERABLE_TYPE;
pi32ConfigAttribs[3] = EGL_OPENGL_ES2_BIT;
pi32ConfigAttribs[4] = EGL_CONFORMANT;
pi32ConfigAttribs[5] = EGL_OPENGL_ES2_BIT;
pi32ConfigAttribs[6] = EGL_COLOR_BUFFER_TYPE;
pi32ConfigAttribs[7] = EGL_RGB_BUFFER;
pi32ConfigAttribs[8] = EGL_LUMINANCE_SIZE;
pi32ConfigAttribs[9] = 0;
pi32ConfigAttribs[10] = EGL_RED_SIZE;
pi32ConfigAttribs[11] = 5;
pi32ConfigAttribs[12] = EGL_GREEN_SIZE;
pi32ConfigAttribs[13] = 6;
pi32ConfigAttribs[14] = EGL_BLUE_SIZE;
pi32ConfigAttribs[15] = 5;
pi32ConfigAttribs[16] = EGL_ALPHA_SIZE;
pi32ConfigAttribs[17] = 0;
pi32ConfigAttribs[18] = EGL_DEPTH_SIZE;
pi32ConfigAttribs[19] = 16;
pi32ConfigAttribs[20] = EGL_LEVEL;
pi32ConfigAttribs[21] = 0;
pi32ConfigAttribs[22] = EGL_NONE;
// Step 5 - Find a config that matches all requirements.
int iConfigs;
EGLConfig eglConfig;
eglChooseConfig(eglDisplay, pi32ConfigAttribs, &eglConfig, 1,
&iConfigs);
if (iConfigs != 1)
{
printf("Error: eglChooseConfig(): config not found.\n");
exit(-1);
}
EGLint pbufferAttribs[5];
pbufferAttribs[0] = EGL_WIDTH;
pbufferAttribs[1] = renderBufferWidth;
pbufferAttribs[2] = EGL_HEIGHT;
pbufferAttribs[3] = renderBufferHeight;
pbufferAttribs[4] = EGL_NONE;
// Step 6 - Create a surface to draw to.
EGLSurface eglSurface;
eglSurface = eglCreatePbufferSurface(eglDisplay, eglConfig, pbufferAttribs);
if (eglSurface == EGL_NO_SURFACE)
{
qDebug() << "surface error";
exit(1);
}
// Step 7 - Create a context.
EGLContext eglContext;
eglContext = eglCreateContext(eglDisplay, eglConfig, NULL,
ai32ContextAttribs);
if (eglContext == EGL_NO_CONTEXT)
{
qDebug() << "context error";
exit(1);
}
// Step 8 - Bind the context to the current thread
bool result = eglMakeCurrent(eglDisplay, eglSurface, eglSurface, eglContext);
if (!result)
{
qDebug() << "make current error" << eglGetError();
}
GLuint programObject;
{ //init
char* vShaderStr =
"attribute vec4 vPosition; \n"
"void main() \n"
"{ \n"
" gl_Position = vPosition; \n"
"} \n";
char* fShaderStr =
"precision mediump float; \n"
"void main() \n"
"{ \n"
" gl_FragColor = vec4(1.0, 0.0, 0.0, 1.0); \n"
"} \n";
GLuint vertexShader;
GLuint fragmentShader;
GLint linked;
// Load the vertex/fragment shaders
vertexShader = LoadShader(vShaderStr, GL_VERTEX_SHADER);
fragmentShader = LoadShader(fShaderStr, GL_FRAGMENT_SHADER);
// Create the program object
programObject = glCreateProgram();
if(programObject == 0)
return 0;
glAttachShader(programObject, vertexShader);
glAttachShader(programObject, fragmentShader);
// Bind vPosition to attribute 0
glBindAttribLocation(programObject, 0, "vPosition");
// Link the program
glLinkProgram(programObject);
// Check the link status
glGetProgramiv(programObject, GL_LINK_STATUS, &linked);
if(!linked)
{
GLint infoLen = 0;
glGetProgramiv(programObject, GL_INFO_LOG_LENGTH, &infoLen);
if(infoLen > 1)
{
char* infoLog = (char*)malloc(sizeof(char) * infoLen);
glGetProgramInfoLog(programObject, infoLen, NULL, infoLog);
qDebug() <<"Error linking program:" << infoLog;
free(infoLog);
}
glDeleteProgram(programObject);
}
glClearColor(0.0f, 0.0f, 1.0f, 1.0f);
}
{//draw
GLfloat vVertices[] = {0.0f, 1.5f, 0.0f,
-0.5f, -0.5f, 0.0f,
0.5f, -0.5f, 0.0f};
// Set the viewport
glViewport(0, 0, 1920, 1080);
// Clear the color buffer
glClear(GL_COLOR_BUFFER_BIT);
// Use the program object
glUseProgram(programObject);
// Load the vertex data
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, vVertices);
glEnableVertexAttribArray(0);
glDrawArrays(GL_TRIANGLES, 0, 3);
}
int size = 4 * renderBufferHeight * renderBufferWidth;
unsigned char *data2 = new unsigned char[size];
eglSwapBuffers( eglDisplay, eglSurface);
glReadPixels(0,0,renderBufferWidth,renderBufferHeight,GL_RGBA, GL_UNSIGNED_BYTE, data2);
qDebug() << glGetError() << eglGetError();
QImage saveImage(data2, renderBufferWidth, renderBufferHeight, QImage::Format_RGBA8888_Premultiplied);
saveImage.save("haha.png");
QCoreApplication a(argc, argv);
qDebug() << "done";
return a.exec();
}
Today, I ran the same program on a machine with an NVIDIA graphics card and the proprietary driver installed.
It turned out that the code works very well there.
So this is likely a bug in the Intel driver.