I'm trying to convert an HDR image float array I load with WIC to a 10-bit DWORD.
The pixel format of the loaded file is GUID_WICPixelFormat128bppPRGBAFloat, and I get an array of 4 floats per pixel.
When I try to convert these to 10-bit as follows:
struct RGBX
{
    unsigned int b : 10;
    unsigned int g : 10;
    unsigned int r : 10;
    int a : 2;
} rgbx;
(which is the format requested by the NVIDIA encoding library for 10-bit rgb),
then I assume I have to divide each of the floats by 1024.0f in order to fit them into the 10 bits of a DWORD.
However, I notice that some of the floats are > 1, which means their range is not [0,1] as it is when the image is 8-bit.
What would their range be? How do I store a floating-point color in a 10-bit integer?
I'm trying to use NVIDIA's HDR encoder, which requires an ARGB10 structure like the one above.
How is the 10-bit information of a color stored as a floating-point number?
By the way, I tried to convert with WIC, but the conversion from GUID_WICPixelFormat128bppPRGBAFloat to GUID_WICPixelFormat32bppR10G10B10A2 fails:
HRESULT ConvertFloatTo10(const float* f, int wi, int he, std::vector<DWORD>& out)
{
    CComPtr<IWICBitmap> b;
    HRESULT hr = wbfact->CreateBitmapFromMemory(wi, he, GUID_WICPixelFormat128bppPRGBAFloat, wi * 16, wi * he * 16, (BYTE*)f, &b);
    if (FAILED(hr)) return hr;
    CComPtr<IWICFormatConverter> wf;
    hr = wbfact->CreateFormatConverter(&wf);
    if (FAILED(hr)) return hr;
    hr = wf->Initialize(b, GUID_WICPixelFormat32bppR10G10B10A2, WICBitmapDitherTypeNone, 0, 0, WICBitmapPaletteTypeCustom);
    // This last call fails with 0x88982f50 : The component cannot be found.
    return hr;
}
Edit: I found a paper (https://hal.archives-ouvertes.fr/hal-01704278/document). Is it relevant to this question?
Floating-point color content greater than the 0..1 range is High Dynamic Range (HDR) content. If you trivially convert it to 10:10:10:2 UNORM, then you are 'clipping' values over 1. This doesn't give good results.
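For illustration, a naive clip-and-quantize of one channel looks roughly like this (a sketch only; ClipTo10 is a hypothetical helper, and <algorithm> is assumed for std::min/std::max):
// Clamp to [0,1], then scale to the 10-bit range.
// Everything above 1.0f collapses to 1023, which is why clipping loses all HDR detail.
inline unsigned int ClipTo10(float v)
{
    v = std::min(std::max(v, 0.0f), 1.0f);     // clip
    return (unsigned int)(v * 1023.0f + 0.5f); // quantize to 0..1023
}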
SDR 10:10:10 or 8:8:8
You should instead use tone-mapping, which compresses the HDR signal into SDR (Standard Dynamic Range, i.e. 0..1) before or as part of the conversion to 10:10:10:2.
There are many different approaches to tone-mapping, but a common 'generic' solution is the Reinhard tone-mapping operator. Here's an implementation using DirectXTex.
std::unique_ptr<ScratchImage> timage(new (std::nothrow) ScratchImage);
if (!timage)
{
wprintf(L"\nERROR: Memory allocation failed\n");
return 1;
}
// Compute max luminosity across all images
XMVECTOR maxLum = XMVectorZero();
hr = EvaluateImage(image->GetImages(), image->GetImageCount(), image->GetMetadata(),
[&](const XMVECTOR* pixels, size_t w, size_t y)
{
UNREFERENCED_PARAMETER(y);
for (size_t j = 0; j < w; ++j)
{
static const XMVECTORF32 s_luminance = { { { 0.3f, 0.59f, 0.11f, 0.f } } };
XMVECTOR v = *pixels++;
v = XMVector3Dot(v, s_luminance);
maxLum = XMVectorMax(v, maxLum);
}
});
if (FAILED(hr))
{
wprintf(L" FAILED [tonemap maxlum] (%08X%ls)\n", static_cast<unsigned int>(hr), GetErrorDesc(hr));
return 1;
}
maxLum = XMVectorMultiply(maxLum, maxLum); // use maxLum^2 as the 'white point' in the operator below
hr = TransformImage(image->GetImages(), image->GetImageCount(), image->GetMetadata(),
[&](XMVECTOR* outPixels, const XMVECTOR* inPixels, size_t w, size_t y)
{
UNREFERENCED_PARAMETER(y);
for (size_t j = 0; j < w; ++j)
{
XMVECTOR value = inPixels[j];
const XMVECTOR scale = XMVectorDivide(
XMVectorAdd(g_XMOne, XMVectorDivide(value, maxLum)),
XMVectorAdd(g_XMOne, value));
const XMVECTOR nvalue = XMVectorMultiply(value, scale);
value = XMVectorSelect(value, nvalue, g_XMSelect1110);
outPixels[j] = value;
}
}, *timage);
if (FAILED(hr))
{
wprintf(L" FAILED [tonemap apply] (%08X%ls)\n", static_cast<unsigned int>(hr), GetErrorDesc(hr));
return 1;
}
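Once the data is tone-mapped into SDR, the final conversion to 10:10:10:2 can also be done with DirectXTex; a minimal sketch (the format and filter flags here are my assumptions, not taken from the question):
std::unique_ptr<ScratchImage> dimage(new (std::nothrow) ScratchImage);
if (!dimage)
    return 1;
hr = Convert(timage->GetImages(), timage->GetImageCount(), timage->GetMetadata(),
    DXGI_FORMAT_R10G10B10A2_UNORM, TEX_FILTER_DEFAULT, TEX_THRESHOLD_DEFAULT, *dimage);
if (FAILED(hr))
{
    wprintf(L" FAILED [convert] (%08X%ls)\n", static_cast<unsigned int>(hr), GetErrorDesc(hr));
    return 1;
}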
HDR10
UPDATE: If you are trying to convert HDR floating-point content to an "HDR10" signal, then you need to do:
Color-space rotate from Rec.709 or P3D65 to Rec.2020.
Normalize for 'paper white' / 10,000 nits.
Apply the ST.2084 gamma curve.
Quantize to 10-bit.
// HDTV to UHDTV (Rec.709 color primaries into Rec.2020)
const XMMATRIX c_from709to2020 =
{
0.6274040f, 0.0690970f, 0.0163916f, 0.f,
0.3292820f, 0.9195400f, 0.0880132f, 0.f,
0.0433136f, 0.0113612f, 0.8955950f, 0.f,
0.f, 0.f, 0.f, 1.f
};
// DCI-P3-D65 https://en.wikipedia.org/wiki/DCI-P3 to UHDTV (DCI-P3-D65 color primaries into Rec.2020)
const XMMATRIX c_fromP3D65to2020 =
{
0.753845f, 0.0457456f, -0.00121055f, 0.f,
0.198593f, 0.941777f, 0.0176041f, 0.f,
0.047562f, 0.0124772f, 0.983607f, 0.f,
0.f, 0.f, 0.f, 1.f
};
// Custom Rec.709 into Rec.2020
const XMMATRIX c_fromExpanded709to2020 =
{
0.6274040f, 0.0457456f, -0.00121055f, 0.f,
0.3292820f, 0.941777f, 0.0176041f, 0.f,
0.0433136f, 0.0124772f, 0.983607f, 0.f,
0.f, 0.f, 0.f, 1.f
};
inline float LinearToST2084(float normalizedLinearValue)
{
const float ST2084 = pow((0.8359375f + 18.8515625f * pow(abs(normalizedLinearValue), 0.1593017578f)) / (1.0f + 18.6875f * pow(abs(normalizedLinearValue), 0.1593017578f)), 78.84375f);
return ST2084; // Don't clamp between [0..1], so we can still perform operations on scene values higher than 10,000 nits
}
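// For reference, these magic numbers are the standard SMPTE ST 2084 (PQ) constants:
// m1 = 2610/16384 ~= 0.1593017578, m2 = 2523/4096 * 128 = 78.84375,
// c1 = 3424/4096 = 0.8359375, c2 = 2413/4096 * 32 = 18.8515625, c3 = 2392/4096 * 32 = 18.6875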
// Missing from the snippet above: ST.2084 is defined against a 10,000-nit peak
const XMVECTORF32 c_MaxNitsFor2084 = { { { 10000.0f, 10000.0f, 10000.0f, 1.f } } };
// You can adjust this up to 10000.f
float paperWhiteNits = 200.f;
hr = TransformImage(image->GetImages(), image->GetImageCount(), image->GetMetadata(),
[&](XMVECTOR* outPixels, const XMVECTOR* inPixels, size_t w, size_t y)
{
UNREFERENCED_PARAMETER(y);
const XMVECTOR paperWhite = XMVectorReplicate(paperWhiteNits);
for (size_t j = 0; j < w; ++j)
{
XMVECTOR value = inPixels[j];
XMVECTOR nvalue = XMVector3Transform(value, c_from709to2020);
// Some people prefer the look of using c_fromP3D65to2020
// or c_fromExpanded709to2020 instead.
// Convert to ST.2084
nvalue = XMVectorDivide(XMVectorMultiply(nvalue, paperWhite), c_MaxNitsFor2084);
XMFLOAT4A tmp;
XMStoreFloat4A(&tmp, nvalue);
tmp.x = LinearToST2084(tmp.x);
tmp.y = LinearToST2084(tmp.y);
tmp.z = LinearToST2084(tmp.z);
nvalue = XMLoadFloat4A(&tmp);
value = XMVectorSelect(value, nvalue, g_XMSelect1110);
outPixels[j] = value;
}
}, *timage);
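That covers steps 1-3. The remaining step 4 (quantize to 10-bit) can be done per pixel with DirectXMath's packed-vector store; a minimal sketch, assuming the ST.2084-encoded values are already in [0,1] (note your encoder wants B in the low bits, so you may need to swizzle depending on the target layout):
// value is one ST.2084-encoded XMVECTOR from the loop above
DirectX::PackedVector::XMUDECN4 packed;
DirectX::PackedVector::XMStoreUDecN4(&packed, value); // clamps to [0,1], scales rgb by 1023, alpha by 3
DWORD pixel = packed.v;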
You should really take a look at texconv.
Reference
Reinhard et al., "Photographic tone reproduction for digital images", ACM Transactions on Graphics, Volume 21, Issue 3 (July 2002). ACM DL.
@ChuckWalbourn's answer is helpful; however, I don't want to tonemap to [0,1], as there is no point in tonemapping down to SDR and then going to 10-bit HDR.
What I'd think is correct is to scale to [0,4] instead, by first using g_XMFour:
const XMVECTOR scale = XMVectorDivide(
XMVectorAdd(g_XMFour, XMVectorDivide(v, maxLum)),
XMVectorAdd(g_XMFour, v));
then using a specialized 10-bit store which scales by 255 instead of 1023:
void XMStoreUDecN4a(DirectX::PackedVector::XMUDECN4* pDestination, DirectX::FXMVECTOR V)
{
    using namespace DirectX;
    static const XMVECTORF32 Scale = { { { 255.0f, 255.0f, 255.0f, 3.0f } } };
    // Clamp alpha to [0,1] so 3*a fits in the 2-bit field (the original g_XMFour clamp could overflow it)
    static const XMVECTORF32 Max = { { { 4.0f, 4.0f, 4.0f, 1.0f } } };
    assert(pDestination);
    XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
    N = XMVectorMultiply(N, Scale);
    pDestination->v = ((uint32_t)XMVectorGetW(N) << 30) |
        (((uint32_t)XMVectorGetZ(N) & 0x3FF) << 20) |
        (((uint32_t)XMVectorGetY(N) & 0x3FF) << 10) |
        (((uint32_t)XMVectorGetX(N) & 0x3FF));
}
And then a specialized 10-bit load which divides by 255 instead of 1023:
DirectX::XMVECTOR XMLoadUDecN4a(DirectX::PackedVector::XMUDECN4* pSource)
{
    using namespace DirectX;
    uint32_t Element = pSource->v & 0x3FF;
    const float r = (float)Element / 255.f;
    Element = (pSource->v >> 10) & 0x3FF;
    const float g = (float)Element / 255.f;
    Element = (pSource->v >> 20) & 0x3FF;
    const float b = (float)Element / 255.f;
    const float a = (float)(pSource->v >> 30) / 3.f;
    const XMVECTORF32 j = { { { r, g, b, a } } };
    return j;
}
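A quick usage sketch tying these helpers back to the original ConvertFloatTo10 loop (the 4-floats-per-pixel layout is from the question; applying the [0,4] scale from above beforehand is assumed):
std::vector<DWORD> out(size_t(wi) * he);
for (size_t i = 0; i < out.size(); ++i)
{
    const DirectX::XMVECTOR v = DirectX::XMVectorSet(f[i * 4 + 0], f[i * 4 + 1], f[i * 4 + 2], f[i * 4 + 3]);
    DirectX::PackedVector::XMUDECN4 p;
    XMStoreUDecN4a(&p, v); // the tone-scaled store from above
    out[i] = p.v;
}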
I saw this video about deblurring images using the Fourier transform in MATLAB:
video
I want to convert the code to Emgu CV.
My code in Emgu CV:
string path = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
Image<Bgr, byte> img = new Image<Bgr, byte>(@"lal.png");
//blur the image
Image<Gray, byte> gray = img.Convert<Gray, byte>().SmoothBlur(31,31);
//convert image to float and get the fourier transform
Mat g_fl = gray.Convert<Gray, float>().Mat;
Matrix<float> dft_image = new Matrix<float>(g_fl.Size);
CvInvoke.Dft(g_fl, dft_image, Emgu.CV.CvEnum.DxtType.Forward, 0);
//here I make a kernel image with the size of the original
Image<Gray, float> ker = new Image<Gray, float>(img.Size);
ker.SetZero();
for (int x = 0; x < 31; x++)
{
for (int y = 0; y < 31; y++)
{
//31 * 31= 961
ker[y, x] = new Gray(1/961);
}
}
//get the fourier of the kernel
Matrix<float> dft_blur = new Matrix<float>(g_fl.Size);
CvInvoke.Dft(ker, dft_blur, Emgu.CV.CvEnum.DxtType.Forward, 0);
// fourier(image) / fourier(blur)
Matrix<float> res = new Matrix<float>(g_fl.Size);
for (int x=0;x<g_fl.Cols;x++)
{
for (int y = 0; y < g_fl.Rows; y++)
{
res[y, x] = dft_image[y, x] / dft_blur[y, x];
}
}
//get the inverse of fourier
Image<Gray, float> ready = new Image<Gray, float>(g_fl.Size);
CvInvoke.Dft(res, ready, Emgu.CV.CvEnum.DxtType.Inverse, 0);
CvInvoke.Imshow("deblur", ready.Convert<Gray,byte>());
CvInvoke.Imshow("original", gray);
CvInvoke.WaitKey(0);
But the result is black and not working. Where is the mistake in my code?
If you have code for this in OpenCV Python, you could post it :)
Thanks :)
My old implementation of the Wiener filter follows.
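For orientation, wienerFilter below computes the classic Wiener deconvolution estimate
F(u,v) = ( |H(u,v)|^2 / (|H(u,v)|^2 + k) ) * ( Y(u,v) / H(u,v) )
where Y is the FFT of the blurred image, H is the FFT of the kernel, and k is the regularization constant (0.05 in main below); the small eps terms in the code only guard against division by zero.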
#include "stdafx.h"
#pragma once
#pragma comment(lib, "opencv_legacy220.lib")
#pragma comment(lib, "opencv_core220.lib")
#pragma comment(lib, "opencv_highgui220.lib")
#pragma comment(lib, "opencv_imgproc220.lib")
#include "c:\Users\Andrey\Documents\opencv\include\opencv\cv.h"
#include "c:\Users\Andrey\Documents\opencv\include\opencv\cxcore.h"
#include "c:\Users\Andrey\Documents\opencv\include\opencv\highgui.h"
#include <string>
#include <iostream>
#include <complex>
using namespace std;
using namespace cv;
//----------------------------------------------------------
// Compute the real and imaginary parts of the FFT for a given image
//----------------------------------------------------------
void ForwardFFT(Mat &Src, Mat *FImg)
{
int M = getOptimalDFTSize( Src.rows );
int N = getOptimalDFTSize( Src.cols );
Mat padded;
copyMakeBorder(Src, padded, 0, M - Src.rows, 0, N - Src.cols, BORDER_CONSTANT, Scalar::all(0));
// Create complex representation of our image
// planes[0] = real part, planes[1] = imaginary part (zeros)
Mat planes[] = {Mat_<double>(padded), Mat::zeros(padded.size(), CV_64F)};
Mat complexImg;
merge(planes, 2, complexImg);
dft(complexImg, complexImg);
// As a result, we also have Re and Im planes
split(complexImg, planes);
// Crop the spectrum if it has an odd number of rows or cols
planes[0] = planes[0](Rect(0, 0, planes[0].cols & -2, planes[0].rows & -2));
planes[1] = planes[1](Rect(0, 0, planes[1].cols & -2, planes[1].rows & -2));
FImg[0]=planes[0].clone();
FImg[1]=planes[1].clone();
}
//----------------------------------------------------------
// Restore the image from its spectrum
//----------------------------------------------------------
void InverseFFT(Mat *FImg,Mat &Dst)
{
Mat complexImg;
merge(FImg, 2, complexImg);
// Apply inverse FFT
idft(complexImg, complexImg);
split(complexImg, FImg);
Dst=FImg[0];
}
//----------------------------------------------------------
// Wiener filter
//----------------------------------------------------------
void wienerFilter(Mat &src,Mat &dst,Mat &_h,double k)
{
//---------------------------------------------------
// small number for numeric stability
//---------------------------------------------------
const double eps=1E-8;
//---------------------------------------------------
int ImgW=src.size().width;
int ImgH=src.size().height;
//--------------------------------------------------
Mat Yf[2];
ForwardFFT(src,Yf);
//--------------------------------------------------
Mat h;
h.create(ImgH,ImgW,CV_64F);
h=0;
_h.copyTo(h(Rect(0, 0, _h.size().width, _h.size().height)));
Mat Hf[2];
ForwardFFT(h,Hf);
//--------------------------------------------------
Mat Fu[2];
Fu[0].create(ImgH,ImgW,CV_64F);
Fu[1].create(ImgH,ImgW,CV_64F);
complex<double> a;
complex<double> b;
complex<double> c;
double Hf_Re;
double Hf_Im;
double Phf;
double hfz;
double hz;
double A;
for (int i=0;i<Hf[0].size().height;i++)
{
for (int j=0;j<Hf[0].size().width;j++)
{
Hf_Re=Hf[0].at<double>(i,j);
Hf_Im=Hf[1].at<double>(i,j);
Phf = Hf_Re*Hf_Re+Hf_Im*Hf_Im;
hfz=(Phf<eps)*eps;
hz =(h.at<double>(i,j)>0);
A=Phf/(Phf+hz+k);
a=complex<double>(Yf[0].at<double>(i,j),Yf[1].at<double>(i,j));
b=complex<double>(Hf_Re+hfz,Hf_Im+hfz);
c=a/b; // Deconvolution
// The epsilon terms above avoid division by zero
Fu[0].at<double>(i,j)=(c.real()*A);
Fu[1].at<double>(i,j)=(c.imag()*A);
}
}
//--------------------------------------------------
Fu[0]/=(ImgW*ImgH);
Fu[1]/=(ImgW*ImgH);
//--------------------------------------------------
InverseFFT(Fu,dst);
// Clamp out-of-range values
for (int i=0;i<Hf[0].size().height;i++)
{
for (int j=0;j<Hf[0].size().width;j++)
{
if(dst.at<double>(i,j)>215){dst.at<double>(i,j)=215;}
if(dst.at<double>(i,j)<(-40)){dst.at<double>(i,j)=(-40);}
}
}
}
//----------------------------------------------------------
// Main
//----------------------------------------------------------
int _tmain(int argc, _TCHAR* argv[])
{
// Input image
Mat img;
// Load it from drive
img=imread("data/motion_fuzzy_lena.bmp",0);
//---------------------------------------------
imshow("Src image", img);
// Image size
int ImgW=img.size().width;
int ImgH=img.size().height;
// Deconvolution kernel (coefficient sum must be 1)
// Image was blurred using same kernel
Mat h;
h.create(1,10,CV_64F);
h=1/double(h.size().width*h.size().height);
// Apply filter
wienerFilter(img,img,h,0.05);
normalize(img,img, 0, 1, CV_MINMAX);
imshow("Result image", img);
cvWaitKey(0);
return 0;
}
The result:
How can I remove the circle content of the above image? The original image is shown below:
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <cmath>
#include <iostream>
using namespace cv;
using namespace std;
/**
* Helper function to find a cosine of angle between vectors
* from pt0->pt1 and pt0->pt2
*/
static double angle(cv::Point pt1, cv::Point pt2, cv::Point pt0)
{
double dx1 = pt1.x - pt0.x;
double dy1 = pt1.y - pt0.y;
double dx2 = pt2.x - pt0.x;
double dy2 = pt2.y - pt0.y;
return (dx1*dx2 + dy1*dy2) / sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
}
/**
* Helper function to display text in the center of a contour
*/
void setLabel(cv::Mat& im, const std::string label, std::vector<cv::Point>& contour)
{
int fontface = cv::FONT_HERSHEY_SIMPLEX;
double scale = 0.4;
int thickness = 1;
int baseline = 0;
cv::Size text = cv::getTextSize(label, fontface, scale, thickness, &baseline);
cv::Rect r = cv::boundingRect(contour);
cv::Point pt(r.x + ((r.width - text.width) / 2), r.y + ((r.height + text.height) / 2));
cv::rectangle(im, pt + cv::Point(0, baseline), pt + cv::Point(text.width, -text.height), CV_RGB(255, 0, 255), CV_FILLED);
cv::putText(im, label, pt, fontface, scale, CV_RGB(0, 0, 0), thickness, 8);
}
int main()
{
//cv::Mat src = cv::imread("polygon.png");
Mat src = imread("3.bmp");//2.png 3.jpg
Mat src_copy;
src.copyTo(src_copy);
resize(src, src, Size(0, 0), 1, 1);
cout << "src type " << src.type() << endl;
Mat mask(src.size(), src.type(), Scalar(0));
if (src.empty())
return -1;
// Convert to grayscale
cv::Mat gray;
if (src.type() != CV_8UC1)
    cv::cvtColor(src, gray, CV_BGR2GRAY);
else
    gray = src; // already single-channel
// Use Canny instead of threshold to catch squares with gradient shading
cv::Mat bw;
cv::Canny(gray, bw, 0, 50, 5);
// Find contours
std::vector<std::vector<cv::Point> > contours;
cv::findContours(bw.clone(), contours, CV_RETR_LIST, CV_CHAIN_APPROX_NONE);
drawContours(src, contours, -1, Scalar(0, 255, 0), 2);
std::vector<cv::Point> approx;
cv::Mat dst = src.clone();
for (int i = 0; i < contours.size(); i++)
{
// Approximate contour with accuracy proportional
// to the contour perimeter
cv::approxPolyDP(cv::Mat(contours[i]), approx, cv::arcLength(cv::Mat(contours[i]), true)*0.1, true);
// Skip small or non-convex objects
if (std::fabs(cv::contourArea(contours[i])) < 500 || !cv::isContourConvex(approx))
continue;
if (approx.size() == 3)
{
//setLabel(dst, "TRI", contours[i]); // Triangles
}
else if (approx.size() >= 4 && approx.size() <= 6)
{
// Number of vertices of polygonal curve
int vtc = approx.size();
// Get the cosines of all corners
std::vector<double> cos;
for (int j = 2; j < vtc + 1; j++)
cos.push_back(angle(approx[j%vtc], approx[j - 2], approx[j - 1]));
// Sort ascending the cosine values
std::sort(cos.begin(), cos.end());
// Get the lowest and the highest cosine
double mincos = cos.front();
double maxcos = cos.back();
// Use the degrees obtained above and the number of vertices
// to determine the shape of the contour
if (vtc == 4 && mincos >= -0.2 && maxcos <= 0.5)
{
Mat element = getStructuringElement(MORPH_RECT, Size(13, 13));
setLabel(dst, "RECT", contours[i]);
drawContours(mask, contours, i, Scalar(255, 255, 255), CV_FILLED);
dilate(mask, mask, element);
src_copy = src_copy - mask;
}
else if (vtc == 5 && mincos >= -0.34 && maxcos <= -0.27)
setLabel(dst, "PENTA", contours[i]);
else if (vtc == 6 && mincos >= -0.55 && maxcos <= -0.45)
setLabel(dst, "HEXA", contours[i]);
}
else
{
// Detect and label circles
double area = cv::contourArea(contours[i]);
cv::Rect r = cv::boundingRect(contours[i]);
int radius = r.width / 2;
if (std::abs(1 - ((double)r.width / r.height)) <= 0.2 &&
std::abs(1 - (area / (CV_PI * std::pow(radius, 2)))) <= 0.2)
setLabel(dst, "CIR", contours[i]);
}
}
//cv::imshow("src", src);
imwrite("mask.jpg", mask);
imwrite("src.jpg", src_copy);
imwrite("dst.jpg", dst);
cv::imshow("dst", dst);
cv::waitKey(0);
return 0;
}
I've run into a quite odd performance issue. So far I've reduced the problem to this: I'm rendering 20x20x20 cubes in a grid, using glDrawElementsInstanced, which works fine as long as my camera is far away from the origin, however when it gets closer to the origin, it starts grinding to a halt.
I'm defining my model view projection matrix through:
float distance=3.8;
Projection = glm::perspective(65.0f, (float)(width)/height, 0.1f, 300.0f);
View = glm::lookAt( glm::vec3(0,0,-distance),
glm::vec3(0,0,10),
glm::vec3(0,1,0));
Model = glm::rotate(glm::mat4(1.0f), 0.0f, glm::vec3(0.25f, 1.0f,0.75f));
With distance at 40 there are no problems, but when distance decreases to about 3.8 or lower, everything grinds to a halt.
The actual call to rendering is carried out through:
glBindVertexArray(cubeVAO);
glDrawElementsInstanced(GL_TRIANGLES, indices.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
Meanwhile, putting all the vertices in a single buffer and rendering by calling:
glBindVertexArray(nonInstancedVAO);
glDrawArrays(GL_TRIANGLES, 0,vertices.size() );
completely removes the behavior. Has anyone experienced similar behavior and can point me in the direction of a solution? Failing that, does anyone have an idea of how to track down something like this? I had hoped I would be able to determine what was causing the slowdown using gDEBugger, but that just reconfirms that there aren't any other OpenGL calls and doesn't really help figure out what's taking up all the processing time.
Another note is that glDrawArraysInstanced also shows the same slowdown, and that splitting the call into 4 separate calls with a quarter of the geometry each also stops the slowdown.
Update
Here's an attempt at a minimal illustration of the problem.
//Minimal reproduction of problem
#include <stdio.h>
#include <string>
#include <fstream>
#include <stdlib.h>
#include <string.h>
#include <GL/glew.h>
#include <GLFW/glfw3.h>
// Include GLM
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
#include <vector>
#include <iostream>
#include <stdio.h>
//Set to true to use instanced rendering (glDrawElementsInstanced), false to render a generated grid instead (glDrawElements)
#define Instanced true
//Translation from origin. Problem is present at 0 distance, but disappears at e.g. 40.
const float distanceFromOrigin=0;
// Function to load shaders
GLuint LoadShaders(const char * vertex_file_path,const char * fragment_file_path);
int main(){
int width, height;
bool running = true;
// Initialise GLFW
glfwInit();
glfwWindowHint(GLFW_SAMPLES,1);
glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT,GL_TRUE);
glfwWindowHint(GLFW_VERSION_MAJOR, 4);
GLFWwindow* windowRef = glfwCreateWindow( 512, 512, "",0,0);
glfwMakeContextCurrent(windowRef);
glewInit();
//Load Shader
GLuint programID = LoadShaders( "Simple.vs.c", "Simple.fs.c" );
GLuint MatrixID = glGetUniformLocation(programID, "MVP");
glUseProgram(programID);
glm::mat4 Model,Projection,MVP,View,checkMVP;
std::vector<GLuint> sqIndice = {3,2,1,1,0,3,4,5,6,6,7,4,0,4,7,7,3,0,0,1,5,5,4,0,2,3,7,7,6,2,6,5,1,1,2,6,0,4,7,7,3,0};
std::vector<GLfloat> sqVertex = {-1, 1, -1, -1, 1, 1, -1, -1, 1, -1, -1, -1, 1, 1, -1, 1, 1, 1, 1, -1, 1, 1, -1, -1};
std::vector<GLfloat> sqColor = {0.2472,0.24,0.6,0.6,0.24,0.442893,0.6,0.547014,0.24,0.24,0.6,0.33692,0.24,0.353173,0.6,0.6,0.24,0.563266,0.6,0.426641,0.24,0.263452,0.6,0.24};
const float lattice = 5;
const int mxn = 10;
std::vector<GLfloat> v1 = {lattice,-1,0};
std::vector<GLfloat> v2 = {1,lattice,0};
std::vector<GLfloat> v3 = {0,0,lattice};
std::vector<GLfloat> offset = {0,0,-distanceFromOrigin};
std::vector<GLfloat> latticePoints,sqVertexGrid,sqColorGrid;// = {0,0,0};
std::vector<GLuint> sqIndiceGrid;
// Looping stuff to generate the full grid of "instances" to render in a single call.
int instanceCount=0;
//Generate lattice vectors, as well as vectors containing the full grid of indices, vertices and colors
for(int x=-mxn;x<mxn;++x){
for(int y=-mxn;y<mxn;++y){
for(int z=-mxn;z<mxn;++z){
for(int n=0;n<3;++n){
latticePoints.push_back( x*v1[n]+y*v2[n]+z*v3[n]+offset[n] );
};
for(int elm=0;elm<sqVertex.size();elm+=3){
for(int n=0;n<3;++n){
sqVertexGrid.push_back(sqVertex[elm+n]+x*v1[n]+y*v2[n]+z*v3[n]+offset[n]);
sqColorGrid.push_back(sqColor[elm+n]);
};
};
for(int elm=0;elm<sqIndice.size();++elm){
sqIndiceGrid.push_back(sqIndice[elm]+instanceCount*sqVertex.size()/3);
};
++instanceCount;
};
};
};
#if Instanced==true
//Initialize and fill vertex,color and indice buffers with the relevant data.
GLuint cubeVAO;
glGenVertexArrays(1, &cubeVAO);
glBindVertexArray(cubeVAO);
glEnable(GL_DEPTH_TEST);
//Vertex buffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glBufferData(GL_ARRAY_BUFFER, sqVertex.size()*sizeof(GLfloat), &sqVertex[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,0,(void*)0);
//Color buffer
GLuint colorBuffer;
glGenBuffers(1, &colorBuffer);
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
glBufferData(GL_ARRAY_BUFFER, sqColor.size()*sizeof(GLfloat), &sqColor[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1,3,GL_FLOAT,GL_FALSE,0,(void*)0);
// Indice buffer
GLuint indicesBuffer;
glGenBuffers(1, &indicesBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indicesBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sqIndice.size()*sizeof(GLuint), &sqIndice[0], GL_STATIC_DRAW);
//Lattice point buffer
GLuint latticePointBuffer;
glGenBuffers(1, &latticePointBuffer);
glBindBuffer(GL_ARRAY_BUFFER, latticePointBuffer);
glBufferData(GL_ARRAY_BUFFER, latticePoints.size()*sizeof(GLfloat), &latticePoints[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(2);
glVertexAttribPointer(2,3,GL_FLOAT,GL_FALSE,0,(void*)0);
glVertexAttribDivisor(2,1);
glBindVertexArray(0);
#elif Instanced==false
GLuint cubeGridVAO;
glGenVertexArrays(1, &cubeGridVAO);
glBindVertexArray(cubeGridVAO);
glEnable(GL_DEPTH_TEST);
//Vertex buffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glBufferData(GL_ARRAY_BUFFER, sqVertexGrid.size()*sizeof(GLfloat), &sqVertexGrid[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,0,(void*)0);
//Color buffer
GLuint colorBuffer;
glGenBuffers(1, &colorBuffer);
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
glBufferData(GL_ARRAY_BUFFER, sqColorGrid.size()*sizeof(GLfloat), &sqColorGrid[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1,3,GL_FLOAT,GL_FALSE,0,(void*)0);
// Indice buffer
GLuint indicesBuffer;
glGenBuffers(1, &indicesBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indicesBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sqIndiceGrid.size()*sizeof(GLuint), &sqIndiceGrid[0], GL_STATIC_DRAW);
glBindVertexArray(0);
#endif
while(running)
{
glfwGetFramebufferSize(windowRef, &width, &height);
height = height > 0 ? height : 1;
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
Projection = glm::perspective(65.0f, (float)(width)/height, 0.1f, 300.0f);
View = glm::lookAt( glm::vec3(0.0f,0.0f,-(distanceFromOrigin+3.8f)),
glm::vec3(0.0f,0.0f,100.0f),
glm::vec3(0.0f,1.0f,0.0f));
Model = glm::rotate(glm::mat4(1.0f), 0.0f, glm::vec3(0.25f, 1.0f,0.75f));
MVP = Projection*View*Model;
glUniformMatrix4fv(MatrixID, 1, GL_FALSE, glm::value_ptr(MVP));
#if Instanced==true
glBindVertexArray(cubeVAO);
glDrawElementsInstanced(GL_TRIANGLES, sqIndice.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
#elif Instanced==false
glBindVertexArray(cubeGridVAO);
glDrawElements(GL_TRIANGLES, sqIndiceGrid.size(),GL_UNSIGNED_INT,(GLvoid*)(0));
#endif
glfwPollEvents();
glfwSwapBuffers(windowRef);
std::cout<<".\n";
running = !glfwGetKey(windowRef,GLFW_KEY_ESCAPE) && !glfwWindowShouldClose(windowRef);
}
glfwDestroyWindow(windowRef);
glfwTerminate();
return 0;
};
GLuint LoadShaders(const char * vertex_file_path,const char * fragment_file_path){
// Create the shaders
GLuint VertexShaderID = glCreateShader(GL_VERTEX_SHADER);
GLuint FragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);
// Read the Vertex Shader code from the file
std::string VertexShaderCode;
std::ifstream VertexShaderStream(vertex_file_path, std::ios::in);
if(VertexShaderStream.is_open()){
std::string Line = "";
while(getline(VertexShaderStream, Line))
VertexShaderCode += "\n" + Line;
VertexShaderStream.close();
}else{
printf("Impossible to open %s. Are you in the right directory?\n", vertex_file_path);
return 0;
}
// Read the Fragment Shader code from the file
std::string FragmentShaderCode;
std::ifstream FragmentShaderStream(fragment_file_path, std::ios::in);
if(FragmentShaderStream.is_open()){
std::string Line = "";
while(getline(FragmentShaderStream, Line))
FragmentShaderCode += "\n" + Line;
FragmentShaderStream.close();
}
GLint Result = GL_FALSE;
int InfoLogLength;
// Compile Vertex Shader
printf("Compiling shader : %s\n", vertex_file_path);
char const * VertexSourcePointer = VertexShaderCode.c_str();
glShaderSource(VertexShaderID, 1, &VertexSourcePointer , NULL);
glCompileShader(VertexShaderID);
// Check Vertex Shader
glGetShaderiv(VertexShaderID, GL_COMPILE_STATUS, &Result);
glGetShaderiv(VertexShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
if ( InfoLogLength > 0 ){
std::vector<char> VertexShaderErrorMessage(InfoLogLength+1);
glGetShaderInfoLog(VertexShaderID, InfoLogLength, NULL, &VertexShaderErrorMessage[0]);
printf("%s\n", &VertexShaderErrorMessage[0]);
}
// Compile Fragment Shader
printf("Compiling shader : %s\n", fragment_file_path);
char const * FragmentSourcePointer = FragmentShaderCode.c_str();
glShaderSource(FragmentShaderID, 1, &FragmentSourcePointer , NULL);
glCompileShader(FragmentShaderID);
// Check Fragment Shader
glGetShaderiv(FragmentShaderID, GL_COMPILE_STATUS, &Result);
glGetShaderiv(FragmentShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
if ( InfoLogLength > 0 ){
std::vector<char> FragmentShaderErrorMessage(InfoLogLength+1);
glGetShaderInfoLog(FragmentShaderID, InfoLogLength, NULL, &FragmentShaderErrorMessage[0]);
printf("%s\n", &FragmentShaderErrorMessage[0]);
}
// Link the program
printf("Linking program\n");
GLuint ProgramID = glCreateProgram();
glAttachShader(ProgramID, VertexShaderID);
glAttachShader(ProgramID, FragmentShaderID);
glLinkProgram(ProgramID);
// Check the program
glGetProgramiv(ProgramID, GL_LINK_STATUS, &Result);
glGetProgramiv(ProgramID, GL_INFO_LOG_LENGTH, &InfoLogLength);
if ( InfoLogLength > 0 ){
std::vector<char> ProgramErrorMessage(InfoLogLength+1);
glGetProgramInfoLog(ProgramID, InfoLogLength, NULL, &ProgramErrorMessage[0]);
printf("%s\n", &ProgramErrorMessage[0]);
}
glDeleteShader(VertexShaderID);
glDeleteShader(FragmentShaderID);
return ProgramID;
}
OK, take a deep breath and take a seat: your problem is graphics card memory speed.
But you could make it easier for the GPU by fixing this bug:
glDrawElementsInstanced(GL_TRIANGLES, sqIndice.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
glDrawElementsInstanced expects the number of instances to draw as the last parameter, but you pass the number of elements in latticePoints. That's 3 times the number of instances. The excess instances read a zero lattice point inside the shader (out-of-bounds attribute access is prevented), so 16000 cubes are not translated and are painted at the same position. This results in painting the front face of the cubes 16000 times, and the depth buffer doesn't prevent it because those faces don't conceal each other; they sit on the same spot.
So when your distanceFromOrigin decreases, the 16000 center cubes get bigger and bigger, and OpenGL has to draw more and more pixels. A lot more, to be exact. It has to draw so much that it hits the speed limit of the graphics card memory.
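A minimal fix along those lines (each lattice point is three floats, so divide by 3 to get the instance count):
glBindVertexArray(cubeVAO);
glDrawElementsInstanced(GL_TRIANGLES, sqIndice.size(), GL_UNSIGNED_INT, (GLvoid*)(0), latticePoints.size() / 3);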
Read Diagnose OpenGl Performance Problems for the whole story.
In the code below, I am trying to implement the algorithm for affine transformation approximation presented here
#include <Eigen/Eigen>
#include <iostream>
using namespace Eigen;
using namespace std;
int main(int argc, char **argv)
{
Vector3f x1 (3.0f, 0.0f, 0.0f);
Vector3f x2 (0.0f, 2.0f, 0.0f);
Vector3f x3 (0.0f, 0.0f, 1.0f);
Vector3f translation(1.0f, -2.0f, 2.0f);
Vector3f x_bar1 = x1 + translation;
Vector3f x_bar2 = x2 + translation;
Vector3f x_bar3 = x3 + translation;
std::cerr << "x_bar1 = \n" << x_bar1 << std::endl;
std::cerr << "x_bar2 = \n" << x_bar2 << std::endl;
std::cerr << "x_bar3 = \n" << x_bar3 << std::endl;
Vector3f c = (x1+x2+x3)/3.0f;
Vector3f c_bar = (x_bar1+x_bar2+x_bar3)/3.0f;
Vector3f y1,y2,y3, y_bar1,y_bar2,y_bar3;
y1 = x1 - c;
y2 = x2 - c;
y3 = x3 - c;
y_bar1 = x_bar1 - c_bar;
y_bar2 = x_bar2 - c_bar;
y_bar3 = x_bar3 - c_bar;
Matrix3f H;
H = y1*y_bar1.transpose()+y2*y_bar2.transpose()+y3*y_bar3.transpose();
JacobiSVD<Matrix3f> svd(H, ComputeFullU | ComputeFullV);
Matrix3f R; R = svd.matrixV()*svd.matrixU().transpose();
Vector3f t; t = c-R*c_bar;
std::cerr << "R = \n" << R << std::endl;
std::cerr << "t = \n" << t << std::endl;
}
But I get the wrong answer:
R =
0.836735 -0.244898 -0.489796
-0.244898 0.632653 -0.734694
-0.489796 -0.734694 -0.469388
t =
0.142857
3.71429
1.42857
Is the problem in the implementation or in the algorithm? If so, what is the correction?
You can check your computation using e.g. this SVD-demonstration.
For your example I get
H =
6 -2 -1
-2 2.666666667 -0.666666667
-1 -0.666666667 0.666666667
U =
1 0 0
0 0.957092026 -0.289784149
0 -0.289784149 -0.957092026
V =
1 0 0
0 0.957092026 -0.289784149
0 -0.289784149 -0.957092026
R =
1 0 0
0 1 0
0 0 1
So the algorithm is correct! Probably your H is wrong.
I will attempt to shove in a Java program derived from your original C++. Based on this, I offer the following suggestions:
1) I think you have switched c and c_bar in t = c-R*c_bar; it should be t = c_bar - R*c.
2) The paper (which you can get freely at http://www.math.pku.edu.cn/teachers/yaoy/Fall2011/arun.pdf) suggests that having coplanar points Qi is uncommon, but in fact it is quite likely, because these points have their centroid subtracted, so if you add them all up you get the zero vector. Therefore it is entirely likely that you will get a reflection back instead of a rotation, and you need to switch column signs as in equation (18), as in my Java and in the sketch below.
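Applied back to the original Eigen code, those two fixes look roughly like this (a sketch only; variable names are taken from the question):
Matrix3f R = svd.matrixV() * svd.matrixU().transpose();
if (R.determinant() < 0.0f)
{
    // Reflection case, equation (18): flip the sign of V's last column
    Matrix3f V = svd.matrixV();
    V.col(2) = -V.col(2);
    R = V * svd.matrixU().transpose();
}
Vector3f t = c_bar - R * c; // c and c_bar swapped relative to the question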
package uk.co.demon.mcdowella.apachemathuser;
/*
Affine transformation approximation of two triangles using Eigen SVD
In the code below, I am trying to implement the algorithm for affine transformation approximation presented here
*/
/*
#include <Eigen/Eigen>
#include <iostream>
using namespace Eigen;
using namespace std;
*/
import org.apache.commons.math.linear.ArrayRealVector;
import java.util.Arrays;
import org.apache.commons.math.linear.Array2DRowRealMatrix;
import org.apache.commons.math.linear.SingularValueDecompositionImpl;
import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.linear.RealVector;
public class FindRotation
{
/** Utility function to return XY' */
private static RealMatrix xyt(RealVector x, RealVector y)
{
Array2DRowRealMatrix xx = new Array2DRowRealMatrix(x.getData());
return xx.multiply((
new Array2DRowRealMatrix(y.getData())).transpose());
}
// int main(int argc, char **argv)
public static void main(String[] s)
{
// Vector3f x1 (3.0f, 0.0f, 0.0f);
ArrayRealVector x1 = new ArrayRealVector(new double[]
{3.0f, 0.0f, 0.0f});
// Vector3f x2 (0.0f, 2.0f, 0.0f);
ArrayRealVector x2 = new ArrayRealVector(new double[]
{0.0f, 2.0f, 0.0f});
// Vector3f x3 (0.0f, 0.0f, 1.0f);
ArrayRealVector x3 = new ArrayRealVector(new double[]
{0.0f, 0.0f, 1.0f});
// Vector3f translation(1.0f, -2.0f, 2.0f);
ArrayRealVector translation = new ArrayRealVector(new double[]
{1.0f, -2.0f, 2.0f});
Array2DRowRealMatrix rot;
if (true)
{ // test - do simple rotation
rot = new Array2DRowRealMatrix(new double[][] {
new double[] {1.0, 0.0, 0.0},
new double[] {0.0, 0.0, -1.0},
new double[] {0.0, 1.0, 0.0},
});
System.out.println("Rot determinant is " + rot.getDeterminant());
}
else
{
rot = new Array2DRowRealMatrix(new double[][] {
new double[] {1.0, 0.0, 0.0},
new double[] {0.0, 1.0, 0.0},
new double[] {0.0, 0.0, 1.0},
});
}
// Vector3f x_bar1 = x1 + translation;
RealVector x_bar1 = rot.operate(x1).add(translation);
// Vector3f x_bar2 = x2 + translation;
RealVector x_bar2 = rot.operate(x2).add(translation);
// Vector3f x_bar3 = x3 + translation;
RealVector x_bar3 = rot.operate(x3).add(translation);
// std::cerr << "x_bar1 = \n" << x_bar1 << std::endl;
System.out.println("x_bar1 = ");
System.out.println(x_bar1);
// std::cerr << "x_bar2 = \n" << x_bar2 << std::endl;
System.out.println("x_bar2 = ");
System.out.println(x_bar2);
// std::cerr << "x_bar3 = \n" << x_bar3 << std::endl;
System.out.println("x_bar3 = ");
System.out.println(x_bar3);
// Vector3f c = (x1+x2+x3)/3.0f;
RealVector c = x1.add(x2).add(x3).mapDivide(3.0f);
// Vector3f c_bar = (x_bar1+x_bar2+x_bar3)/3.0f;
RealVector c_bar =
x_bar1.add(x_bar2).add(x_bar3).mapDivide(3.0f);
// Vector3f y1,y2,y3, y_bar1,y_bar2,y_bar3;
// y1 = x1 - c;
RealVector y1 = x1.subtract(c);
// y2 = x2 - c;
RealVector y2 = x2.subtract(c);
// y3 = x3 - c;
RealVector y3 = x3.subtract(c);
// y_bar1 = x_bar1 - c_bar;
RealVector y_bar1 = x_bar1.subtract(c_bar);
// y_bar2 = x_bar2 - c_bar;
RealVector y_bar2 = x_bar2.subtract(c_bar);
// y_bar3 = x_bar3 - c_bar;
RealVector y_bar3 = x_bar3.subtract(c_bar);
System.out.println("Y1 " + y1 + " (Q1)");
System.out.println("Y2 " + y2 + " (Q2)");
System.out.println("Y3 " + y3 + " (Q3)");
System.out.println("YB1 " + y_bar1);
System.out.println("YB2 " + y_bar2);
System.out.println("YB3 " + y_bar3);
// Matrix3f H;
// H = y1*y_bar1.transpose()+y2*y_bar2.transpose()+y3*y_bar3.transpose();
RealMatrix h = xyt(y1, y_bar1).add(xyt(y2,y_bar2)).add(
xyt(y3, y_bar3));
// JacobiSVD<Matrix3f> svd(H, ComputeFullU | ComputeFullV);
SingularValueDecompositionImpl svd =
new SingularValueDecompositionImpl(h);
System.out.println("Singular values are " + Arrays.toString(svd.getSingularValues()));
// Matrix3f R; R = svd.matrixV()*svd.matrixU().transpose();
RealMatrix r = svd.getV().multiply(svd.getUT());
double rDeterminant = r.getDeterminant();
System.out.println("Determinant " + rDeterminant);
if (rDeterminant < 0.0)
{ // coplanar case - which is not surprising because Q in the original paper sum to 0.0
// because centroid is subtracted from each of them. Try alternate r
RealMatrix changeLastColumn = new Array2DRowRealMatrix(new double[][] {
new double[] {1.0, 0.0, 0.0},
new double[] {0.0, 1.0, 0.0},
new double[] {0.0, 0.0, -1.0}});
RealMatrix vd = svd.getV().multiply(changeLastColumn);
r = vd.multiply(svd.getUT());
rDeterminant = r.getDeterminant();
System.out.println("Determinant at second go is " + rDeterminant);
}
// Vector3f t; t = c-R*c_bar;
// Note - original transpose seems to be the wrong way round
RealVector t = c_bar.subtract(r.operate(c));
// std::cerr << "R = \n" << R << std::endl;
System.out.println("R = ");
System.out.println(r);
// std::cerr << "t = \n" << t << std::endl;
System.out.println("t = ");
System.out.println(t);
// Apply supposed answer
RealVector z1 = r.operate(x1).add(t);
RealVector z2 = r.operate(x2).add(t);
RealVector z3 = r.operate(x3).add(t);
System.out.println("Z1 "+ z1);
System.out.println("Z2 "+ z2);
System.out.println("Z3 "+ z3);
}
/*
But I get wrong answer:
R =
0.836735 -0.244898 -0.489796
-0.244898 0.632653 -0.734694
-0.489796 -0.734694 -0.469388
t =
0.142857
3.71429
1.42857
Is the problem in the implementation or in the algorithm? If so, what is the correction?
*/
}