How to calculate log determinant of an Armadillo sparse matrix efficiently - eigen

I'm trying to write an MCMC procedure using RcppArmadillo which involves computing the log determinants of around 30,000 x 30,000 sparse matrices. It seems that log_det() in Armadillo does not support sp_mat right now, so I'm doing something like this:
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
#include <RcppArmadillo.h>
#include <RcppEigen.h>

using namespace arma;

double eigen_ldet(sp_mat arma_mat) {
    Eigen::SparseMatrix<double> eigen_s = Rcpp::as<Eigen::SparseMatrix<double>>(Rcpp::wrap(arma_mat));
    Eigen::SparseLU<Eigen::SparseMatrix<double>> solver;
    solver.compute(eigen_s);
    double det = solver.logAbsDeterminant();
    return det;
}
I feel it is really crappy and slow. Any help would be much appreciated.
Edit:
Here is the mockup:
library(Matrix)
m_mat = function(i = 1688, j = 18, rho = 0.5, alp = 0.5){
  w1 = matrix(runif(i^2), nrow = i, ncol = i)
  w2 = matrix(runif(j^2), nrow = j, ncol = j)
  w1 = w1/rowSums(w1)
  w2 = w2/rowSums(w2)
  diag(w1) = 0
  diag(w2) = 0
  w1 = diag(i) - rho*w1
  w2 = diag(j) - alp*w2
  w1 = kronecker(Matrix(diag(j)), w1)
  w2 = kronecker(Matrix(diag(i)), w2)
  ind = matrix(c(rep(seq(1,i), each = j), rep(seq(1,j), i)), ncol = 2)
  w2 = cbind(ind, w2)
  w2 = w2[order(w2[,2]),]
  w2 = t(w2[, -c(1,2)])
  w2 = cbind(as.matrix(ind), w2)
  w2 = w2[order(w2[,2]),]
  w2 = t(w2[, -c(1,2)])
  return(w1 + w2)
}
Edit2: Here is the second mockup with a sparse w1:
m_mat2 = function(i = 1688, j = 18, nb = 4, range = 10, rho = 0.5, alp = 0.5){
  w1 = Matrix(0, nrow = i, ncol = i)
  for (h in 1:i){
    rnd = as.integer(rnorm(nb, h, range))
    rnd = ifelse(rnd > 0 & rnd <= i, rnd, h)
    rnd = unique(rnd)
    w1[h, rnd] = 1
  }
  w1 = w1/rowSums(w1)
  w2 = matrix(runif(j^2), nrow = j, ncol = j)
  w2 = w2/rowSums(w2)
  diag(w1) = 0
  diag(w2) = 0
  w1 = diag(i) - rho*w1
  w2 = diag(j) - alp*w2
  w1 = kronecker(Matrix(diag(j)), w1)
  w2 = kronecker(Matrix(diag(i)), w2)
  ind = matrix(c(rep(seq(1,i), each = j), rep(seq(1,j), i)), ncol = 2)
  w2 = cbind(ind, w2)
  w2 = w2[order(w2[,2]),]
  w2 = t(w2[, -c(1,2)])
  w2 = cbind(as.matrix(ind), w2)
  w2 = w2[order(w2[,2]),]
  w2 = t(w2[, -c(1,2)])
  return(w1 + w2)
}
An actual sparse w1 would be much more irregular, but computing the determinant of this mock-up (with the code above) takes about the same time as with an actual w1.

From my experiments I find that the conversion from an Armadillo to an Eigen matrix is quite fast; most of the time is spent in solver.compute(). I do not know of a faster algorithm for the exact log determinant of a sparse matrix, but I have found an approximation that is at least applicable to your mock-up: use only the (dense) diagonal blocks (see e.g. here for ways to include other parts of the matrix). If an approximate solution is sufficient, this is quite good and fast:
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
#include <RcppArmadillo.h>
#include <RcppEigen.h>
#include <Rcpp/Benchmark/Timer.h>

using namespace arma;

// [[Rcpp::export]]
double arma_sldet(sp_mat arma_mat, int blocks, int size) {
    double ldet = 0.0;
    double val = 0.0;
    double sign = 0.0;
    for (int i = 0; i < blocks; ++i) {
        int begin = i * size;
        int end = (i + 1) * size - 1;
        sp_mat sblock = arma_mat.submat(begin, begin, end, end);
        mat dblock(sblock);
        log_det(val, sign, dblock);
        ldet += val;
    }
    return ldet;
}
// [[Rcpp::export]]
Rcpp::List eigen_ldet(sp_mat arma_mat) {
    Rcpp::Timer timer;
    timer.step("start");
    Eigen::SparseMatrix<double> eigen_s = Rcpp::as<Eigen::SparseMatrix<double>>(Rcpp::wrap(arma_mat));
    timer.step("conversion");
    Eigen::SparseLU<Eigen::SparseMatrix<double>> solver;
    solver.compute(eigen_s);
    timer.step("solver");
    double det = solver.logAbsDeterminant();
    timer.step("log_det");
    Rcpp::NumericVector res(timer);
    return Rcpp::List::create(Rcpp::Named("log_det") = det,
                              Rcpp::Named("timer") = res);
}
/*** R
library(Matrix)
m_mat = function(i = 1688, j = 18, rho = 0.5, alp = 0.5){
  w1 = matrix(runif(i^2), nrow = i, ncol = i)
  w2 = matrix(runif(j^2), nrow = j, ncol = j)
  w1 = w1/rowSums(w1)
  w2 = w2/rowSums(w2)
  diag(w1) = 0
  diag(w2) = 0
  w1 = diag(i) - rho*w1
  w2 = diag(j) - alp*w2
  w1 = kronecker(Matrix(diag(j)), w1)
  w2 = kronecker(Matrix(diag(i)), w2)
  ind = matrix(c(rep(seq(1,i), each = j), rep(seq(1,j), i)), ncol = 2)
  w2 = cbind(ind, w2)
  w2 = w2[order(w2[,2]),]
  w2 = t(w2[, -c(1,2)])
  w2 = cbind(as.matrix(ind), w2)
  w2 = w2[order(w2[,2]),]
  w2 = t(w2[, -c(1,2)])
  return(w1 + w2)
}
m <- m_mat(i = 200)
system.time(eigen <- eigen_ldet(m))
system.time(arma <- arma_sldet(m, 18, 200))
diff(eigen$timer)/1000000
all.equal(eigen$log_det, arma)
m <- m_mat()
#eigen_ldet(m) # takes too long ...
system.time(arma <- arma_sldet(m, 18, 1688))
*/
Results for a smaller mock-up:
> m <- m_mat(i = 200)
> system.time(eigen <- eigen_ldet(m))
user system elapsed
3.703 0.049 3.751
> system.time(arma <- arma_sldet(m, 18, 200))
user system elapsed
0.059 0.012 0.019
> diff(eigen$timer)/1000000
conversion solver log_det
5.208586 3738.131168 0.582578
> all.equal(eigen$log_det, arma)
[1] "Mean relative difference: 0.002874847"
The approximate solution is very close and much faster. We also see the timing distribution for the exact solution.
Results for the full mock-up:
> m <- m_mat()
> #eigen_ldet(m) # takes too long ...
> system.time(arma <- arma_sldet(m, 18, 1688))
user system elapsed
5.965 2.529 2.578
An even faster approximation can be achieved by considering only the diagonal:
// [[Rcpp::export]]
double arma_sldet_diag(sp_mat arma_mat) {
    vec d(arma_mat.diag());
    return sum(log(d));
}

If you have plenty of memory on your machine (say 32+ GB) and a fast LAPACK implementation (for example OpenBLAS or Intel MKL), a quick and dirty way is to convert the sparse matrix into a dense matrix and compute the log determinant on the dense matrix.
Example:
sp_mat X = sprandu(30000,30000,0.01);
cx_double log_result = log_det( mat(X) );
While this obviously takes lots of memory, the advantage is that it avoids time-consuming sparse solvers/factorizations. OpenBLAS or MKL will also take advantage of multiple cores.
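For reference, a minimal sketch of that dense fallback as an exported Rcpp function (the wrapper name and the return of the real part are my own additions; as in the snippet above, log_det() on a real dense matrix returns a complex number whose real part is log(|det|)):
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>

using namespace arma;

// Hypothetical wrapper: converts the sparse matrix to dense and uses
// Armadillo's dense log_det(). Note the memory cost: a 30,000 x 30,000
// dense double matrix alone needs about 7.2 GB of RAM.
// [[Rcpp::export]]
double arma_dense_ldet(sp_mat X) {
    cx_double res = log_det(mat(X));  // real part is log(|det|)
    return res.real();
}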

Optimizing global memory load in CUDA

My task:
I have two matrices: A (18 x 4194304) and B (18 x 1024).
I have to take each 18-element vector from A, compute the distance to each 18-element vector from B, and find the minimum distance and its index.
My code:
__device__
void GetMin(float &dist, int &idx)
{
    float dist2;
    int idx2;
    dist2 = __shfl_down_sync(0xFFFFFFFF, dist, 16, 32);
    idx2 = __shfl_down_sync(0xFFFFFFFF, idx, 16);
    if (dist > dist2)
    {
        dist = dist2;
        idx = idx2;
    }
    dist2 = __shfl_down_sync(0xFFFFFFFF, dist, 8, 32);
    idx2 = __shfl_down_sync(0xFFFFFFFF, idx, 8);
    if (dist > dist2)
    {
        dist = dist2;
        idx = idx2;
    }
    dist2 = __shfl_down_sync(0xFFFFFFFF, dist, 4, 32);
    idx2 = __shfl_down_sync(0xFFFFFFFF, idx, 4);
    if (dist > dist2)
    {
        dist = dist2;
        idx = idx2;
    }
    dist2 = __shfl_down_sync(0xFFFFFFFF, dist, 2, 32);
    idx2 = __shfl_down_sync(0xFFFFFFFF, idx, 2);
    if (dist > dist2)
    {
        dist = dist2;
        idx = idx2;
    }
    dist2 = __shfl_down_sync(0xFFFFFFFF, dist, 1, 32);
    idx2 = __shfl_down_sync(0xFFFFFFFF, idx, 1);
    if (dist > dist2)
    {
        dist = dist2;
        idx = idx2;
    }
}
__global__
void CalcMinDist_kernel(const float *A, const float *B, float *output, const int nNumPixels, int nNumImages)
{
    int tx = threadIdx.x + blockIdx.x * blockDim.x;
    int ty = threadIdx.y;
    int lane_id = tx % 32;
    float dist = 0;
    int idx = 0;
    float fMin = 99999999;
    int nMinIdx = -1;
    for(int i = lane_id; i < 1024; i += 32)
    {
        dist = 0;
        for(int j = 0; j < nNumImages; ++j)
        {
            int img_idx = blockIdx.x * ty + j * nNumPixels;
            dist += (A[img_idx] - B[i * nNumImages + j]) *
                    (A[img_idx] - B[i * nNumImages + j]);
        }
        idx = i;
        GetMin(dist, idx);
        if(threadIdx.x == 0)
        {
            if(fMin > dist)
            {
                fMin = dist;
                nMinIdx = idx;
            }
        }
    }
    if(threadIdx.x == 0)
    {
        output[blockIdx.x * ty] = nMinIdx;
    }
}
Looking at the profiler, I'm memory bound, and do have ~90% occupancy. Is there any way to speed up this operation?
Let me know if I need to provide any other information.
Actually, I would look at the algorithm first. This is a geometric problem - treat it as such.
You should represent the B data using a different data structure, e.g. by clustering or by building a partition structure (such as a k-d tree). That will let you avoid actually computing the distance from most B elements. (You could also consider a projection onto fewer dimensions, but the benefit of this may be more elusive.)
With respect to the access pattern - you would probably benefit from having consecutive threads work on consecutive elements of the 18-element-long vectors, rather than having each thread work on a complete 18-element-long vector individually. That would better fit the memory layout - right now, a warp read fetches many elements which are at distance 18 from each other. If I understand the code correctly, anyway.
(I also think the GetMin() could avoid some of the index swaps, but that's not significant since you only perform very few of those.)

how to calculate Otsu threshold in 1D

I'm trying to identify bimodal distributions in my analytical chemistry data. Each data set is a list of 3~70 retention times for a particular compound from the GC-MS. RTs for some compounds are bimodally distributed, where the library searches have assigned the same identity to two or more different features in the data with different RTs. This is quite common for isomers and other compound pairs with very similar mass spectra.
E.g. here's a histogram of RTs for one compound showing a bimodal distribution.
I want to calculate the Otsu threshold to try and define bimodal data (there are also multimodal distributions, but one step at a time). I'm struggling to understand the Wikipedia article on the calculations, but the text indicates that the threshold can be found by finding the minimum intra-class variance. So I've tried computing this from a list of the RTs as follows:
import statistics

a = list(d['Component RT'])  # retention times for one compound
n = len(a)
b = [a.pop(0)]
varA = []
varB = []
for i in range(1, n - 2):
    b.append(a.pop(0))
    varA.append(statistics.stdev(a)**2)
    varB.append(statistics.stdev(b)**2)
Am I right in thinking that if I plot the sum of the variances for the above data I should be able to identify the Otsu threshold as the minimum?
In this example the threshold is obvious and there are about 35 values to work from. For most compounds there are fewer values (typically <15) and the data may be less well defined. Is this even the right threshold to use? The Wikipedia article on modality indicates a whole bunch of other tests for multimodality.
The result is similar to OpenCV's Otsu threshold.
#include <cmath>
#include <vector>

typedef unsigned char uchar;  // as in OpenCV

uchar OTSU(const std::vector<uchar>& input_vec) {
    // assumes values are already in the 0-255 range; normalize first if needed
    int count[256] = {0};
    for (size_t i = 0; i < input_vec.size(); i++) {
        count[int(input_vec[i])]++;
    }
    int bestThreshold = 0;
    double maxVariance = 0;
    for (int threshold = 0; threshold < 256; threshold++) {
        int n0 = 0, n1 = 0;                   // class sizes
        double pixelSum0 = 0, pixelSum1 = 0;  // class value sums
        for (int i = 0; i < threshold; i++) {
            n0 += count[i];
            pixelSum0 += i * count[i];
        }
        for (int i = threshold; i < 256; i++) {
            n1 += count[i];
            pixelSum1 += i * count[i];
        }
        if (n0 == 0 || n1 == 0) continue;     // avoid division by zero
        double w0 = double(n0) / input_vec.size();
        double w1 = double(n1) / input_vec.size();
        double u0 = pixelSum0 / n0;           // class means
        double u1 = pixelSum1 / n1;
        double u = u0 * w0 + u1 * w1;         // global mean
        // between-class variance; maximizing it is equivalent to
        // minimizing the intra-class variance from the question
        double variance = w0 * pow(u0 - u, 2) + w1 * pow(u1 - u, 2);
        if (variance > maxVariance) {
            maxVariance = variance;
            bestThreshold = threshold;
        }
    }
    return (uchar) bestThreshold;
}
Ref: https://github.com/1124418652/edge_extract/blob/master/edge_extract/OTSU.cpp
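Since retention times are doubles rather than bytes, they need to be mapped onto the 0-255 range first. A minimal sketch of such an adapter (my own illustration, not part of the original answer; it assumes the OTSU() function above is in scope):
#include <algorithm>
#include <vector>

// Hypothetical adapter: map retention times onto 0-255 bins, run
// OTSU(), then map the winning bin back to a retention-time value.
double otsuThresholdRT(const std::vector<double>& rts) {
    double lo = *std::min_element(rts.begin(), rts.end());
    double hi = *std::max_element(rts.begin(), rts.end());
    std::vector<uchar> bins;
    for (double rt : rts)
        bins.push_back(uchar(255.0 * (rt - lo) / (hi - lo)));
    return lo + OTSU(bins) * (hi - lo) / 255.0;
}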

Better algorithm to calculate this formula

Given an array of positive integers A, I want to find the following sum S:
S = 2^1 * (S_1) + 2^2 * (S_2) + 2^3 * (S_3) + ... + 2^N * (S_N)
where S_i is the sum of products of i consecutive integers in A, e.g.:
S_3 = (A[0]*A[1]*A[2]) + (A[1]*A[2]*A[3]) + ... + (A[N-3]*A[N-2]*A[N-1])
For calculating any single S_i, I am using a method similar to a rolling hash, which achieves O(N):
tmp = A[0]*A[1]*A[2]
S_3 = tmp
for i = 3 to N-1:
    tmp = tmp * A[i] / A[i-3]
    S_3 += tmp
And all the powers of two I can precompute beforehand. So my algorithm to calculate S is O(N^2).
My question is, is it possible to calculate S with a better complexity?
There is a recurrence pattern you could use for calculating a solution with a linear time complexity.
Consider an example input:
A = [2, 5, 3, 2, 1, 8]
Now look at the partial array with only the first element:
A0 = [2]
Call the outcome for this partial array R0. It is clear that:
R0 = 2⋅A[0] = 2⋅2 = 4
Then consider the array with one more value from the original array:
A1 = [2, 5]
Call the outcome for this partial array R1. We calculate that:
R1 = 2⋅(A[0]+A[1]) + 4⋅(A[0]⋅A[1]) = 54
We can try to write this in terms of R0, hoping we can thus limit the number of calculations to make:
R1 = 2⋅(2⋅A[0] + 1)⋅A[1] + R0
Extending again with a third element, we get:
R2 = 2⋅(A[0]+A[1]+A[2]) + 4⋅(A[0]⋅A[1] + A[1]⋅A[2]) + 8⋅(A[0]⋅A[1]⋅A[2]) = 360
Let's try to write this in terms of R1:
R2 = 2⋅(2⋅(2⋅A[0] + 1)⋅A[1] + 1)⋅A[2] + R1
There is a pattern emerging, where the first term looks much like the first term in the previous R. It is something like 2⋅(X + 1)⋅A[2], where X is the first term of the previous R. We can generally say:
R(n) = 2⋅(R(n-1) − R(n-2) + 1)⋅A[n] + R(n-1)
...where R(n) is 0 when n < 0.
Now that is something that can be calculated in linear time. Here is an implementation in JavaScript, which also includes a second function that does the calculation in the naive, non-optimised way, so that the result can be compared:
function fastCalcS(a) {
    let r0, r1, r2;
    r1 = r2 = 0;
    for (let i = 0; i < a.length; i++) {
        r0 = r1;
        r1 = r2;
        r2 = 2*(r1 - r0 + 1)*a[i] + r1;
    }
    return r2;
}
// This function does it the slow way, and executes
// the literal definition of S without optimisation:
function slowCalcS(a) {
    let coeff = 2;
    let s = 0;
    for (let i = 0; i < a.length; i++) {
        let s_i = 0;
        for (let j = 0; j < a.length - i; j++) {
            let prod = 1;
            for (let k = 0; k <= i; k++) {
                prod *= a[j + k];
            }
            s_i += prod;
        }
        s += coeff * s_i;
        coeff *= 2;
    }
    return s;
}
var a = [2, 5, 3, 2, 1, 8];
console.log('slow way:', slowCalcS(a));
console.log('fast way:', fastCalcS(a));
In languages that provide a reduce array method (as JavaScript does), the function can look like this (ES6 syntax):
function fastCalcS(a) {
    return a.reduce( ([r1, r2], v) => [r2, 2*(r2 - r1 + 1)*v + r2], [0, 0] )[1];
}
var a = [2, 5, 3, 2, 1, 8];
console.log(fastCalcS(a));
A very similar approach to trincot's, but starting from the other end, leads to another linear algorithm.
If we had just the array {A[n-1]} then the sum would be:
T[n-1] = 2*A[n-1]
If we then try {A[n-2], A[n-1]} we get
T[n-2] = 2*A[n-2] + 2*A[n-1] + 4*A[n-2]*A[n-1]
= T[n-1] + 2*A[n-2]*( 1 + 2*A[n-1])
Continuing this way we get the recursions
T[n-k] = T[n-k+1] + 2*A[n-k]* K[n-k+1]
K[n-k] = 1 + 2*A[n-k]*K[n-k+1]
In C:
#include <stdint.h>

int64_t sum(int64_t n, const int64_t* A)
{
    int64_t K = 1;
    int64_t T = 0;
    int64_t i = n;
    while (--i >= 0)
    {
        T += 2*K*A[i];
        K = 1 + 2*A[i]*K;
    }
    return T;
}

Obtaining orientation map of fingerprint image using OpenCV

I'm trying to implement the method of improving fingerprint images by Anil Jain. As a starter, I encountered some difficulties while extracting the orientation image, even though I am strictly following the steps described in Section 2.4 of that paper.
So, this is the input image:
And this is after normalization using exactly the same method as in that paper:
I'm expecting to see something like this (an example from the internet):
However, this is what I got for displaying obtained orientation matrix:
Obviously this is wrong, and it also gives non-zero values for those zero points in the original input image.
This is the code I wrote:
cv::Mat orientation(cv::Mat inputImage)
{
    cv::Mat orientationMat = cv::Mat::zeros(inputImage.size(), CV_8UC1);

    // compute gradients at each pixel
    cv::Mat grad_x, grad_y;
    cv::Sobel(inputImage, grad_x, CV_16SC1, 1, 0, 3, 1, 0, cv::BORDER_DEFAULT);
    cv::Sobel(inputImage, grad_y, CV_16SC1, 0, 1, 3, 1, 0, cv::BORDER_DEFAULT);

    cv::Mat Vx, Vy, theta, lowPassX, lowPassY;
    cv::Mat lowPassX2, lowPassY2;
    Vx = cv::Mat::zeros(inputImage.size(), inputImage.type());
    Vx.copyTo(Vy);
    Vx.copyTo(theta);
    Vx.copyTo(lowPassX);
    Vx.copyTo(lowPassY);
    Vx.copyTo(lowPassX2);
    Vx.copyTo(lowPassY2);

    // estimate the local orientation of each block
    int blockSize = 16;
    for(int i = blockSize/2; i < inputImage.rows - blockSize/2; i += blockSize)
    {
        for(int j = blockSize/2; j < inputImage.cols - blockSize/2; j += blockSize)
        {
            float sum1 = 0.0;
            float sum2 = 0.0;
            for(int u = i - blockSize/2; u < i + blockSize/2; u++)
            {
                for(int v = j - blockSize/2; v < j + blockSize/2; v++)
                {
                    sum1 += grad_x.at<float>(u,v) * grad_y.at<float>(u,v);
                    sum2 += (grad_x.at<float>(u,v)*grad_x.at<float>(u,v)) * (grad_y.at<float>(u,v)*grad_y.at<float>(u,v));
                }
            }
            Vx.at<float>(i,j) = sum1;
            Vy.at<float>(i,j) = sum2;
            double calc = 0.0;
            if(sum1 != 0 && sum2 != 0)
            {
                calc = 0.5 * atan(Vy.at<float>(i,j) / Vx.at<float>(i,j));
            }
            theta.at<float>(i,j) = calc;

            // Perform low-pass filtering
            float angle = 2 * calc;
            lowPassX.at<float>(i,j) = cos(angle * pi / 180);
            lowPassY.at<float>(i,j) = sin(angle * pi / 180);
            float sum3 = 0.0;
            float sum4 = 0.0;
            for(int u = -lowPassSize / 2; u < lowPassSize / 2; u++)
            {
                for(int v = -lowPassSize / 2; v < lowPassSize / 2; v++)
                {
                    sum3 += inputImage.at<float>(u,v) * lowPassX.at<float>(i - u*lowPassSize, j - v * lowPassSize);
                    sum4 += inputImage.at<float>(u, v) * lowPassY.at<float>(i - u*lowPassSize, j - v * lowPassSize);
                }
            }
            lowPassX2.at<float>(i,j) = sum3;
            lowPassY2.at<float>(i,j) = sum4;
            float calc2 = 0.0;
            if(sum3 != 0 && sum4 != 0)
            {
                calc2 = 0.5 * atan(lowPassY2.at<float>(i, j) / lowPassX2.at<float>(i, j)) * 180 / pi;
            }
            orientationMat.at<float>(i,j) = calc2;
        }
    }
    return orientationMat;
}
I've already searched a lot on the web, but almost all the results are in Matlab, and the very few that use OpenCV didn't help me either. I sincerely hope someone could go through my code and point out any errors. Thank you in advance.
Update
Here are the steps that I followed according to the paper:
Obtain normalized image G.
Divide G into blocks of size wxw (16x16).
Compute the x and y gradients at each pixel (i,j).
Estimate the local orientation of each block centered at pixel (i,j) using equations:
Perform low-pass filtering to remove noise. For that, convert the orientation image into a continuous vector field defined as:
where W is a two-dimensional low-pass filter, and w(phi) x w(phi) is its size, which equals 5.
Finally, compute the local ridge orientation at (i,j) using:
Update2
This is the output of orientationMat after changing the mat type to CV_16SC1 in the Sobel operation, as Micka suggested:
Maybe it's too late for me to answer, but somebody might read this later and solve the same problem.
I've been working for a while on the same algorithm, the same method you posted... but there are some writing errors from when the paper was written (I guess). After fighting a lot with the equations, I found these errors by looking at other similar works.
Here is what worked for me...
Vy(i,j) = 2*dx(u,v)*dy(u,v)
Vx(i,j) = dx(u,v)^2 - dy(u,v)^2
O(i,j) = 0.5*arctan(Vy(i,j)/Vx(i,j))
(Excuse me, I wasn't able to post images, so I wrote the modified equations. Remember that "u" and "v" are positions in the summation across the blockSize by blockSize window.)
The first and most important thing (obviously) is the equations. I saw that in different works these expressions were really different, yet every one of them talked about the same algorithm of Hong et al.
The key is finding the least mean square (the first 3 equations) of the gradients (Vx and Vy); I provided the corrected formulas above for this step. Then you can compute the angle theta for the non-overlapping window (16x16 size recommended in the paper). After that, the algorithm says you must calculate the magnitude of the doubled angle in the "x" and "y" directions (Phi_x and Phi_y):
Phi_x(i,j) = V(i,j) * cos(2*O(i,j))
Phi_y(i,j) = V(i,j) * sin(2*O(i,j))
The magnitude is just:
V = sqrt(Vx(i,j)^2 + Vy(i,j)^2)
Note that the related work doesn't mention that you have to use the gradient magnitude, but it makes sense (to me) to do so. After all these corrections you can apply the low-pass filter to Phi_x and Phi_y; I used a simple 5x5 mask to average these magnitudes (something like OpenCV's medianBlur()). A sketch of this step follows below.
The last thing is to calculate the new angle, i.e. the average of the 25 neighbors in the O(i,j) image; for this you just have to compute:
O'(i,j) = 0.5*arctan(Phi_y/Phi_x)
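A minimal OpenCV sketch of this smoothing plus angle-recovery step (the function name and the use of cv::blur as the 5x5 averaging mask are my own choices, not from the original answer):
#include <cmath>
#include <opencv2/imgproc.hpp>

// Sketch: phiX/phiY are the doubled-angle fields described above (CV_32F).
// A 5x5 box filter plays the role of the uniform low-pass mask; the
// smoothed angle O'(i,j) is then recovered with atan2.
cv::Mat smoothedOrientation(const cv::Mat &phiX, const cv::Mat &phiY)
{
    cv::Mat fx, fy;
    cv::blur(phiX, fx, cv::Size(5, 5));  // uniform 5x5 low-pass mask
    cv::blur(phiY, fy, cv::Size(5, 5));
    cv::Mat angle(phiX.size(), CV_32F);
    for (int i = 0; i < angle.rows; i++)
        for (int j = 0; j < angle.cols; j++)
            angle.at<float>(i, j) =
                0.5f * std::atan2(fy.at<float>(i, j), fx.at<float>(i, j));
    return angle;
}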
We're almost there... All this was just for calculating the angle of the NORMAL VECTOR TO THE RIDGE DIRECTIONS (O'(i,j)) in the blockSize by blockSize non-overlapping window. What does that mean? It means that the angle we just calculated is perpendicular to the ridges; in simple words, we calculated the angle of the ridges plus 90 degrees. To get the angle we need, we just have to subtract 90° from the obtained angle.
To draw the lines we need an initial point (X0, Y0) and a final point (X1, Y1). For that, imagine a circle centered on (X0, Y0) with a radius of "r":
x0 = i + blocksize/2
y0 = j + blocksize/2
r = blocksize/2
Note that we add i and j to the first coordinates because the window is moving; we are going to draw the line starting from the center of each non-overlapping window, so we can't use just the window-local center.
Then, to calculate the end coordinates for drawing the line, we can simply use a right triangle, so...
X1 = r*cos(O'(i,j)-90°) + X0
Y1 = r*sin(O'(i,j)-90°) + Y0
X2 = X0 - r*cos(O'(i,j)-90°)
Y2 = Y0 - r*sin(O'(i,j)-90°)
Then just use the OpenCV line function, where the initial point is (X0, Y0) and the final point is (X1, Y1). In addition, I drew the 16x16 windows and computed the opposite points of X1 and Y1 (X2 and Y2) to draw a line across the entire window.
Hope this helps somebody.
My results...
Main function:
Mat mat = imread("nwmPa.png",0);
mat.convertTo(mat, CV_32F, 1.0/255, 0);
Normalize(mat);
int blockSize = 6;
int height = mat.rows;
int width = mat.cols;
Mat orientationMap;
orientation(mat, orientationMap, blockSize);
Normalize:
void Normalize(Mat &image)
{
    Scalar mean, dev;
    meanStdDev(image, mean, dev);
    double M = mean.val[0];
    double D = dev.val[0];
    for(int i(0); i < image.rows; i++)
    {
        for(int j(0); j < image.cols; j++)
        {
            if(image.at<float>(i,j) > M)
                image.at<float>(i,j) = 100.0/255 + sqrt( 100.0/255*pow(image.at<float>(i,j)-M,2)/D );
            else
                image.at<float>(i,j) = 100.0/255 - sqrt( 100.0/255*pow(image.at<float>(i,j)-M,2)/D );
        }
    }
}
Orientation map:
void orientation(const Mat &inputImage, Mat &orientationMap, int blockSize)
{
    Mat fprintWithDirectionsSmoo = inputImage.clone();
    Mat tmp(inputImage.size(), inputImage.type());
    Mat coherence(inputImage.size(), inputImage.type());
    orientationMap = tmp.clone();

    // gradients in x and y
    Mat grad_x, grad_y;
    // Sobel(inputImage, grad_x, CV_32F, 1, 0, 3, 1, 0, BORDER_DEFAULT);
    // Sobel(inputImage, grad_y, CV_32F, 0, 1, 3, 1, 0, BORDER_DEFAULT);
    Scharr(inputImage, grad_x, CV_32F, 1, 0, 1, 0);
    Scharr(inputImage, grad_y, CV_32F, 0, 1, 1, 0);

    // vector field
    Mat Fx(inputImage.size(), inputImage.type()),
        Fy(inputImage.size(), inputImage.type()),
        Fx_gauss,
        Fy_gauss;
    Mat smoothed(inputImage.size(), inputImage.type());

    // local orientation for each block
    int width = inputImage.cols;
    int height = inputImage.rows;
    int blockH;
    int blockW;

    // select block
    for(int i = 0; i < height; i += blockSize)
    {
        for(int j = 0; j < width; j += blockSize)
        {
            float Gsx = 0.0;
            float Gsy = 0.0;
            float Gxx = 0.0;
            float Gyy = 0.0;

            // clip the block size at the image borders
            blockH = ((height-i) < blockSize) ? (height-i) : blockSize;
            blockW = ((width-j) < blockSize) ? (width-j) : blockSize;

            // average over the WxW block
            for(int u = i; u < i + blockH; u++)
            {
                for(int v = j; v < j + blockW; v++)
                {
                    Gsx += (grad_x.at<float>(u,v)*grad_x.at<float>(u,v)) - (grad_y.at<float>(u,v)*grad_y.at<float>(u,v));
                    Gsy += 2*grad_x.at<float>(u,v) * grad_y.at<float>(u,v);
                    Gxx += grad_x.at<float>(u,v)*grad_x.at<float>(u,v);
                    Gyy += grad_y.at<float>(u,v)*grad_y.at<float>(u,v);
                }
            }

            float coh = sqrt(pow(Gsx,2) + pow(Gsy,2)) / (Gxx + Gyy);
            // smoothed
            float fi = 0.5*fastAtan2(Gsy, Gsx)*CV_PI/180;
            Fx.at<float>(i,j) = cos(2*fi);
            Fy.at<float>(i,j) = sin(2*fi);

            // fill blocks
            for(int u = i; u < i + blockH; u++)
            {
                for(int v = j; v < j + blockW; v++)
                {
                    orientationMap.at<float>(u,v) = fi;
                    Fx.at<float>(u,v) = Fx.at<float>(i,j);
                    Fy.at<float>(u,v) = Fy.at<float>(i,j);
                    coherence.at<float>(u,v) = (coh < 0.85) ? 1 : 0;
                }
            }
        }
    } /// for

    GaussConvolveWithStep(Fx, Fx_gauss, 5, blockSize);
    GaussConvolveWithStep(Fy, Fy_gauss, 5, blockSize);

    for(int m = 0; m < height; m++)
    {
        for(int n = 0; n < width; n++)
        {
            smoothed.at<float>(m,n) = 0.5*fastAtan2(Fy_gauss.at<float>(m,n), Fx_gauss.at<float>(m,n))*CV_PI/180;
            if((m % blockSize) == 0 && (n % blockSize) == 0)
            {
                int x = n;
                int y = m;
                int ln = sqrt(2*pow(blockSize,2))/2;
                float dx = ln*cos( smoothed.at<float>(m,n) - CV_PI/2);
                float dy = ln*sin( smoothed.at<float>(m,n) - CV_PI/2);
                arrowedLine(fprintWithDirectionsSmoo, Point(x, y+blockH), Point(x + dx, y + blockW + dy), Scalar::all(255), 1, CV_AA, 0, 0.06*blockSize);
                // qDebug() << Fx_gauss.at<float>(m,n) << Fy_gauss.at<float>(m,n) << smoothed.at<float>(m,n);
                // imshow("Orientation", fprintWithDirectionsSmoo);
                // waitKey(0);
            }
        }
    } /// for2

    normalize(orientationMap, orientationMap, 0, 1, NORM_MINMAX);
    imshow("Orientation field", orientationMap);
    orientationMap = smoothed.clone();
    normalize(smoothed, smoothed, 0, 1, NORM_MINMAX);
    imshow("Smoothed orientation field", smoothed);
    imshow("Coherence", coherence);
    imshow("Orientation", fprintWithDirectionsSmoo);
}
It seems nothing was forgotten :)
I have read your code thoroughly and found that you have made a mistake while calculating sum3 and sum4:
sum3 += inputImage.at<float>(u,v) * lowPassX.at<float>(i - u*lowPassSize, j - v * lowPassSize);
sum4 += inputImage.at<float>(u, v) * lowPassY.at<float>(i - u*lowPassSize, j - v * lowPassSize);
Instead of inputImage you should use the coefficients of the low-pass filter.
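In other words, the kernel weights should multiply the vector field, not the input image. A minimal sketch of the corrected step (the uniform 5x5 kernel and the function wrapper are my own illustration; names follow the question's code):
#include <opencv2/core.hpp>

// Sketch: the convolution weights come from a low-pass kernel W
// (here a uniform 5x5 mask), applied to lowPassX/lowPassY, not to
// inputImage as in the question's code.
void lowPassStep(const cv::Mat &lowPassX, const cv::Mat &lowPassY,
                 int i, int j, int lowPassSize,
                 float &sum3, float &sum4)
{
    const float w_uv = 1.0f / (lowPassSize * lowPassSize); // W(u,v), uniform mask
    sum3 = 0.0f;
    sum4 = 0.0f;
    for (int u = -lowPassSize / 2; u <= lowPassSize / 2; u++)
        for (int v = -lowPassSize / 2; v <= lowPassSize / 2; v++)
        {
            sum3 += w_uv * lowPassX.at<float>(i - u, j - v);
            sum4 += w_uv * lowPassY.at<float>(i - u, j - v);
        }
}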

Shortest distance between points algorithm

Given a set of points on a plane, find the shortest line segment formed by any two of these points.
How can I do that? The trivial way is obviously to calculate each distance, but I need another algorithm to compare.
http://en.wikipedia.org/wiki/Closest_pair_of_points
The problem can be solved in O(n log n) time using the recursive divide and conquer approach, e.g., as follows:
Sort the points along the x-coordinate.
Split the set of points into two equal-sized subsets by a vertical line x = xmid.
Solve the problem recursively in the left and right subsets. This yields the left-side and right-side minimal distances dLmin and dRmin, respectively.
Find the minimal distance dLRmin among the pairs of points in which one point lies to the left of the dividing vertical and the other lies to the right (see the sketch after this list).
The final answer is the minimum of dLmin, dRmin, and dLRmin.
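The fourth step is the non-obvious part. A minimal sketch of that strip check (my own illustration; it assumes the strip already contains only points within d of the dividing line):
#include <algorithm>
#include <cmath>
#include <vector>

struct Pt { double x, y; };

// After sorting the strip by y, each point only needs to be compared
// against the few successors whose y-gap is below the current best
// distance d (classically at most 7 of them), giving linear time.
double stripClosest(std::vector<Pt> strip, double d)
{
    std::sort(strip.begin(), strip.end(),
              [](const Pt &a, const Pt &b) { return a.y < b.y; });
    for (size_t i = 0; i < strip.size(); ++i)
        for (size_t j = i + 1; j < strip.size() &&
                               strip[j].y - strip[i].y < d; ++j)
            d = std::min(d, std::hypot(strip[i].x - strip[j].x,
                                       strip[i].y - strip[j].y));
    return d;
}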
I can't immediately think of a quicker alternative to the brute-force technique (although there must be plenty), but whatever algorithm you choose, don't calculate the distance between each pair of points. If you need to compare distances, just compare the squares of the distances to avoid the expensive and entirely redundant square root.
One possibility would be to sort the points by their X coordinates (or the Y - it doesn't really matter which, just be consistent). You can then use that to eliminate comparisons with many of the other points. When you're looking at the distance between point[i] and point[j], if the X distance alone is greater than your current shortest distance, then point[j+1]...point[N] can be eliminated as well (assuming i < j - if j < i, then it's point[0]...point[i] that are eliminated). A sketch of this pruning follows below.
If your points start out as polar coordinates, you can use a variation of the same thing - sort by distance from the origin, and if the difference in distance from the origin is greater than your current shortest distance, you can eliminate that point, and all the others that are farther from (or closer to) the origin than the one you're currently considering.
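A minimal sketch of that sort-and-prune idea (my own illustration, working with squared distances as suggested above):
#include <algorithm>
#include <limits>
#include <vector>

struct Pt { double x, y; };

// After sorting by x, the inner loop can stop as soon as the x-gap
// alone exceeds the best (squared) distance found so far, since all
// later points only get farther away in x.
double closestSq(std::vector<Pt> pts)
{
    std::sort(pts.begin(), pts.end(),
              [](const Pt &a, const Pt &b) { return a.x < b.x; });
    double best = std::numeric_limits<double>::infinity();
    for (size_t i = 0; i < pts.size(); ++i)
        for (size_t j = i + 1; j < pts.size(); ++j) {
            double dx = pts[j].x - pts[i].x;
            if (dx * dx >= best) break;   // prune the rest of the inner loop
            double dy = pts[j].y - pts[i].y;
            best = std::min(best, dx * dx + dy * dy);
        }
    return best;   // squared distance of the closest pair
}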
You can extract the closest pair in linear time from the Delaunay triangulation, and conversely from the Voronoi diagram.
There is a standard algorithm for this problem; you can find it here:
http://www.cs.mcgill.ca/~cs251/ClosestPair/ClosestPairPS.html
And here is my implementation of this algorithm; sorry, it's without comments:
static long distSq(Point a, Point b) {
    return ((long) (a.x - b.x) * (long) (a.x - b.x) + (long) (a.y - b.y) * (long) (a.y - b.y));
}

static long ccw(Point p1, Point p2, Point p3) {
    return (long) (p2.x - p1.x) * (long) (p3.y - p1.y) - (long) (p2.y - p1.y) * (long) (p3.x - p1.x);
}

static List<Point> convexHull(List<Point> P) {
    if (P.size() < 3) {
        //WTF
        return null;
    }
    int k = 0;
    for (int i = 0; i < P.size(); i++) {
        if (P.get(i).y < P.get(k).y || (P.get(i).y == P.get(k).y && P.get(i).x < P.get(k).x)) {
            k = i;
        }
    }
    Collections.swap(P, k, P.size() - 1);
    final Point o = P.get(P.size() - 1);
    P.remove(P.size() - 1);

    Collections.sort(P, new Comparator() {
        public int compare(Object o1, Object o2) {
            Point a = (Point) o1;
            Point b = (Point) o2;
            long t1 = (long) (a.y - o.y) * (long) (b.x - o.x) - (long) (a.x - o.x) * (long) (b.y - o.y);
            if (t1 == 0) {
                long tt = distSq(o, a);
                tt -= distSq(o, b);
                if (tt > 0) {
                    return 1;
                } else if (tt < 0) {
                    return -1;
                }
                return 0;
            }
            if (t1 < 0) {
                return -1;
            }
            return 1;
        }
    });

    List<Point> hull = new ArrayList<Point>();
    hull.add(o);
    hull.add(P.get(0));
    for (int i = 1; i < P.size(); i++) {
        while (hull.size() >= 2 &&
               ccw(hull.get(hull.size() - 2), hull.get(hull.size() - 1), P.get(i)) <= 0) {
            hull.remove(hull.size() - 1);
        }
        hull.add(P.get(i));
    }
    return hull;
}

static long nearestPoints(List<Point> P, int l, int r) {
    if (r - l == P.size()) {
        Collections.sort(P, new Comparator() {
            public int compare(Object o1, Object o2) {
                int t = ((Point) o1).x - ((Point) o2).x;
                if (t == 0) {
                    return ((Point) o1).y - ((Point) o2).y;
                }
                return t;
            }
        });
    }
    if (r - l <= 100) {
        long ret = distSq(P.get(l), P.get(l + 1));
        for (int i = l; i < r; i++) {
            for (int j = i + 1; j < r; j++) {
                ret = Math.min(ret, distSq(P.get(i), P.get(j)));
            }
        }
        return ret;
    }
    int c = (l + r) / 2;
    long lD = nearestPoints(P, l, c);
    long lR = nearestPoints(P, c + 1, r);
    long ret = Math.min(lD, lR);
    Set<Point> set = new TreeSet<Point>(new Comparator<Point>() {
        public int compare(Point o1, Point o2) {
            int t = o1.y - o2.y;
            if (t == 0) {
                return o1.x - o2.x;
            }
            return t;
        }
    });
    for (int i = l; i < r; i++) {
        set.add(P.get(i));
    }
    int x = P.get(c).x;
    double theta = Math.sqrt(ret);
    Point[] Q = set.toArray(new Point[0]);
    Point[] T = new Point[Q.length];
    int pos = 0;
    for (int i = 0; i < Q.length; i++) {
        if (Q[i].x - x + 1 > theta) {
            continue;
        }
        T[pos++] = Q[i];
    }
    for (int i = 0; i < pos; i++) {
        for (int j = 1; j < 7 && i + j < pos; j++) {
            ret = Math.min(ret, distSq(T[i], T[j + i]));
        }
    }
    return ret;
}
From your question it is not clear if you are looking for the distance of the segment or the segment itself. Assuming you are looking for the distance (the segment is then a simple modification, once you know which two points have the minimal distance), given 5 points, numbered from 1 to 5, you need to:
compare 1 with 2, 3, 4, 5; then
compare 2 with 3, 4, 5; then
compare 3 with 4, 5; then
compare 4 with 5.
If I am not wrong, given the commutativity of the distance, you do not need to perform other comparisons.
In Python, it may look something like this:
import numpy as np

def find_min_distance_of_a_cloud(cloud):
    """
    Given a cloud of points in n-dim space, provides the minimal distance.
    :param cloud: list of nX1-d vectors, as ndarray.
    :return:
    """
    dist_min = None
    for i, p_i in enumerate(cloud[:-1]):
        new_dist_min = np.min([np.linalg.norm(p_i - p_j) for p_j in cloud[(i + 1):]])
        if dist_min is None or dist_min > new_dist_min:
            dist_min = new_dist_min
    return dist_min
That can be tested with something like the following code:
from nose.tools import assert_equal

def test_find_min_distance_of_a_cloud_1pt():
    cloud = [np.array((1, 1, 1)), np.array((0, 0, 0))]
    min_out = find_min_distance_of_a_cloud(cloud)
    assert_equal(min_out, np.sqrt(3))

def test_find_min_distance_of_a_cloud_5pt():
    cloud = [np.array((0, 0, 0)),
             np.array((1, 1, 0)),
             np.array((2, 1, 4)),
             np.array((3, 4, 4)),
             np.array((5, 3, 4))]
    min_out = find_min_distance_of_a_cloud(cloud)
    assert_equal(min_out, np.sqrt(2))
If more than two points can have the same minimal distance and you are looking for the segments, you again need to modify the proposed code, and the output will be the list of points whose distance is minimal (or couples of points). Hope it helps!
Here is a code example demonstrating how to implement the divide and conquer algorithm. For the algorithm to work, the points' x-values must be unique. The non-obvious part of the algorithm is that you must sort along both the x and the y axis; otherwise you can't find minimum distances over the split seam in linear time.
from collections import namedtuple
from itertools import combinations
from math import sqrt

IxPoint = namedtuple('IxPoint', ['x', 'y', 'i'])
ClosestPair = namedtuple('ClosestPair', ['distance', 'i', 'j'])

def check_distance(cp, p1, p2):
    xd = p1.x - p2.x
    yd = p1.y - p2.y
    dist = sqrt(xd * xd + yd * yd)
    if dist < cp.distance:
        return ClosestPair(dist, p1.i, p2.i)
    return cp

def closest_helper(cp, xs, ys):
    n = len(xs)
    if n <= 3:
        for p1, p2 in combinations(xs, 2):
            cp = check_distance(cp, p1, p2)
        return cp
    # Divide
    mid = n // 2
    mid_x = xs[mid].x
    xs_left = xs[:mid]
    xs_right = xs[mid:]
    ys_left = [p for p in ys if p.x < mid_x]
    ys_right = [p for p in ys if p.x >= mid_x]
    # Conquer
    cp_left = closest_helper(cp, xs_left, ys_left)
    cp_right = closest_helper(cp, xs_right, ys_right)
    if cp_left.distance < cp_right.distance:
        cp = cp_left
    else:
        cp = cp_right
    ys_strip = [p for p in ys if abs(p.x - mid_x) < cp.distance]
    n_strip = len(ys_strip)
    for i in range(n_strip):
        for j in range(i + 1, n_strip):
            p1, p2 = ys_strip[j], ys_strip[i]
            if not p1.y - p2.y < cp.distance:
                break
            cp = check_distance(cp, p1, p2)
    return cp

def closest_pair(points):
    points = [IxPoint(p[0], p[1], i)
              for (i, p) in enumerate(points)]
    xs = sorted(points, key = lambda p: p.x)
    xs = [IxPoint(p.x + i * 1e-8, p.y, p.i)
          for (i, p) in enumerate(xs)]
    ys = sorted(xs, key = lambda p: p.y)
    cp = ClosestPair(float('inf'), -1, -1)
    return closest_helper(cp, xs, ys)
