Detect and fix text skew by rotating the image

Is there a way (using something like OpenCV) to detect text skew and correct it by rotating the image? Pretty much like this?
Rotating an image seems easy enough if you know the angle, but for the images I'm processing I won't know it; the skew will need to be detected somehow.

Based on your comment above, here is code adapted from the tutorial linked here; it works fine for the image above.
Source
Rotated
Mat src = imread("text.png", 0);
Mat thr, dst;
threshold(src, thr, 200, 255, THRESH_BINARY_INV);
imshow("thr", thr);

// Collect the coordinates of all foreground (text) pixels
std::vector<cv::Point> points;
cv::Mat_<uchar>::iterator it = thr.begin<uchar>();
cv::Mat_<uchar>::iterator end = thr.end<uchar>();
for (; it != end; ++it)
    if (*it)
        points.push_back(it.pos());

// Fit a rotated rectangle around the text pixels and rotate by its angle
cv::RotatedRect box = cv::minAreaRect(cv::Mat(points));
cv::Mat rot_mat = cv::getRotationMatrix2D(box.center, box.angle, 1);

//cv::Mat rotated(src.size(), src.type(), Scalar(255,255,255));
Mat rotated;
cv::warpAffine(src, rotated, rot_mat, src.size(), cv::INTER_CUBIC);
imshow("rotated", rotated);
Edit: Also see the answer here; it might be helpful.
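For reference, here is a rough Python sketch of the same minAreaRect idea (not from the original answer; the angle convention of minAreaRect differs between OpenCV versions, so the folding below may need adjusting for yours):

import cv2
import numpy as np

src = cv2.imread("text.png", cv2.IMREAD_GRAYSCALE)
_, thr = cv2.threshold(src, 200, 255, cv2.THRESH_BINARY_INV)

# (x, y) coordinates of all foreground (text) pixels
ys, xs = np.where(thr > 0)
coords = np.column_stack((xs, ys)).astype(np.float32)

center, size, angle = cv2.minAreaRect(coords)
# Fold the reported angle into +/- 45 degrees before correcting
if angle > 45:
    angle -= 90
elif angle < -45:
    angle += 90

rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(src, rot_mat, (src.shape[1], src.shape[0]),
                         flags=cv2.INTER_CUBIC)
cv2.imwrite("rotated.png", rotated)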

Here's an implementation of the Projection Profile Method for skew angle estimation. The idea is to rotate the image at various candidate angles within a search interval and, for each rotation, compute a histogram of pixel sums per row. The skew angle is the rotation whose profile shows the sharpest peaks, scored here as the sum of squared differences between adjacent histogram entries; the image is then rotated by that angle to correct the skew.
Input
Result
Skew angle: -5
import cv2
import numpy as np
from scipy.ndimage import rotate  # scipy.ndimage.interpolation is deprecated in newer SciPy

def correct_skew(image, delta=1, limit=5):
    def determine_score(arr, angle):
        data = rotate(arr, angle, reshape=False, order=0)
        histogram = np.sum(data, axis=1, dtype=float)
        score = np.sum((histogram[1:] - histogram[:-1]) ** 2, dtype=float)
        return histogram, score

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    scores = []
    angles = np.arange(-limit, limit + delta, delta)
    for angle in angles:
        histogram, score = determine_score(thresh, angle)
        scores.append(score)

    best_angle = angles[scores.index(max(scores))]

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    corrected = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC,
                               borderMode=cv2.BORDER_REPLICATE)

    return best_angle, corrected

if __name__ == '__main__':
    image = cv2.imread('1.png')
    angle, corrected = correct_skew(image)
    print('Skew angle:', angle)
    cv2.imshow('corrected', corrected)
    cv2.waitKey()
Note: You may have to adjust the delta or limit values depending on the image. delta controls the iteration step and limit controls the maximum angle; the method simply tries every angle in steps of delta, so as written it only corrects skew in the range of +/- 5 degrees. If you need to correct a larger angle, increase the limit value.
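If you do need a wider search, a call along these lines (using the function above; the values are just illustrative) widens the range and refines the step:

# Illustrative only: search +/- 45 degrees in half-degree steps
angle, corrected = correct_skew(image, delta=0.5, limit=45)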

Here is a Java version (using the OpenCV Java bindings) for your reference.
package com.test13;

import org.opencv.core.*;
import org.opencv.imgproc.Imgproc;
import org.opencv.imgcodecs.Imgcodecs;

public class EdgeDetection {

    static { System.loadLibrary(Core.NATIVE_LIBRARY_NAME); }

    public static void main(String[] args) throws Exception {
        Mat src = Imgcodecs.imread("src//data//inclined_text.jpg");

        // Convert to grayscale
        Mat src_gray = new Mat();
        Imgproc.cvtColor(src, src_gray, Imgproc.COLOR_BGR2GRAY);
        Imgcodecs.imwrite("src//data//inclined_text_src_gray.jpg", src_gray);

        // Invert so the text pixels become non-zero
        Mat output = new Mat();
        Core.bitwise_not(src_gray, output);
        Imgcodecs.imwrite("src//data//inclined_text_output.jpg", output);

        // Fit a rotated rectangle around all non-zero (text) pixels
        Mat points = Mat.zeros(output.size(), output.type());
        Core.findNonZero(output, points);
        MatOfPoint mpoints = new MatOfPoint(points);
        MatOfPoint2f points2f = new MatOfPoint2f(mpoints.toArray());
        RotatedRect box = Imgproc.minAreaRect(points2f);

        // Rotate the source image by the rectangle's angle around its center
        Mat src_squares = src.clone();
        Mat rot_mat = Imgproc.getRotationMatrix2D(box.center, box.angle, 1);
        Mat rotated = new Mat();
        Imgproc.warpAffine(src_squares, rotated, rot_mat, src_squares.size(), Imgproc.INTER_CUBIC);
        Imgcodecs.imwrite("src//data//inclined_text_squares_rotated.jpg", rotated);
    }
}

private fun main() {
    val bmp: Bitmap? = null // Any bitmap (if you are working with bitmap)
    var mRgba = Mat()       // else you can directly use Mat in onCameraFrame
    val mGray = Mat()
    val bmp32: Bitmap = bmp!!.copy(Bitmap.Config.ARGB_8888, true)
    Utils.bitmapToMat(bmp32, mRgba)
    Imgproc.cvtColor(mRgba, mGray, Imgproc.COLOR_BGR2GRAY)
    mRgba = makeOrientationCorrection(mRgba, mGray) // here the actual magic starts
    Imgproc.cvtColor(mRgba, mGray, Imgproc.COLOR_BGR2GRAY)
    val bmpOutX = Bitmap.createBitmap(
        mRgba.cols(),
        mRgba.rows(),
        Bitmap.Config.ARGB_8888
    )
    Utils.matToBitmap(mRgba, bmpOutX)
    binding.imagePreview.setImageBitmap(bmpOutX!!)
}
private fun makeOrientationCorrection(mRGBA: Mat, mGRAY: Mat): Mat {
    val dst = Mat()
    val cdst = Mat()
    val cdstP: Mat

    // Detect edges, then find line segments with the probabilistic Hough transform
    Imgproc.Canny(mGRAY, dst, 50.0, 200.0, 3, false)
    Imgproc.cvtColor(dst, cdst, Imgproc.COLOR_GRAY2BGR)
    cdstP = cdst.clone()
    val linesP = Mat()
    Imgproc.HoughLinesP(dst, linesP, 1.0, Math.PI / 180, 50, 50.0, 10.0)

    var biggestLineX1 = 0.0
    var biggestLineY1 = 0.0
    var biggestLineX2 = 0.0
    var biggestLineY2 = 0.0
    var biggestLine = 0.0

    // Draw all detected segments (red) for debugging
    for (x in 0 until linesP.rows()) {
        val l = linesP[x, 0]
        Imgproc.line(
            cdstP, org.opencv.core.Point(l[0], l[1]),
            org.opencv.core.Point(l[2], l[3]),
            Scalar(0.0, 0.0, 255.0), 3, Imgproc.LINE_AA, 0)
    }

    // Keep the longest roughly-horizontal segment and use it to estimate the skew
    for (x in 0 until linesP.rows()) {
        val l = linesP[x, 0]
        val x1 = l[0]
        val y1 = l[1]
        val x2 = l[2]
        val y2 = l[3]
        val lineLength = sqrt((x2 - x1).pow(2.0) + (y2 - y1).pow(2.0))
        if (biggestLine < lineLength) {
            val angleOfRotationX1 = angleOf(PointF(x1.toFloat(), y1.toFloat()), PointF(x2.toFloat(), y2.toFloat()))
            Log.e("angleOfRotationX1", "$angleOfRotationX1")
            if (angleOfRotationX1 < 45.0 || angleOfRotationX1 > 270.0) {
                biggestLine = lineLength
                if (angleOfRotationX1 < 45.0) {
                    biggestLineX1 = x1
                    biggestLineY1 = y1
                    biggestLineX2 = x2
                    biggestLineY2 = y2
                }
                if (angleOfRotationX1 > 270.0) {
                    biggestLineX1 = x2
                    biggestLineY1 = y2
                    biggestLineX2 = x1
                    biggestLineY2 = y1
                }
            }
        }
        if (x == linesP.rows() - 1) {
            // Draw the chosen reference line (blue) for debugging
            Imgproc.line(
                cdstP, org.opencv.core.Point(biggestLineX1, biggestLineY1),
                org.opencv.core.Point(biggestLineX2, biggestLineY2),
                Scalar(255.0, 0.0, 0.0), 3, Imgproc.LINE_AA, 0)
        }
    }

    var angle = angleOf(PointF(biggestLineX1.toFloat(), biggestLineY1.toFloat()), PointF(biggestLineX2.toFloat(), biggestLineY2.toFloat()))
    Log.e("angleOfRotationX2", "$angle")
    angle -= (angle * 2)
    return deskew(mRGBA, angle)
}
fun angleOf(p1: PointF, p2: PointF): Double {
    // Angle of the line p1 -> p2 in degrees, normalized to [0, 360)
    val deltaY = (p1.y - p2.y).toDouble()
    val deltaX = (p2.x - p1.x).toDouble()
    val result = Math.toDegrees(Math.atan2(deltaY, deltaX))
    return if (result < 0) 360.0 + result else result
}
private fun deskew(src: Mat, angle: Double): Mat {
    val center = org.opencv.core.Point((src.width() / 2).toDouble(), (src.height() / 2).toDouble())
    val scaleBy = if (angle < 0) {
        1.0 + ((0.5 * angle) / 45) // max scale down by 0.50 (50%) based on angle
    } else {
        1.0 - ((0.3 * angle) / 45) // max scale down by 0.30 (30%) based on angle
    }
    Log.e("scaleBy", "" + scaleBy)
    val rotImage = Imgproc.getRotationMatrix2D(center, angle, scaleBy)
    val size = Size(src.width().toDouble(), src.height().toDouble())
    Imgproc.warpAffine(src, src, rotImage, size, Imgproc.INTER_LINEAR + Imgproc.CV_WARP_FILL_OUTLIERS)
    return src
}
Make sure you run this makeOrientationCorrection() method on another thread; otherwise, the UI won't update for 2-5 seconds.

Related

C# EmguCV Resize Mat but keep bounds/resolution

I've tried many things, but all my attempts fail.
I need to resize a gray image (2560x1440) to a lower or higher resolution, then set the bounds back to the original size (2560x1440) while keeping the resized image centered.
I'm using EmguCV 4.3 and Mat. I tried many approaches, including using a ROI in the Mat constructor and a CopyTo, but nothing works; the result is always a new Mat with the resized bounds.
Example of the required:
Source image: (2560x1440)
50% resized, but keep same bounds as source (2560x1440)
300% resized, but keep same bounds as source (2560x1440)
Use WarpAffine to apply an affine transformation to the image. With the transformation matrix you can apply scale and translate transformations; rotation is also supported but not covered in my example. Translation values can also be negative.
The WarpAffine method has some more parameters you can play around with.
public void Test()
{
    var img = new Mat("Bmv60.png", ImreadModes.Grayscale);

    Mat upscaled = GetContentScaled(img, 2.0, 0.5, 0, 0);
    upscaled.Save("scaled1.png");

    Mat downscaled = GetContentScaled(img, 0.5, 0.5, 0, 0);
    downscaled.Save("scaled2.png");
}

private Mat GetContentScaled(Mat src, double xScale, double yScale, double xTrans, double yTrans, Inter interpolation = Inter.Linear)
{
    var dst = new Mat(src.Size, src.Depth, src.NumberOfChannels);
    var translateTransform = new Matrix<double>(2, 3)
    {
        [0, 0] = xScale, // x scale
        [1, 1] = yScale, // y scale
        [0, 2] = xTrans + (src.Width - src.Width * xScale) / 2.0,  // x translation + compensation for x scaling
        [1, 2] = yTrans + (src.Height - src.Height * yScale) / 2.0 // y translation + compensation for y scaling
    };
    CvInvoke.WarpAffine(src, dst, translateTransform, dst.Size, interpolation);
    return dst;
}
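For comparison, here is a rough Python/OpenCV sketch of the same idea; the matrix layout and centering compensation mirror the C# code above, and the function and file names are just illustrative:

import cv2
import numpy as np

def get_content_scaled(src, x_scale, y_scale, x_trans=0.0, y_trans=0.0):
    h, w = src.shape[:2]
    # 2x3 affine matrix: scale on the diagonal, translation in the last column;
    # the extra terms keep the scaled content centered within the original bounds
    m = np.float32([
        [x_scale, 0, x_trans + (w - w * x_scale) / 2.0],
        [0, y_scale, y_trans + (h - h * y_scale) / 2.0],
    ])
    return cv2.warpAffine(src, m, (w, h), flags=cv2.INTER_LINEAR)

img = cv2.imread("Bmv60.png", cv2.IMREAD_GRAYSCALE)
cv2.imwrite("scaled1.png", get_content_scaled(img, 2.0, 0.5))
cv2.imwrite("scaled2.png", get_content_scaled(img, 0.5, 0.5))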
I feel as if there should be a more elegant way to do this; however, I offer two extension methods:
static void CopyToCenter(this Image<Gray, byte> imgScr, Image<Gray, byte> imgDst)
{
    int dx = (imgScr.Cols - imgDst.Cols) / 2;
    int dy = (imgScr.Rows - imgDst.Rows) / 2;
    byte[,,] scrData = imgScr.Data;
    byte[,,] dstData = imgDst.Data;

    for (int v = 0; v < imgDst.Rows; v++)
    {
        for (int u = 0; u < imgDst.Cols; u++)
        {
            dstData[v, u, 0] = scrData[v + dy, u + dx, 0];
        }
    }
}

static void CopyFromCenter(this Image<Gray, byte> imgDst, Image<Gray, byte> imgScr)
{
    int dx = (imgDst.Cols - imgScr.Cols) / 2;
    int dy = (imgDst.Rows - imgScr.Rows) / 2;
    byte[,,] scrData = imgScr.Data;
    byte[,,] dstData = imgDst.Data;

    for (int v = 0; v < imgScr.Rows; v++)
    {
        for (int u = 0; u < imgScr.Cols; u++)
        {
            dstData[v + dy, u + dx, 0] = scrData[v, u, 0];
        }
    }
}
Which can be used like this:
static void Main(string[] args)
{
    double scaleFactor = 0.8;
    Image<Gray, byte> orginalImage = new Image<Gray, byte>("Bmv60.png");
    Image<Gray, byte> scaledImage = orginalImage.Resize(scaleFactor, Inter.Linear);
    Image<Gray, byte> outputImage = new Image<Gray, byte>(orginalImage.Size);

    if (scaleFactor > 1)
    {
        scaledImage.CopyToCenter(outputImage);
    }
    else
    {
        outputImage.CopyFromCenter(scaledImage);
    }
}
You didn't request a specific language, so I hope C# is useful.

How to make a curved sheet (cube) in OpenSCAD?

How can I curve a sheet (cube)? I'd like to control the angle of the bend/curve.
e.g.
cube([50,50,2]);
You can rotate_extrude() a rectangle with the parameter angle. This requires OpenSCAD version 2016.xx or newer; see the documentation.
It is necessary to install a development snapshot; see the OpenSCAD download page.
$fn= 360;
width = 10; // width of rectangle
height = 2; // height of rectangle
r = 50; // radius of the curve
a = 30; // angle of the curve
rotate_extrude(angle = a) translate([r, 0, 0]) square(size = [height, width], center = true);
looks like this:
The curve is defined by a radius and an angle. I think it is more realistic to use other dimensions, like the chord length and dh in this sketch, and calculate the radius and angle from them:
$fn = 360;
w = 10;  // width of rectangle
h = 2;   // height of rectangle
l = 25;  // length of chord of the curve
dh = 2;  // delta height of the curve

module curve(width, height, length, dh) {
    // calculate radius and angle
    r = ((length/2)*(length/2) - dh*dh)/(2*dh);
    a = asin((length/2)/r);
    rotate_extrude(angle = a) translate([r, 0, 0]) square(size = [height, width], center = true);
}

curve(w, h, l, dh);
Edit 30.09.2019: Following the comment by Cfreitas, the resulting shape is additionally moved to the origin, so the dimensions can be read off the coordinate axes:
$fn = 360;
w = 10;  // width of rectangle
h = 2;   // height of rectangle
l = 30;  // length of chord of the curve
dh = 4;  // delta height of the curve

module curve(width, height, length, dh) {
    r = (pow(length/2, 2) + pow(dh, 2))/(2*dh);
    a = 2*asin((length/2)/r);
    translate([-(r - dh), 0, -width/2]) rotate([0, 0, -a/2]) rotate_extrude(angle = a) translate([r, 0, 0]) square(size = [height, width], center = true);
}

curve(w, h, l, dh);
and the result:
Edit 19.09.2020: There was a typo in the last edit: In the first 'translate' the local 'width' should be used instead of 'w'. Corrected it in the code above.
I can do it this way, but it would be better if you could specify the bend/curve in degrees as an argument to the module:
$fn = 300;

module oval(w, h, height, center = false) {
    scale([1, h/w, 1]) cylinder(h = height, r = w, center = center);
}

module curved(w, l, h) {
    difference() {
        oval(w, l, h);
        translate([0.5, -1, -1]) color("red") oval(w, l+2, h+2);
    }
}

curved(10, 20, 30);
Using the concept from a_manthey_67's answer, this corrects the math and centers the resulting object (aligning the chord with the y axis):
module bentCube(width, height, length, dh) {
    // calculate radius and angle from the chord length and sagitta (dh)
    r = (length*length + 4*dh*dh)/(8*dh);
    a = 2*asin(length/(2*r));
    translate([-r, 0, 0]) rotate([0, 0, -a/2])
        rotate_extrude(angle = a) translate([r, 0, 0]) square(size = [height, width], center = true);
}
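As a quick sanity check of that chord/sagitta math (not part of the original answers), the radius and angle can be verified numerically, for example with the values used earlier (length 30, dh 4):

import math

length, dh = 30.0, 4.0
r = (length * length + 4 * dh * dh) / (8 * dh)   # radius from chord and sagitta
a = 2 * math.asin(length / (2 * r))              # subtended angle in radians

# The sagitta recovered from r and a should match dh
assert abs((r - r * math.cos(a / 2)) - dh) < 1e-9
print(r, math.degrees(a))   # ~30.125 and ~59.7 degrees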
Or, if you just want something with a fixed length and a certain bend angle, do this:
module curve(width, height, length, a) {
    if (a > 0) {
        r = (360 * (length/a)) / (2 * PI);  // OpenSCAD's built-in constant is PI
        translate([-r - height/2, 0, 0])
            rotate_extrude(angle = a)
                translate([r, 0, 0])
                    square(size = [height, width], center = false);
    } else {
        translate([-height/2, 0, width])
            rotate(a = 270, v = [1, 0, 0])
                linear_extrude(height = length)
                    square(size = [height, width], center = false);
    }
}
The if (a > 0) statement is needed to handle the case where the bending angle is 0, which, for a curved surface, would imply an infinite radius.
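A quick numeric check of that radius formula (values are illustrative only): an angle of 90 degrees with a length of 20 gives a radius of about 12.73, and the 90-degree arc of that radius is 20 units long again.

import math

# Radius chosen so that the arc of 'a' degrees has the requested length
length, a = 20.0, 90.0
r = (360.0 * (length / a)) / (2.0 * math.pi)   # ~12.732
arc = 2.0 * math.pi * r * (a / 360.0)          # back to 20.0
print(r, arc)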
Animated GIF here

Incorrect behavior of image undistortion algorithm

I'm trying to create a program that receives a photograph of a surface from a certain angle and position, and generates an image of what an isometric projection of the plane would look like. For example, given a photo of a checkerboard
and information about the positioning and properties of the camera, it could reconstruct a section of the undistorted pattern
My approach has been divided into two parts. The first part is to create four rays, coming from the camera, following the four corners of its field of view. I compute where these rays intersect with the plane, to form the quadrangle of the area of the plane that the camera can see, like this:
The second part is to render an isometric projection of the plane with the textured quadrangle. I divide the quadrangle into two triangles; then, for each pixel of the rendering, I convert its Cartesian coordinates into barycentric coordinates relative to each triangle, and convert those back into Cartesian coordinates relative to the corresponding triangle covering half of the photograph, so that I can sample a color.
(I am aware that this could be done more efficiently with OpenGL, but I would like to avoid it for logistical reasons. I am also aware that the quality will be affected by the lack of interpolation; that does not matter for this task.)
I am testing the program with some data, but the rendering does not occur as intended. Here is the photograph:
And here is the program output:
I believe that the problem is occurring in the quadrangle rendering, because I have graphed the projected vertices, and they appear to be correct:
I am by no means an expert in computer graphics, so I would very much appreciate if someone had any idea what would cause this problem. Here is the relevant code:
public class ImageProjector {

    private static final EquationSystem ground = new EquationSystem(0, 1, 0, 0);

    private double fov;
    private double aspectRatio;
    private vec3d position;
    private double xAngle;
    private double yAngle;
    private double zAngle;

    public ImageProjector(double fov, double aspectRatio, vec3d position, double xAngle, double yAngle, double zAngle) {
        this.fov = fov;
        this.aspectRatio = aspectRatio;
        this.position = position;
        this.xAngle = xAngle;
        this.yAngle = yAngle;
        this.zAngle = zAngle;
    }

    public vec3d[] computeVertices() {
        return new vec3d[] {
            computeVertex(1, 1),
            computeVertex(1, -1),
            computeVertex(-1, -1),
            computeVertex(-1, 1)
        };
    }

    private vec3d computeVertex(int horizCoef, int vertCoef) {
        // Ray direction for this corner of the field of view
        vec3d p2 = new vec3d(tan(fov / 2) * horizCoef, tan((fov / 2) / aspectRatio) * vertCoef, 1);
        p2 = p2.rotateXAxis(xAngle);
        p2 = p2.rotateYAxis(yAngle);
        p2 = p2.rotateZAxis(zAngle);
        if (p2.y > 0) {
            throw new RuntimeException("sky is visible to camera: " + p2);
        }
        p2 = p2.plus(position);
        //System.out.println("passing through " + p2);
        // Intersect the ray with the ground plane
        EquationSystem line = new LineBuilder(position, p2).build();
        return new vec3d(line.add(ground).solveVariables());
    }
}
public class barypoint {

    public final double u;
    public final double v;
    public final double w;

    public barypoint(double u, double v, double w) {
        this.u = u;
        this.v = v;
        this.w = w;
    }

    public barypoint(vec2d p, vec2d a, vec2d b, vec2d c) {
        vec2d v0 = b.minus(a);
        vec2d v1 = c.minus(a);
        vec2d v2 = p.minus(a);
        double d00 = v0.dotProduct(v0);
        double d01 = v0.dotProduct(v1);
        double d11 = v1.dotProduct(v1);
        double d20 = v2.dotProduct(v0);
        double d21 = v2.dotProduct(v1);
        double denom = d00 * d11 - d01 * d01;
        v = (d11 * d20 - d01 * d21) / denom;
        w = (d00 * d21 - d01 * d20) / denom;
        u = 1.0 - v - w;
    }

    public barypoint(vec2d p, Triangle triangle) {
        this(p, triangle.a, triangle.b, triangle.c);
    }

    public vec2d toCartesian(vec2d a, vec2d b, vec2d c) {
        return new vec2d(
            u * a.x + v * b.x + w * c.x,
            u * a.y + v * b.y + w * c.y
        );
    }

    public vec2d toCartesian(Triangle triangle) {
        return toCartesian(triangle.a, triangle.b, triangle.c);
    }
}
public class ImageTransposer {

    private BufferedImage source;
    private BufferedImage receiver;

    public ImageTransposer(BufferedImage source, BufferedImage receiver) {
        this.source = source;
        this.receiver = receiver;
    }

    public void transpose(Triangle sourceCoords, Triangle receiverCoords) {
        // Bounding box of the destination triangle
        int xMin = (int) Double.min(Double.min(receiverCoords.a.x, receiverCoords.b.x), receiverCoords.c.x);
        int xMax = (int) Double.max(Double.max(receiverCoords.a.x, receiverCoords.b.x), receiverCoords.c.x);
        int yMin = (int) Double.min(Double.min(receiverCoords.a.y, receiverCoords.b.y), receiverCoords.c.y);
        int yMax = (int) Double.max(Double.max(receiverCoords.a.y, receiverCoords.b.y), receiverCoords.c.y);

        for (int x = xMin; x <= xMax; x++) {
            for (int y = yMin; y <= yMax; y++) {
                vec2d p = new vec2d(x, y);
                if (receiverCoords.contains(p) && p.x >= 0 && p.y >= 0 && p.x < receiver.getWidth() && y < receiver.getHeight()) {
                    // Map the destination pixel back into the source triangle and sample
                    barypoint bary = new barypoint(p, receiverCoords);
                    vec2d sp = bary.toCartesian(sourceCoords);
                    if (sp.x >= 0 && sp.y >= 0 && sp.x < source.getWidth() && sp.y < source.getHeight()) {
                        receiver.setRGB(x, y, source.getRGB((int) sp.x, (int) sp.y));
                    }
                }
            }
        }
    }
}
public class ProjectionRenderer {

    private String imagePath;
    private BufferedImage mat;
    private vec3d[] vertices;
    private vec2d pos;
    private double scale;
    private int width;
    private int height;

    public boolean error = false;

    public ProjectionRenderer(String image, BufferedImage mat, vec3d[] vertices, vec3d pos, double scale, int width, int height) {
        this.imagePath = image;
        this.mat = mat;
        this.vertices = vertices;
        this.pos = new vec2d(pos.x, pos.z);
        this.scale = scale;
        this.width = width;
        this.height = height;
    }

    public void run() {
        try {
            BufferedImage image = ImageIO.read(new File(imagePath));

            // Project the ground-plane vertices into output image coordinates
            vec2d[] transVerts = Arrays.stream(vertices)
                .map(v -> new vec2d(v.x, v.z))
                .map(v -> v.minus(pos))
                .map(v -> v.multiply(scale))
                .map(v -> v.plus(new vec2d(mat.getWidth() / 2, mat.getHeight() / 2)))
                // this fixes the image being upside down
                .map(v -> new vec2d(v.x, mat.getHeight() / 2 + (mat.getHeight() / 2 - v.y)))
                .toArray(vec2d[]::new);

            System.out.println(Arrays.toString(transVerts));

            Triangle sourceTri1 = new Triangle(
                new vec2d(0, 0),
                new vec2d(image.getWidth(), 0),
                new vec2d(0, image.getHeight())
            );
            Triangle sourceTri2 = new Triangle(
                new vec2d(image.getWidth(), image.getHeight()),
                new vec2d(0, image.getHeight()),
                new vec2d(image.getWidth(), 0)
            );

            Triangle destTri1 = new Triangle(
                transVerts[3],
                transVerts[0],
                transVerts[2]
            );
            Triangle destTri2 = new Triangle(
                transVerts[1],
                transVerts[2],
                transVerts[0]
            );

            ImageTransposer transposer = new ImageTransposer(image, mat);
            System.out.println("transposing " + sourceTri1 + " -> " + destTri1);
            transposer.transpose(sourceTri1, destTri1);
            System.out.println("transposing " + sourceTri2 + " -> " + destTri2);
            transposer.transpose(sourceTri2, destTri2);
        } catch (IOException e) {
            e.printStackTrace();
            error = true;
        }
    }
}
The reason it's not working is that your transpose function works entirely with 2D co-ordinates, so it cannot compensate for the image distortion resulting from 3D perspective. You have effectively implemented a 2D affine transformation, under which parallel lines remain parallel; they do not remain parallel under a 3D perspective transform. If you draw a straight line between two points on your triangle, you can linearly interpolate between them by linearly interpolating the barycentric co-ordinates, and vice versa.
To take Z into account, you can keep the barycentric co-ordinate approach, but provide a Z co-ordinate for each point in sourceCoords. The trick is to interpolate between 1/Z values (which can be linearly interpolated in a perspective image) instead of interpolating Z itself. So instead of interpolating what are effectively the texture co-ordinates for each point, interpolate the texture co-ordinate divided by Z, along with inverse Z, and interpolate all of those using your barycentric system. Then divide by inverse Z before doing your texture lookup to get texture co-ordinates back.
You could do that like this (assume a b c contain an extra z co-ordinate giving distance from camera):
public vec3d toCartesianInvZ(vec3d a, vec3d b, vec3d c) {
    // put some asserts in to check for z = 0 to avoid div by zero
    return new vec3d(
        u * a.x / a.z + v * b.x / b.z + w * c.x / c.z,
        u * a.y / a.z + v * b.y / b.z + w * c.y / c.z,
        u * 1 / a.z + v * 1 / b.z + w * 1 / c.z
    );
}
(You could obviously speed up/simplify this by pre-computing all those divides and storing in sourceCoords, and just doing regular barycentric interpolation in 3D)
Then after you call it in transpose, divide by inv Z to get the texture co-ords back:
vec3d spInvZ = bary.toCartesianInvZ(sourceCoords);
vec2d sp = new vec2d(spInvZ.x / spInvZ.z, spInvZ.y / spInvZ.z);
etc. The Z co-ordinate that you need is the distance of the point in 3D space from the camera position, in the direction the camera is pointing. You can compute it with a dot product if you aren't getting it some other way:
float z = point.subtract(camera_pos).dot(camera_direction);
etc

Binary Image "Lines-of-Sight" Edge Detection

Consider this binary image:
A normal edge detection algorithm (like Canny) takes the binary image as input and produces the contour shown in red. I need another algorithm that takes a point "P" as a second piece of input data ("P" is the black point in the previous image). This algorithm should produce the blue contour, which represents the edge of the binary image as seen along lines of sight from "P".
I searched a lot for an image processing algorithm that achieves this, but didn't find one. I also tried to come up with a new one myself, but I'm still having a lot of difficulty.
Since you've got a bitmap, you could use a bitmap algorithm.
Here's a working example (in JSFiddle or see below). (Firefox, Chrome, but not IE)
Pseudocode:
// part 1: occlusion
mark all pixels as 'outside'
for each pixel on the edge of the image
draw a line from the source pixel to the edge pixel and
for each pixel on the line starting from the source and ending with the edge
if the pixel is gray mark it as 'inside'
otherwise stop drawing this line
// part 2: edge finding
for each pixel in the image
if pixel is not marked 'inside' skip this pixel
if pixel has a neighbor that is outside mark this pixel 'edge'
// part 3: draw the edges
highlight all the edges
At first this sounds pretty terrible... But really, it's O(p) where p is the number of pixels in your image.
Full code here, works best full page:
var c = document.getElementById('c');
c.width = c.height = 500;
var x = c.getContext("2d");

//////////// Draw some "interesting" stuff ////////////
function DrawScene() {
    x.beginPath();
    x.rect(0, 0, c.width, c.height);
    x.fillStyle = '#fff';
    x.fill();

    x.beginPath();
    x.rect(c.width * 0.1, c.height * 0.1, c.width * 0.8, c.height * 0.8);
    x.fillStyle = '#000';
    x.fill();

    x.beginPath();
    x.rect(c.width * 0.25, c.height * 0.02, c.width * 0.5, c.height * 0.05);
    x.fillStyle = '#000';
    x.fill();

    x.beginPath();
    x.rect(c.width * 0.3, c.height * 0.2, c.width * 0.03, c.height * 0.4);
    x.fillStyle = '#fff';
    x.fill();

    x.beginPath();
    var maxAng = 2.0;
    function sc(t) { return t * 0.3 + 0.5; }
    function sc2(t) { return t * 0.35 + 0.5; }
    for (var i = 0; i < maxAng; i += 0.1)
        x.lineTo(sc(Math.cos(i)) * c.width, sc(Math.sin(i)) * c.height);
    for (var i = maxAng; i >= 0; i -= 0.1)
        x.lineTo(sc2(Math.cos(i)) * c.width, sc2(Math.sin(i)) * c.height);
    x.closePath();
    x.fill();

    x.beginPath();
    x.moveTo(0.2 * c.width, 0.03 * c.height);
    x.lineTo(c.width * 0.9, c.height * 0.8);
    x.lineTo(c.width * 0.8, c.height * 0.8);
    x.lineTo(c.width * 0.1, 0.03 * c.height);
    x.closePath();
    x.fillStyle = '#000';
    x.fill();
}
//////////// Pick a point to start our operations: ////////////
var v_x = Math.round(c.width * 0.5);
var v_y = Math.round(c.height * 0.5);

function Update() {
    if (navigator.appName == 'Microsoft Internet Explorer'
            || !!(navigator.userAgent.match(/Trident/)
            || navigator.userAgent.match(/rv 11/))
            || $.browser.msie == 1) {
        document.getElementById("d").innerHTML = "Does not work in IE.";
        return;
    }

    DrawScene();

    //////////// Make our image binary (white and gray) ////////////
    var id = x.getImageData(0, 0, c.width, c.height);
    for (var i = 0; i < id.width * id.height * 4; i += 4) {
        id.data[i + 0] = id.data[i + 0] > 128 ? 255 : 64;
        id.data[i + 1] = id.data[i + 1] > 128 ? 255 : 64;
        id.data[i + 2] = id.data[i + 2] > 128 ? 255 : 64;
    }

    // Adapted from http://rosettacode.org/wiki/Bitmap/Bresenham's_line_algorithm#JavaScript
    // Walks from the source point (v_x, v_y) towards (x1, y1), marking pixels as
    // 'inside' (128) until a white (255) pixel blocks the line of sight.
    function line(x1, y1) {
        var x0 = v_x;
        var y0 = v_y;
        var dx = Math.abs(x1 - x0), sx = x0 < x1 ? 1 : -1;
        var dy = Math.abs(y1 - y0), sy = y0 < y1 ? 1 : -1;
        var err = (dx > dy ? dx : -dy) / 2;
        while (true) {
            var d = (y0 * c.width + x0) * 4; // row stride is the canvas width
            if (id.data[d] === 255) break;
            id.data[d] = 128;
            id.data[d + 1] = 128;
            id.data[d + 2] = 128;
            if (x0 === x1 && y0 === y1) break;
            var e2 = err;
            if (e2 > -dx) { err -= dy; x0 += sx; }
            if (e2 < dy) { err += dx; y0 += sy; }
        }
    }

    // Cast a ray to every border pixel of the image
    for (var i = 0; i < c.width; i++) line(i, 0);
    for (var i = 0; i < c.width; i++) line(i, c.height - 1);
    for (var i = 0; i < c.height; i++) line(0, i);
    for (var i = 0; i < c.height; i++) line(c.width - 1, i);
    // Outline-finding algorithm: a visible ('inside') pixel that touches an
    // unmarked pixel is part of the line-of-sight edge
    function gb(x, y) {
        var v = id.data[(y * id.width + x) * 4]; // row stride is the canvas width
        return v !== 128 && v !== 0;
    }
    for (var y = 0; y < id.height; y++) {
        var py = Math.max(y - 1, 0);
        var ny = Math.min(y + 1, id.height - 1);
        console.log(y);
        for (var z = 0; z < id.width; z++) {
            var d = (y * id.width + z) * 4;
            if (id.data[d] !== 128) continue;
            var pz = Math.max(z - 1, 0);
            var nz = Math.min(z + 1, id.width - 1);
            if (gb(pz, py) || gb(z, py) || gb(nz, py) ||
                gb(pz, y) || gb(z, y) || gb(nz, y) ||
                gb(pz, ny) || gb(z, ny) || gb(nz, ny)) {
                id.data[d + 0] = 0;
                id.data[d + 1] = 0;
                id.data[d + 2] = 255;
            }
        }
    }
    x.putImageData(id, 0, 0);

    // Draw the starting point
    x.beginPath();
    x.arc(v_x, v_y, c.width * 0.01, 0, 2 * Math.PI, false);
    x.fillStyle = '#800';
    x.fill();
}
Update();

c.addEventListener('click', function(evt) {
    var x = evt.pageX - c.offsetLeft,
        y = evt.pageY - c.offsetTop;
    v_x = x;
    v_y = y;
    Update();
}, false);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.2.3/jquery.min.js"></script>
<center><div id="d">Click on image to change point</div>
<canvas id="c"></canvas></center>
I would just estimate P's line of sight contour with ray collisions.
RESOLUTION = PI / 720;
For rad = 0 To PI * 2 Step RESOLUTION
    ray = CreateRay(P, rad)
    hits = Intersect(ray, contours)
    If Len(hits) > 0
        Add(hits[0], lineOfSightContour)
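A rough Python sketch of that ray-casting idea (assuming a binary NumPy mask whose non-zero pixels are the shape, and P given in (x, y) pixel coordinates; the step counts are illustrative):

import numpy as np

def line_of_sight_contour(mask, p, angular_steps=1440):
    """March a ray outward from p for each angle and keep the first shape pixel hit."""
    h, w = mask.shape
    px, py = p
    hits = []
    max_r = int(np.hypot(h, w))
    for theta in np.linspace(0.0, 2.0 * np.pi, angular_steps, endpoint=False):
        dx, dy = np.cos(theta), np.sin(theta)
        for r in range(1, max_r):
            x, y = int(round(px + r * dx)), int(round(py + r * dy))
            if not (0 <= x < w and 0 <= y < h):
                break                    # left the image without hitting anything
            if mask[y, x]:
                hits.append((x, y))      # first occluding pixel along this ray
                break
    return hits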
https://en.wikipedia.org/wiki/Hidden_surface_determination with e.g. a Z-Buffer is relatively easy. Edge detection looks a lot trickier and probably needs a bit of tuning. Why not take an existing edge detection algorithm from a library that somebody else has tuned, and then stick in some Z-buffering code to compute the blue contour from the red?
First approach
Main idea
1. Run an edge detection algorithm (Canny should do just fine).
2. For each contour point C compute the triplet (slope, dir, dist), where:
   - slope is the slope of the line that passes through P and C
   - dir is a bit which is set if C is to the right of P (on the x axis) and cleared if it is to the left; it is used to distinguish between points having the same slope but on opposite sides of P
   - dist is the distance between P and C.
3. Classify the set of contour points so that a class contains the points with the same key (slope, dir), and keep from each class the one point having the minimum dist. Let S be the set of these closest points.
4. Sort S in clockwise order.
5. Iterate once more through the sorted set and, whenever two consecutive points are too far apart, draw a segment between them; otherwise just draw the points.
Notes
You do not really need to compute the real distance between P and C, since you only use dist to determine the closest point to P at step 3. Instead you can keep C.x - P.x in dist. This piece of information also tells you which of two points with the same slope is closest to P. In addition, C.x - P.x subsumes the dir parameter (in its sign bit), so you do not really need dir either.
The classification in step 3 can ideally be done by hashing (thus, in a linear number of steps), but since doubles/floats are subject to rounding, you might need to tolerate small errors by rounding the slope values.
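Purely as an illustration of the first approach, here is a small Python sketch; it bins contour points by the angle of the ray from P (which subsumes the (slope, dir) key) and keeps the nearest point per bin:

import math

def closest_points_per_direction(contour_points, p, bins=1440):
    """contour_points: iterable of (x, y); p: (x, y). Keeps the nearest point per angular bin."""
    px, py = p
    nearest = {}
    for (x, y) in contour_points:
        angle = math.atan2(y - py, x - px)          # direction of the ray P -> C
        key = int((angle % (2 * math.pi)) / (2 * math.pi) * bins)
        dist2 = (x - px) ** 2 + (y - py) ** 2
        if key not in nearest or dist2 < nearest[key][0]:
            nearest[key] = (dist2, (x, y))
    # Returning points sorted by bin gives them in angular order around P
    return [pt for _, (_, pt) in sorted(nearest.items())]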
Second approach
Main idea
You can perform a sort of BFS starting from P, like when trying to determine the country/zone that P resides in. For each pixel, look at the pixels around it that were already visited by BFS (called neighbors). Depending on the distribution of the neighbor pixels that are in the line of sight, determine if the currently visited pixel is in the line of sight too or not. You can probably apply a sort of convolution operator on the neighbor pixels (like with any other filter). Also, you do not really need to decide right away if a pixel is for sure in the line of sight. You could instead compute some probability of that to be true.
Notes
Due to the fact that your graph is a 2D image, BFS should be pretty fast (since the number of edges is linear in the number of vertices).
This second approach eliminates the need to run an edge detection algorithm. Also, if the country/zone P resides in is considerably smaller than the image, the overall performance should be better than that of running an edge detection algorithm alone.

Gamma Adjustment on the HTML5 Canvas?

I found a way to increase the gamma, but no way to decrease it! This article states a formula for increasing the gamma. The formula works for increasing the gamma but not for decreasing it, even if I apply the reduction on a new instance of the canvas. I tried redrawing the canvas and using a negative value for the gamma calculation, but I don't get my original canvas back.
//For increasing, I tried
gamma = 0.5;
gammacorrection = 1/gamma;
r = Math.pow(255 * (r / 255), gammacorrection);
g = ...
b = ...
//For decreasing
gamma = -0.5;
gammacorrection = 1/gamma;
r = Math.pow(255 * (r / 255), gammacorrection);
g = ...
b = ...
First part works. Second doesn't.
For the sake of completeness, here's a working piece of code:
async function adjustGamma(gamma) {
    const gammaCorrection = 1 / gamma;
    const canvas = document.getElementById('canvasOutput');
    const ctx = canvas.getContext('2d');
    const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
    const data = imageData.data;
    for (var i = 0; i < data.length; i += 4) {
        data[i] = 255 * Math.pow(data[i] / 255, gammaCorrection);
        data[i + 1] = 255 * Math.pow(data[i + 1] / 255, gammaCorrection);
        data[i + 2] = 255 * Math.pow(data[i + 2] / 255, gammaCorrection);
    }
    ctx.putImageData(imageData, 0, 0);
}
Here the function adjusts the gamma on the canvas with id "canvasOutput", based on the formula in the article linked by the OP.
There is no negative gamma correction. You should save the original values and use them when making gamma changes, and set gamma to 1.0 to revert back to the original.
Also note that you have the order of operations wrong: the channel value should be normalized, raised to the power, and only then multiplied by 255, i.e. 255 * Math.pow(r / 255, gammacorrection).
var originals = { r: r, g: g, b: b };
// increase
gamma = 0.5;
gammacorrection = 1/gamma;
r = 255 * Math.pow(( originals.r / 255), gammacorrection);
g = ...
b = ...
// revert to original
gamma = 1;
gammacorrection = 1/gamma;
r = 255 * Math.pow(( originals.r / 255), gammacorrection);
g = ...
b = ...
There is no negative value for gamma; ideally this value will range between about 0.01 and 7.99. So reverting the gamma to the original value should be possible either by creating a new canvas instance with the original values of the image and instantiating it, or by keeping a pool of the original pixels and reverting back to them.
I wrote a script showing how I would construct the algorithm for gamma reduction.
var gamma = 0.5;
var gammaCorrection = 1 / gamma;
var canvas = document.getElementById('canvas');
var ctx = canvas.getContext('2d');
var imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);

function GetPixelColor(x, y) {
    var index = (x + canvas.width * y) * 4;
    var rgb = {
        r: imageData.data[index + 0],
        g: imageData.data[index + 1],
        b: imageData.data[index + 2]
    };
    return rgb;
}

function SetPixelColor(x, y, color) {
    var index = (x + canvas.width * y) * 4;
    var data = imageData.data;
    data[index + 0] = color.r;
    data[index + 1] = color.g;
    data[index + 2] = color.b;
}

for (var y = 0; y < canvas.height; y++) {
    for (var x = 0; x < canvas.width; x++) {
        var color = GetPixelColor(x, y);
        // Normalize, apply the inverse-gamma power, then scale back to 0-255
        var newRed = 255 * Math.pow(color.r / 255, gammaCorrection);
        var newGreen = 255 * Math.pow(color.g / 255, gammaCorrection);
        var newBlue = 255 * Math.pow(color.b / 255, gammaCorrection);
        SetPixelColor(x, y, { r: newRed, g: newGreen, b: newBlue });
    }
}

// Write the adjusted pixels back to the canvas
ctx.putImageData(imageData, 0, 0);
I don't know how the application should adjust the gamma value, but I suppose it's done with a value adjuster. If so, you should adjust the gamma value dynamically, giving it a min and max range. I haven't tested the code, as that wasn't my aim, but the idea is hopefully clear.
EDIT:
To understand the principle of gamma correction, let's first define gamma itself.
Gamma describes how a monitor alters the pixel values it receives as input. Gamma correction is the act of inverting that process for linear RGB values, so that the final output remains linear. For example, if you calculate the light intensity of an object to be 0.5, you don't store 0.5 in the pixel; you store pow(0.5, 1.0/2.2) = 0.73. When you send 0.73 to the monitor, it applies its gamma to the value and produces pow(0.73, 2.2) = 0.5, which is what you want. To do this, you apply the inverse gamma function.
o = pow(i, 1.0/gamma)
Where:
o is the output value.
i is the input value.
gamma is the gamma value used by your monitor.
So gamma correction is nothing other than raising the input value to the power of the inverse of gamma. To restore the original value, you apply the opposite, raising the corrected value to the power of gamma.
The blue line represents the inverse gamma curve you need to apply to your pixels before they're sent to the monitor. When your monitor applies its gamma curve (red line) to the pixels, the result is a linear line (green line) that represents your intended RGB pixel values.
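As a quick numeric illustration of that round trip (not from the original answer; gamma 2.2 is just the usual example value):

# Encode a linear intensity for the display, then let the "monitor" decode it
gamma = 2.2
linear = 0.5
encoded = linear ** (1.0 / gamma)   # ~0.7297, the value stored in the pixel
decoded = encoded ** gamma          # ~0.5, what the monitor ends up showing
print(round(encoded, 4), round(decoded, 4))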
