Integer Bilinear interpolation optimization - go

My code was heavily bottlenecked by bilinear interpolation, so I wrote a version (ScaleBlerpI) that does not use floating-point math. This is already 1.85 times faster (it was 1.5 times before Edit 1 below), but I am wondering how I could make it even faster.
Any hints are appreciated.
// ScaleBlerpI resamples src into dst with bilinear interpolation using only
// integer arithmetic. Source coordinates are tracked as 32.32 fixed-point
// numbers: the upper 32 bits select the source cell, the lower 32 bits are the
// interpolation weight handed to blerpI.
//
// Only the first three components of every cell are interpolated, so both
// fields must have a ComponentSize of at least 3. Nothing is written when
// either field has a non-positive width or height.
func ScaleBlerpI(src, dst *ValueFieldI) {
	if src.Width <= 0 || src.Height <= 0 || dst.Width <= 0 || dst.Height <= 0 {
		return
	}

	// Fixed-point step per destination pixel. Converting to uint64 before the
	// multiplication keeps the product from overflowing a 32-bit int.
	mx := uint64(src.Width-1) * math.MaxUint32 / uint64(dst.Width)
	my := uint64(src.Height-1) * math.MaxUint32 / uint64(dst.Height)

	// With a single source column or row there is no "next" neighbour. Its
	// weight is always zero in that case, so sample the same cell again
	// instead of reading past the end of the row or of the slice.
	stepX, stepY := 1, 1
	if src.Width == 1 {
		stepX = 0
	}
	if src.Height == 1 {
		stepY = 0
	}

	// Reused for every pixel; avoids building a fresh slice per iteration.
	var result [3]uint32

	for y := uint64(0); y < uint64(dst.Height); y++ {
		// The vertical position only depends on y, so compute it once per row.
		fy := y * my
		srcY := int(fy >> 32)      // eq. / 2^32
		ty := fy & math.MaxUint32 // eq. % 2^32

		for x := uint64(0); x < uint64(dst.Width); x++ {
			fx := x * mx
			srcX := int(fx >> 32)
			tx := fx & math.MaxUint32

			rgba00 := src.GetComponent(srcX, srcY)
			rgba10 := src.GetComponent(srcX+stepX, srcY)
			rgba01 := src.GetComponent(srcX, srcY+stepY)
			rgba11 := src.GetComponent(srcX+stepX, srcY+stepY)

			result[0] = blerpI(rgba00[0], rgba10[0], rgba01[0], rgba11[0], tx, ty)
			result[1] = blerpI(rgba00[1], rgba10[1], rgba01[1], rgba11[1], tx, ty)
			result[2] = blerpI(rgba00[2], rgba10[2], rgba01[2], rgba11[2], tx, ty)

			dst.SetComponent(int(x), int(y), result[:])
		}
	}
}
// lerpI linearly interpolates between s and e. The weight f is a fraction
// expressed on the scale 0..math.MaxUint32: f == 0 yields s and
// f == math.MaxUint32 yields e. In real-number terms this is
// s*(1-f) + e*f.
func lerpI(s, e uint32, f uint64) uint32 {
	const one = uint64(math.MaxUint32)

	startPart := uint64(s) * (one - f)
	endPart := uint64(e) * f
	return uint32((startPart + endPart) / one)
}
// blerpI performs a bilinear interpolation of four corner values. The two
// horizontal pairs (c00,c10) and (c01,c11) are blended with weight tx, and the
// two results are then blended vertically with weight ty.
func blerpI(c00, c10, c01, c11 uint32, tx, ty uint64) uint32 {
	top := lerpI(c00, c10, tx)
	bottom := lerpI(c01, c11, tx)
	return lerpI(top, bottom, ty)
}
// ValueFieldI is a 2-D grid of cells stored row by row in a flat slice.
// Every cell occupies ComponentSize consecutive uint32 entries of Values.
type ValueFieldI struct {
	// Width is the number of cells per row.
	Width int
	// Height is the number of rows.
	Height int
	// ComponentSize is the number of uint32 values stored per cell.
	ComponentSize int
	// Values holds the cell data.
	Values []uint32
}
// GetComponent returns the slice of values belonging to the cell at (x, y).
// The result aliases vf.Values, so writing to it modifies the field.
func (vf *ValueFieldI) GetComponent(x, y int) []uint32 {
	first := (x + y*vf.Width) * vf.ComponentSize
	last := first + vf.ComponentSize
	return vf.Values[first:last]
}
// SetComponent copies c into the cell at (x, y). At most ComponentSize values
// are written, because copy stops at the shorter of the two slices.
func (vf *ValueFieldI) SetComponent(x, y int, c []uint32) {
	cell := vf.GetComponent(x, y)
	copy(cell, c)
}
Profiling has shown me that the most time is lost on blerpI, src.GetComponent and dst.SetComponent
Edit 1
Replaced
// basically s * (1 - f) + e * f
return uint32(
(uint64(s)*(math.MaxUint32-f) + uint64(e)*f) /
math.MaxUint32)
With
// basically s + f*(e-s)
return s + uint32((f*(uint64(e)-uint64(s)))>>32)
Integer version is now 1.85 times faster.
Edit 2
Benchmark:
// BenchmarkBlerpIRand measures ScaleBlerpI when upscaling a 37x37 field with
// three random components per cell to eight times its side length.
func BenchmarkBlerpIRand(b *testing.B) {
	const (
		srcSide    = 37
		dstSide    = 37 * 8
		components = 3
	)

	src := &ValueFieldI{
		Width:         srcSide,
		Height:        srcSide,
		ComponentSize: components,
		Values:        make([]uint32, srcSide*srcSide*components),
	}
	for idx := range src.Values {
		src.Values[idx] = rand.Uint32()
	}

	dst := &ValueFieldI{
		Width:         dstSide,
		Height:        dstSide,
		ComponentSize: components,
		Values:        make([]uint32, dstSide*dstSide*components),
	}

	// Keep the fixture setup out of the measured time.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		ScaleBlerpI(src, dst)
	}
}

Related

Gaussian Blur implementation generates weird output

I'm trying to implement a Gaussian Blur on golang image.Image objects. For the following image:
The output image generated is:
As one can see, the output image contains some unprocessed borders that corresponds to the current implementation decision to not process the edges, which leads me to think that I might have messed up on calculations somehow (what I mean is, this part of the implementation works, so I can discard off-by-one errors while iterating through image pixels). I've reviewed this code many times, but I can't find my mistake. I would really appreciate some help and considerations on the implementation, that could help me solve the problem. The code is contained below. If any edits or clarifications are necessary, please let me know!
package main
import (
"image"
"image/color"
"image/draw"
"image/jpeg"
"math"
"os"
)
// main decodes dog.jpeg, blurs it with a radius of 3 and writes the result to
// dog-blurred.jpeg. Any failure aborts the program with a panic.
func main() {
	f, err := os.Open("dog.jpeg")
	if err != nil {
		panic(err)
	}
	// The input is only read, so a failing Close cannot lose data.
	defer f.Close()

	img, err := jpeg.Decode(f)
	if err != nil {
		panic(err)
	}

	newImg := gaussianBlur(img, 3)

	out, err := os.Create("dog-blurred.jpeg")
	if err != nil {
		panic(err)
	}
	if err := jpeg.Encode(out, newImg, nil); err != nil {
		out.Close()
		panic(err)
	}
	// Close explicitly: for a file that was written, a Close error can mean
	// that the data never reached the disk.
	if err := out.Close(); err != nil {
		panic(err)
	}
}
// applyGaussianFunction evaluates the unnormalised 2-D Gaussian
// exp(-(x² + y²) / (2·stdDev²)) at the offset (x, y).
//
// The usual 1/(2π·stdDev²) factor is left out on purpose; callers are
// expected to normalise the resulting weights themselves.
func applyGaussianFunction(x, y, stdDev float64) float64 {
	exponent := -(x*x + y*y) / (2 * stdDev * stdDev)
	return math.Exp(exponent)
}
// generateKernel builds a (2·radius+1) × (2·radius+1) Gaussian convolution
// kernel whose weights sum to 1. The standard deviation is radius/2, but never
// less than 1. kernel[i+radius][j+radius] holds the weight for the offset
// (j, i) from the centre.
func generateKernel(radius int) [][]float64 {
	size := 1 + (radius * 2)
	// Divide as floating point: the integer division radius/2 would silently
	// drop the fraction (radius 3 would give 1 instead of 1.5).
	stdDev := math.Max(float64(radius)/2, 1)

	kernel := make([][]float64, size)
	for i := range kernel {
		kernel[i] = make([]float64, size)
	}

	sum := 0.0
	for i := -radius; i <= radius; i++ {
		for j := -radius; j <= radius; j++ {
			val := applyGaussianFunction(float64(j), float64(i), stdDev)
			kernel[i+radius][j+radius] = val
			sum += val
		}
	}

	// Normalise so that blurring preserves overall brightness.
	for i := range kernel {
		for j := range kernel[i] {
			kernel[i][j] /= sum
		}
	}
	return kernel
}
// makeImageRGBA copies src into a new *image.RGBA whose bounds start at (0, 0)
// and have the same width and height as src.
func makeImageRGBA(src image.Image) *image.RGBA {
	b := src.Bounds()
	rgba := image.NewRGBA(image.Rect(0, 0, b.Dx(), b.Dy()))
	// Start reading at the source's own top-left corner; image bounds are not
	// required to begin at the origin (sub-images, for example, do not).
	draw.Draw(rgba, rgba.Bounds(), src, b.Min, draw.Src)
	return rgba
}
// gaussianBlur returns a blurred copy of img, convolved with the kernel from
// generateKernel(radius). The result is an *image.RGBA whose bounds start at
// (0, 0) and match the size of img.
//
// Pixels are always read from img, never from the output. Samples that would
// fall outside the image are taken from the nearest edge pixel, so the border
// is blurred as well instead of being left empty.
func gaussianBlur(img image.Image, radius int) image.Image {
	bounds := img.Bounds()
	size := bounds.Size()
	rgbaImg := image.NewRGBA(image.Rect(0, 0, size.X, size.Y))
	kernel := generateKernel(radius)

	for y := 0; y < size.Y; y++ {
		for x := 0; x < size.X; x++ {
			var nr, ng, nb, na float64
			for i := -radius; i <= radius; i++ {
				// Source coordinates are offset by bounds.Min because an
				// image's bounds need not start at the origin.
				sy := bounds.Min.Y + clampCoord(y-i, size.Y)
				row := kernel[i+radius]
				for j := -radius; j <= radius; j++ {
					sx := bounds.Min.X + clampCoord(x-j, size.X)
					weight := row[j+radius]
					pr, pg, pb, pa := img.At(sx, sy).RGBA()
					nr += float64(pr) * weight
					ng += float64(pg) * weight
					nb += float64(pb) * weight
					na += float64(pa) * weight
				}
			}
			// RGBA() yields 16-bit alpha-premultiplied channels, so store the
			// result as color.RGBA64 rather than the 8-bit color.RGBA.
			rgbaImg.Set(x, y, color.RGBA64{
				R: toUint16(nr),
				G: toUint16(ng),
				B: toUint16(nb),
				A: toUint16(na),
			})
		}
	}
	return rgbaImg
}

// clampCoord limits v to the valid index range [0, n-1].
func clampCoord(v, n int) int {
	if v < 0 {
		return 0
	}
	if v >= n {
		return n - 1
	}
	return v
}

// toUint16 rounds v to the nearest integer and saturates it to 0..65535, so
// accumulated floating-point error can never wrap around.
func toUint16(v float64) uint16 {
	if v <= 0 {
		return 0
	}
	if v >= 65535 {
		return 65535
	}
	return uint16(v + 0.5)
}
EDITS
I modified the code so that pixels are read from the original image, not from rgbaImg
I've also commented eFactor from the applyGaussianFunction function, since I'm already normalizing the kernel with the sum variable
Modified .Set method to use 64-bit RGBA struct
This is the newly generated image
Those black borders are easy to solve, I'm already working them out. This is not a part of the problem anymore.
You're reading from the same image that you're writing to. You shall read from the original image instead:
pr, pg, pb, pa := img.At(x+j, y+i).RGBA()
EDIT:
Additionally, Image.At returns color.RGBA, and func (color.RGBA) RGBA returns colors in the 0 to 0xFFFF range. However color.RGBA constructor expects them to be in 0 to 255 range. You may want to use color.RGBA64 when writing the result:
rgbaImg.Set(x, y, color.RGBA64{uint16(nr), uint16(ng), uint16(nb), uint16(na)});

parallelism in Golang loop

I have a project that I need to run on multiple CPU cores to speed it up. I have used OpenMP (omplib) in Fortran, but I am not familiar with parallelism in Go. I tried goroutines, but that went wrong: it made a mess and I got incorrect results. This is my code:
package main
import (
"bufio"
"fmt"
"log"
"math"
"math/rand"
"os"
"time"
)
// Simulation parameters.
const (
// Highest particle index. The arrays below hold n_particles+1 entries and the
// loops in main run from 0 to n_particles inclusive.
n_particles int = 2048
// Highest step index; the main loop runs from 0 to n_steps inclusive.
n_steps int = 1000000
// Factor applied to the velocity when advancing a position by one step.
dt float64 = 1.0
// Magnitude of every particle's initial velocity.
v0 float64 = 0.50
// Interaction threshold. NOTE(review): main compares it against a squared
// distance without squaring it; with the value 1.0 that makes no difference.
radius float64 = 1.0
// Not referenced by the code shown here.
f_intensity float64 = 1.8
// Side length of the periodic box; positions are wrapped into [0, scale).
scale float64 = 32.0
// Weight applied to neighbours whose index is greater than the particle's own.
alpha float64 = 1.0 / 36.0
)
// Per-particle state, indexed by particle number.
var (
// Position.
x [n_particles + 1]float64
y [n_particles + 1]float64
// Heading in radians, drawn from [0, 2π) at start-up.
angles [n_particles + 1]float64
// Velocity components.
vx [n_particles + 1]float64
vy [n_particles + 1]float64
// One slot per step; not referenced by the code shown here.
order [n_steps + 1]float64
)
// main initialises every particle with a random position and heading, then
// for each step advances the positions inside a periodic box, indexes the
// particles by the unit grid cell they fall into, and for each particle sums
// the heading vectors of nearby particles found in the 3x3 block of cells
// around it.
//
// NOTE(review): this is an excerpt. As shown, vstart, start, m_angles,
// bandomizer and r_angles are assigned but never read, and the neighbour
// lookup does not wrap cell indices around the box edges.
func main() {
/////randomizer
vstart := time.Now()
rsource := rand.NewSource(time.Now().UnixNano())
randomizer := rand.New(rsource)
for i := 0; i <= n_particles; i++ {
x[i] = (randomizer.Float64()) * scale
y[i] = (randomizer.Float64()) * scale
angles[i] = (randomizer.Float64()) * math.Pi * 2
sin, cos := math.Sincos(angles[i])
vx[i] = v0 * cos
vy[i] = v0 * sin
}
//////main loop
for i := 0; i <= n_steps; i++ {
start := time.Now()
// Advance every particle and wrap it back into [0, scale) on both axes.
for j := 0; j <= n_particles; j++ {
x[j] = x[j] + (vx[j] * dt)
//x[j] = math.Mod(x[j], scale)
if x[j] < 0.0 {
x[j] = x[j] + scale
}
if x[j] >= scale {
x[j] = x[j] - scale
}
y[j] = y[j] + (vy[j] * dt)
//y[j] = math.Mod(x[j], scale)
if y[j] < 0.0 {
y[j] = y[j] + scale
}
if y[j] >= scale {
y[j] = y[j] - scale
}
}
// intpos identifies a unit grid cell by its integer coordinates.
type intpos struct {
x, y int64
}
// Maps each cell to the indices of the particles currently inside it;
// rebuilt from scratch on every step.
adjacencyIndex := make(map[intpos][]int)
////getting each boxes particles
for j := 0; j <= n_particles; j++ {
// . . .
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j])) // getting particle box
adjacencyIndex[intpos{ix, iy}] = append(adjacencyIndex[intpos{ix, iy}], j) // adding particles to boxes
}
/////////
// Arrays are values in Go, so this is a full copy of angles.
m_angles := angles
Now I want following loop run in parallel :
////particle loop - I WANT FOLLOWING LOOP PARALLEL
// The loop body only reads x, y, angles and adjacencyIndex; everything it
// writes is declared inside the body.
for j := 0; j <= n_particles; j++ {
sumanglesx := 0.0
sumanglesy := 0.0
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
// fxi = math.Floor(x[j])
// fyi = math.Floor(y[j])
for dx := -1; dx <= 1; dx++ {
for dy := -1; dy <= 1; dy++ {
adjacentParticles := adjacencyIndex[intpos{ix + int64(dx), iy + int64(dy)}]
for _, k := range adjacentParticles {
// dist is the squared distance between particles k and j.
dist := ((x[k] - x[j]) * (x[k] - x[j])) + ((y[k] - y[j]) * (y[k] - y[j]))
if dist < radius {
// math.Sincos returns (sin, cos): sy is the sine, sx the cosine.
sy, sx := math.Sincos(angles[k])
if k <= j {
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
} else {
// Neighbours with a higher index contribute with weight alpha.
sx = alpha * sx
sy = alpha * sy
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
}
}
}
}
}
// A new time-seeded generator is created for every particle.
bsource := rand.NewSource(time.Now().UnixNano())
bandomizer := rand.New(bsource)
// Self-assignments; they have no effect.
sumanglesy = sumanglesy
sumanglesx = sumanglesx
// Direction of the summed heading vector.
r_angles := math.Atan2(sumanglesy, sumanglesx)
}
}
}
I have marked the one loop that should run in parallel.
Here are two approaches to try out: https://play.golang.org/p/O1uB2zzJEC5
package main
import (
"fmt"
"sync"
)
// main runs the two demonstrations one after the other: first the
// sync.WaitGroup variant, then the channel-based variant.
func main() {
waitGroupApproach()
channelApproach()
}
// waitGroupApproach starts six goroutines that each write one slot of a shared
// result slice, and uses a sync.WaitGroup to block until all have finished.
func waitGroupApproach() {
	fmt.Println("waitGroupApproach")

	const workers = 6
	var wg sync.WaitGroup
	resultTable := make([]int, workers)

	for j := 0; j < workers; j++ {
		wg.Add(1)
		// The loop variable is handed over as an argument. Before Go 1.22 a
		// closure that captured j directly would share a single variable with
		// the loop, and most goroutines would observe its final value because
		// they tend to run after the loop has finished.
		go func(index int) {
			defer wg.Done()
			fmt.Println(index) // try using `j` here instead of `index`
			resultTable[index] = index * 2
		}(j)
	}

	fmt.Println("waiting")
	wg.Wait()

	// All goroutines are done; the results can be processed further.
	fmt.Println("finished")
	fmt.Println(resultTable)
}
// channelApproach starts six goroutines that each send one result over an
// unbuffered channel, then receives exactly six results and stores them by
// the index carried inside each result.
func channelApproach() {
	fmt.Println("\nchannelApproach")

	type intpos struct {
		x, y, index int
	}
	const workers = 6
	results := make(chan intpos)

	// Start the workers.
	for j := 0; j < workers; j++ {
		go func(index int) {
			// The actual processing would happen here.
			results <- intpos{x: index * 2, y: index * 3, index: index}
		}(j)
	}

	fmt.Println("Waiting..")

	// Receive as many results as goroutines were started. They may arrive in
	// any order, which is why every result carries its own index.
	resultTable := make([]int, workers)
	for received := 0; received < workers; received++ {
		r := <-results
		fmt.Println(r.index, r.x, r.y)
		resultTable[r.index] = r.x
	}

	fmt.Println("Finished..")
	fmt.Println(resultTable)
}
I prefer the channel approach because it's more go idiomatic to me and it allows to easier handle panic, error conditions, etc.

How does one count a number of iterations with Go?

So I have this small piece of code that iterates for as long as needed, until the difference from the value sought is negligible. I want to count and print the number of iterations after the code has finished running, preferably in my main function (along with printing everything else I need).
Edit: Okay, I've managed to do it like this. I wonder if there's an easier way of counting the iterations and passing them to the output function.
// sqrt approximates the square root of x with Newton's method, starting from
// 1 and stopping once the correction step is no larger than 0.001 in
// magnitude. It returns the approximation and the number of times the step
// was evaluated (at least 1).
func sqrt(x float64) (float64, int) {
	z := 1.0
	step := (z*z - x) / (2 * z)
	count := 1
	for math.Abs(step) > 0.001 {
		z -= step
		step = (z*z - x) / (2 * z)
		count++
	}
	return z, count
}
// main computes the square root of 9 with sqrt and prints it next to the
// standard library's result, followed by the iteration count.
func main() {
	root, iterations := sqrt(9)
	fmt.Println("Your sqrt = ", root)
	fmt.Println("Math Sqrt = ", math.Sqrt(9))
	fmt.Println("Iterations: ", iterations)
}
You can return your float value and an int (as the number of iterations). I made very minor revision to your example to demonstrate.
// sqrt approximates the square root of x with Newton's method, starting from
// 1 and stopping once the correction step is no larger than 0.01 in
// magnitude. It returns the approximation together with the iteration count,
// which starts at 1.
func sqrt(x float64) (float64, int) {
	z := 1.0
	q := (z*z - x) / (2 * z)
	i := 1
	for ; math.Abs(q) > 0.01; i++ {
		z -= q
		q = (z*z - x) / (2 * z)
	}
	return z, i
}
// main prints the approximation and iteration count returned by sqrt for 9,
// followed by the standard library's square root for comparison.
func main() {
	root, iterations := sqrt(9)
	fmt.Printf("result: %f iterations: %d\n", root, iterations)
	fmt.Println(math.Sqrt(9))
}
You can provide multiple return values through your function:
// main illustrates receiving two return values from a single call. Here the
// iteration count comes first and the numeric result second.
func main() {
numLoops, newNum := sqrt(9)
}
func sqrt(x float64) (int, float64) {
<implementation>
}
GoPlay here: https://play.golang.org/p/R2lV41EbEd

Julia set image rendering ruined by concurrency

I have the following code that I am to change into a concurrent program.
// Stefan Nilsson 2013-02-27
// This program creates pictures of Julia sets (en.wikipedia.org/wiki/Julia_set).
package main
import (
"image"
"image/color"
"image/png"
"log"
"math/cmplx"
"os"
"strconv"
)
// ComplexFunc is a function from the complex plane to itself.
type ComplexFunc func(complex128) complex128
// Funcs lists the functions to render; main writes one picture per entry.
var Funcs []ComplexFunc = []ComplexFunc{
// z² - 0.61803398875
func(z complex128) complex128 { return z*z - 0.61803398875 },
// z² + i
func(z complex128) complex128 { return z*z + complex(0, 1) },
}
// main renders every function in Funcs to a 1024-pixel picture named
// picture-<index>.png and stops at the first error.
func main() {
	for idx := range Funcs {
		name := "picture-" + strconv.Itoa(idx) + ".png"
		if err := CreatePng(name, Funcs[idx], 1024); err != nil {
			log.Fatal(err)
		}
	}
}
// CreatePng creates a PNG picture file with a Julia image of size n x n.
//
// It returns the first error encountered while creating, encoding or closing
// the file.
func CreatePng(filename string, f ComplexFunc, n int) (err error) {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}
	// Closing a file that was written to can fail (a late write error, for
	// example), so report that failure unless an earlier error already exists.
	defer func() {
		if cerr := file.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()
	return png.Encode(file, Julia(f, n))
}
// Julia returns an image of size n x n of the Julia set for f.
//
// The image is centred on the origin, and pixel (i, j) corresponds to the
// complex number (i + j·i)/s with s = n/4. Each pixel's blue channel encodes
// the iteration count reported by Iterate; red and green stay zero.
func Julia(f ComplexFunc, n int) image.Image {
	// Using n-n/2 as the upper bound keeps the image exactly n pixels wide
	// and high for odd n as well.
	bounds := image.Rect(-n/2, -n/2, n-n/2, n-n/2)
	img := image.NewRGBA(bounds)
	// Divide as floating point so the scale is not truncated when n is not a
	// multiple of 4 (and is not zero when n < 4).
	s := float64(n) / 4
	for i := bounds.Min.X; i < bounds.Max.X; i++ {
		for j := bounds.Min.Y; j < bounds.Max.Y; j++ {
			iterations := Iterate(f, complex(float64(i)/s, float64(j)/s), 256)
			blue := uint8(iterations % 32 * 8)
			img.Set(i, j, color.RGBA{R: 0, G: 0, B: blue, A: 255})
		}
	}
	return img
}
// Iterate starts from z_0 = z and repeatedly applies z_k = f(z_{k-1}). It
// returns the first index n at which |z_n| > 2, or max if no such index is
// reached within max applications.
func Iterate(f ComplexFunc, z complex128, max int) (n int) {
	for n < max {
		re, im := real(z), imag(z)
		// Compare squared magnitudes to avoid taking a square root.
		if re*re+im*im > 4 {
			return n
		}
		z = f(z)
		n++
	}
	return n
}
I have decided to try and make the Julia() function concurrent. So I changed it to:
// Julia is the question's first concurrent attempt: every pixel is computed
// in its own goroutine.
//
// NOTE(review): as written, the function returns img without waiting for any
// of the goroutines it started, and the closure reads the loop variables i
// and j directly instead of receiving them as arguments (before Go 1.22 all
// iterations share those variables). The snippet is also missing the
// function's closing brace.
func Julia(f ComplexFunc, n int) image.Image {
bounds := image.Rect(-n/2, -n/2, n/2, n/2)
img := image.NewRGBA(bounds)
s := float64(n / 4)
for i := bounds.Min.X; i < bounds.Max.X; i++ {
for j := bounds.Min.Y; j < bounds.Max.Y; j++ {
// One goroutine per pixel; nothing later synchronises with it.
go func(){
n := Iterate(f, complex(float64(i)/s, float64(j)/s), 256)
r := uint8(0)
g := uint8(0)
b := uint8(n % 32 * 8)
img.Set(i, j, color.RGBA{r, g, b, 255})
}()
}
}
return img
This change causes the images to look very different. The patterns are essentially the same, but there are a lot of white pixels that were not there before.
What is happening here?
There are 2 problems:
You don't actually wait for your goroutines to finish.
You don't pass i and j to the goroutine, so they will almost always be the last i and j.
Your function should look something like:
// Julia returns an image of size n x n of the Julia set for f, computing the
// columns of the image concurrently.
//
// One goroutine is started per column rather than per pixel, which keeps the
// goroutine count at n instead of n². Every goroutine writes only to the
// pixels of its own column, and the function waits for all of them before
// returning. f is called from several goroutines at once, so it must be safe
// for concurrent use.
func Julia(f ComplexFunc, n int) image.Image {
	// Using n-n/2 as the upper bound keeps the image exactly n pixels wide
	// and high for odd n as well.
	bounds := image.Rect(-n/2, -n/2, n-n/2, n-n/2)
	img := image.NewRGBA(bounds)
	// Divide as floating point so the scale is not truncated when n is not a
	// multiple of 4.
	s := float64(n) / 4

	var wg sync.WaitGroup
	for i := bounds.Min.X; i < bounds.Max.X; i++ {
		wg.Add(1)
		// Pass i explicitly so each goroutine works on its own column even
		// with pre-1.22 loop variable semantics.
		go func(i int) {
			defer wg.Done()
			for j := bounds.Min.Y; j < bounds.Max.Y; j++ {
				iterations := Iterate(f, complex(float64(i)/s, float64(j)/s), 256)
				blue := uint8(iterations % 32 * 8)
				img.Set(i, j, color.RGBA{R: 0, G: 0, B: blue, A: 255})
			}
		}(i)
	}
	wg.Wait()
	return img
}
A bonus tip, when diving into concurrency, it's usually a good idea to try your code with the race detector.
You might have to use a mutex to call img.Set but I'm not very sure and I can't test atm.

How can I calculate a p-value for a hypergeometric distribution in Go?

In R, I can calculate a p-value for a hypergeometric distribution by using the phyper() function, of which the first value in the returned array is the p-value.
I was wondering whether there is any package in Go / Golang, that lets me do this calculation completely within Go?
You should check out:
probab - Probability distribution functions. Bayesian inference. Written in pure Go.
stat - Pure Go implementation of the GSL Statistics library.
gostat - A statistics library for the go language
When I find problems dealing with stats, my second line of attack after having found that a library does not exist is to port from the R code. This is mixed in ease since code may be R, C/C++ or fortran.
In this case it was pure C, so the port was trivial. Note that the Qhyper() implementation is not an exact port since I have used stirlerr() in place of lgammacor() for the lbeta() implementation. This doesn't seem to make a great deal of difference, but I advise caution if using this lbeta() (and so Qhyper()).
// Direct port of R code from nmath/{phyper,dbinom,stirlerr}.c and {dpq,nmath}.h.
// Code licensed under GPL for that reason (c) Dan Kortschak.
package main
import (
"errors"
"fmt"
"math"
)
// main prints Phyper for one large example, then Phyper, Dhyper and Qhyper
// for the first argument 0 through 9 with r=10, b=7, n=8. The three tables
// are separated by blank lines.
func main() {
	// Example values come from:
	// http://stackoverflow.com/questions/8382806/r-hypergeometric-test-phyper
	fmt.Println(Phyper(62, 1998, 5260-1998, 131, true, false))

	for i := 0; i < 10; i++ {
		fmt.Println(Phyper(float64(i), 10, 7, 8, true, false))
	}
	fmt.Println()

	for i := 0; i < 10; i++ {
		fmt.Println(Dhyper(float64(i), 10, 7, 8, false))
	}
	fmt.Println()

	for i := 0; i < 10; i++ {
		fmt.Println(Qhyper(float64(i), 10, 7, 8, true, false))
	}
}
// ErrDomain is returned when an argument lies outside the function's domain.
var ErrDomain = errors.New("hyper: argument out of domain")
const (
// epsilon equals 2^-52, the spacing between 1 and the next larger float64
// (C's DBL_EPSILON).
epsilon = 2.2204460492503131e-16
// min is the smallest positive normal float64 (C's DBL_MIN).
// NOTE(review): this name shadows the min builtin added in Go 1.21.
min = 2.2250738585072014e-308
)
// Phyper returns the cumulative hypergeometric probability for drawing a
// sample of n balls from r red and b black ones, where x is the number of red
// balls in the sample.
//
// With lowerTail set the result covers counts up to and including x,
// otherwise the complementary tail. With logP set the result is produced by
// the log-scale branch. ErrDomain is returned together with NaN when r, b or
// n (after rounding) are negative, r+b is not finite, or n exceeds r+b.
func Phyper(x, r, b, n float64, lowerTail, logP bool) (float64, error) {
	x = math.Floor(x + 1e-7)
	r, b, n = round(r), round(b), round(n)

	if r < 0 || b < 0 || notFinite(r+b) || n < 0 || n > r+b {
		return math.NaN(), ErrDomain
	}

	// Swap the colours so the summation in pdhyper runs over the shorter
	// tail; this flips which tail is being asked for.
	if x*(r+b) > n*r {
		r, b = b, r
		x = n - x - 1
		lowerTail = !lowerTail
	}

	switch {
	case x < 0:
		return dt0(lowerTail, logP), nil
	case x >= r || x >= n:
		return dt1(lowerTail, logP), nil
	}

	d, err := Dhyper(x, r, b, n, logP)
	if err != nil {
		return d, err
	}
	pd := pdhyper(x, r, b, n, logP)

	if logP {
		return log(d+pd, lowerTail), nil
	}

	res := d * pd
	if !lowerTail {
		// Use 0.5 - p + 0.5 to perhaps gain 1 bit of accuracy
		res = 0.5 - res
		return res + 0.5, nil
	}
	return res, nil
}
// Dhyper returns the hypergeometric probability of finding exactly x red
// balls in a sample of n balls drawn from r red and b black ones. With
// giveLog set the natural logarithm of the probability is returned.
//
// ErrDomain (with NaN) is returned when r, b or n is negative or not an
// integer, or when n > r+b. A non-integer x yields the value for "impossible"
// together with a descriptive error.
//
// Impossible and certain outcomes are reported on the requested scale:
// 0 and 1 normally, -Inf and 0 when giveLog is set.
func Dhyper(x, r, b, n float64, giveLog bool) (float64, error) {
	if negativeOrNotInteger(r) || negativeOrNotInteger(b) || negativeOrNotInteger(n) || n > r+b {
		return math.NaN(), ErrDomain
	}
	if x < 0 {
		return d0(giveLog), nil
	}
	if x != math.Floor(x) {
		return d0(giveLog), fmt.Errorf("non-integer x = %f", x)
	}

	x = round(x)
	r = round(r)
	b = round(b)
	n = round(n)

	if n < x || r < x || n-x > b {
		return d0(giveLog), nil
	}
	if n == 0 {
		if x == 0 {
			return d1(giveLog), nil
		}
		return d0(giveLog), nil
	}

	// Express the density through three binomial densities that share the
	// success probability p = n/(r+b).
	p := n / (r + b)
	q := (r + b - n) / (r + b)

	p1, err := dbinom(x, r, p, q, giveLog)
	if err != nil {
		return math.NaN(), err
	}
	p2, err := dbinom(n-x, b, p, q, giveLog)
	if err != nil {
		return math.NaN(), err
	}
	p3, err := dbinom(n, r+b, p, q, giveLog)
	if err != nil {
		return math.NaN(), err
	}

	if giveLog {
		return p1 + p2 - p3, nil
	}
	return p1 * p2 / p3, nil
}
// Qhyper returns the quantile of the hypergeometric distribution: the
// smallest number xr of red balls in a sample of n, drawn from NR red and NB
// black ones, whose cumulative probability reaches p.
//
// p is interpreted according to lowerTail and logP. ErrDomain (with NaN) is
// returned when any argument is not finite, when NR, NB or n (after rounding)
// is negative, when n > NR+NB, or when p lies outside the valid range for the
// chosen scale.
func Qhyper(p, NR, NB, n float64, lowerTail, logP bool) (float64, error) {
if notFinite(p) || notFinite(NR) || notFinite(NB) || notFinite(n) {
return math.NaN(), ErrDomain
}
NR = round(NR)
NB = round(NB)
N := NR + NB
n = round(n)
if NR < 0 || NB < 0 || n < 0 || n > N {
return math.NaN(), ErrDomain
}
/* Goal: Find xr (= #{red balls in sample}) such that
* phyper(xr, NR,NB, n) >= p > phyper(xr - 1, NR,NB, n)
*/
// Smallest and largest possible number of red balls in the sample.
xstart := math.Max(0, n-NB)
xend := math.Min(n, NR)
// Handle the boundary probabilities (0 and 1, or -Inf and 0 on the log
// scale) directly.
if logP {
if p > 0 {
return math.NaN(), ErrDomain
}
if p == 0 { /* upper bound*/
if lowerTail {
return xend, nil
}
return xstart, nil
}
if math.IsInf(p, -1) {
if lowerTail {
return xstart, nil
}
return xend, nil
}
} else { /* !logP */
if p < 0 || p > 1 {
return math.NaN(), ErrDomain
}
if p == 0 {
if lowerTail {
return xstart, nil
}
return xend, nil
}
if p == 1 {
if lowerTail {
return xend, nil
}
return xstart, nil
}
}
xr := xstart
xb := n - xr /* always ( = #{black balls in sample} ) */
smallN := N < 1000 /* won't have underflow in product below */
/* if N is small, term := product.ratio( bin.coef );
otherwise work with its logarithm to protect against underflow */
t1, err := lfastchoose(NR, xr)
if err != nil {
return 0, err
}
t2, err := lfastchoose(NB, xb)
if err != nil {
return 0, err
}
t3, err := lfastchoose(N, n)
if err != nil {
return 0, err
}
// Log of the probability of exactly xstart red balls.
term := t1 + t2 - t3
if smallN {
term = math.Exp(term)
}
// From here on NR and NB count the balls that are NOT in the sample.
NR -= xr
NB -= xb
// Convert p to a plain lower-tail probability.
if !lowerTail || logP {
p = qIv(p, lowerTail, logP)
}
// Slightly shrink the target so rounding error in sum cannot overshoot.
p *= 1 - 1000*epsilon /* was 64, but failed on FreeBSD sometimes */
var sum float64
if smallN {
sum = term
} else {
sum = math.Exp(term)
}
// Accumulate point probabilities, updating term by the ratio of successive
// probabilities, until the target is reached or xr hits its maximum.
for sum < p && xr < xend {
xr++
NB++
if smallN {
term *= (NR / xr) * (xb / NB)
} else {
term += math.Log((NR / xr) * (xb / NB))
}
if smallN {
sum += term
} else {
sum += math.Exp(term)
}
xb--
NR--
}
return xr, nil
}
// lfastchoose returns the logarithm of the binomial coefficient "n choose k",
// computed as -log(n+1) - lbeta(n-k+1, k+1). An error from lbeta is passed
// on together with NaN.
func lfastchoose(n, k float64) (float64, error) {
	logBeta, err := lbeta(n-k+1, k+1)
	if err == nil {
		return -math.Log(n+1) - logBeta, nil
	}
	return math.NaN(), err
}
// lbeta returns the natural logarithm of the beta function B(a, b).
//
// Both arguments must be non-negative; otherwise NaN and ErrDomain are
// returned. A NaN argument yields NaN. If the smaller argument is zero the
// result is +Inf, and if the larger one is infinite the result is -Inf,
// because B tends to zero there.
func lbeta(a, b float64) (float64, error) {
	if math.IsNaN(a) || math.IsNaN(b) {
		return math.NaN(), nil
	}

	p, q := a, a
	if b < p {
		p = b
	} /* := min(a,b) */
	if b > q {
		q = b
	} /* := max(a,b) */

	/* both arguments must be >= 0 */
	switch {
	case p < 0:
		return math.NaN(), ErrDomain
	case p == 0:
		return math.Inf(1), nil
	case math.IsInf(q, 0): /* q == +Inf, so B(p, q) == 0 */
		return math.Inf(-1), nil
	}

	switch {
	case p >= 10:
		/* p and q are big. */
		corr := stirlerr(p) + stirlerr(q) - stirlerr(p+q)
		return math.Log(q)*-0.5 + logSqrt2Pi + corr + (p-0.5)*math.Log(p/(p+q)) + q*math.Log1p(-p/(p+q)), nil
	case q >= 10:
		/* p is small, but q is big. */
		corr := stirlerr(q) - stirlerr(p+q)
		// The leading term is log Gamma(p), not Gamma(p) itself.
		return lgamma(p) + corr + p - p*math.Log(p+q) + (q-0.5)*math.Log1p(-p/(p+q)), nil
	case p < min:
		/* p and q are small: p <= q < 10. */
		/* R change for very small args */
		return lgamma(p) + (lgamma(q) - lgamma(p+q)), nil
	}
	return math.Log(math.Gamma(p) * (math.Gamma(q) / math.Gamma(p+q))), nil
}
// lgamma returns the first result of math.Lgamma(p), i.e. the logarithm of
// the absolute value of Gamma(p); the sign result is discarded.
func lgamma(p float64) float64 {
	value, _ := math.Lgamma(p)
	return value
}
// qIv converts a probability argument to a plain lower-tail probability.
// With logP set, p is a log probability: the result is exp(p) for the lower
// tail and 1-exp(p) for the upper tail. Otherwise the result is p for the
// lower tail and 1-p for the upper tail.
func qIv(p float64, lowerTail, logP bool) float64 {
	switch {
	case logP && lowerTail:
		return math.Exp(p)
	case logP:
		return -math.Expm1(p)
	case lowerTail:
		return p
	}
	// Computed as 0.5 - p + 0.5 rather than 1 - p.
	p = 0.5 - p
	return p + 0.5
}
// pdhyper calculates
//
//	phyper (x, r, b, n, TRUE, FALSE)
//	----------------------------------
//	   dhyper (x, r, b, n, FALSE)
//
// (or its logarithm when logP is set) without actually calling phyper.
// It assumes that
//
//	x * (r + b) <= n * r
func pdhyper(x, r, b, n float64, logP bool) float64 {
	var sum float64
	term := 1.0
	// Each term is the ratio of the density at the next lower x to the
	// density at the original x; stop once terms no longer affect the sum.
	for ; x > 0 && term >= epsilon*sum; x-- {
		term *= x * (b - n + x) / (n + 1 - x) / (r + 1 - x)
		sum += term
	}
	if !logP {
		return 1 + sum
	}
	return math.Log1p(sum)
}
// Logarithmic constants computed once at package initialisation.
var (
// ln2 is the natural logarithm of 2.
ln2 = math.Log(2)
// ln2Pi is the natural logarithm of 2π.
ln2Pi = math.Log(2 * math.Pi)
)
// log converts a lower-tail log probability x to the requested tail, staying
// on the log scale.
//
// For the lower tail x is returned unchanged, since it is already a
// logarithm. For the upper tail the result is log(1 - exp(x)), evaluated with
// whichever of two equivalent forms is more accurate for the given x.
func log(x float64, lowerTail bool) float64 {
	if lowerTail {
		return x
	}
	if x > -math.Ln2 {
		// exp(x) is close to 1, so 1 - exp(x) is best formed with expm1.
		return math.Log(-math.Expm1(x))
	}
	// exp(x) is small, so log1p keeps precision.
	return math.Log1p(-math.Exp(x))
}
func dbinom(x, n, p, q float64, giveLog bool) (float64, error) {
if p == 0 {
if x == 0 {
return 1, nil
}
return 0, nil
}
if q == 0 {
if x == n {
return 1, nil
}
return 0, nil
}
if x == 0 {
if n == 0 {
return 1, nil
}
if p < 0.1 {
t, err := bd0(n, n*q)
if err != nil {
return math.NaN(), err
}
return exp(-t-n*p, giveLog), nil
}
return exp(n*math.Log(q), giveLog), nil
}
if x == n {
if q < 0.1 {
t, err := bd0(n, n*p)
if err != nil {
return math.NaN(), err
}
return exp(-t-n*q, giveLog), nil
}
return exp(n*math.Log(p), giveLog), nil
}
if x < 0 || x > n {
return 0, nil
}
// n*p or n*q can underflow to zero if n and p or q are small. This
// used to occur in dbeta, and gives NaN as from R 2.3.0.
t1, err := bd0(x, n*p)
if err != nil {
return math.NaN(), err
}
t2, err := bd0(n-x, n*q)
if err != nil {
return math.NaN(), err
}
lc := stirlerr(n) - stirlerr(x) - stirlerr(n-x) - t1 - t2
// f = (M_2PI*x*(n-x))/n; could overflow or underflow
// Upto R 2.7.1:
// lf = log(M_2PI) + log(x) + log(n-x) - log(n);
// -- following is much better for x << n :
lf := ln2Pi + math.Log(x) + math.Log1p(-x/n)
return exp(lc-0.5*lf, giveLog), nil
}
// negativeOrNotInteger reports whether x is negative or has a fractional
// part. NaN is reported as true.
func negativeOrNotInteger(x float64) bool {
	if x < 0 {
		return true
	}
	return math.Floor(x) != x
}
// notFinite reports whether x is NaN or an infinity of either sign.
func notFinite(x float64) bool {
	if math.IsInf(x, 0) {
		return true
	}
	return math.IsNaN(x)
}
// round returns x rounded to the nearest integer, with halves rounded away
// from zero. NaN and infinities are returned unchanged.
//
// Delegating to math.Round also handles negative inputs correctly: comparing
// the signed fractional part against 0.5 used to send values such as -1.2
// to -2.
func round(x float64) float64 {
	return math.Round(x)
}
// exp maps a log-scale value to the requested output scale: x itself when
// giveLog is set, exp(x) otherwise.
func exp(x float64, giveLog bool) float64 {
	if !giveLog {
		return math.Exp(x)
	}
	return x
}
func dt0(lowerTail, logP bool) float64 {
if lowerTail {
return d0(logP)
}
return d1(logP)
}
func dt1(lowerTail, logP bool) float64 {
if lowerTail {
return d1(logP)
}
return d0(logP)
}
// d0 returns probability zero on the requested scale: 0, or -Inf when logP
// is set.
func d0(logP bool) float64 {
	zero := 0.0
	if logP {
		zero = math.Inf(-1)
	}
	return zero
}
// d1 returns probability one on the requested scale: 1, or 0 when logP is
// set.
func d1(logP bool) float64 {
	one := 1.0
	if logP {
		one = 0
	}
	return one
}
// bd0(x,M) := M * D0(x/M) = M*[ x/M * log(x/M) + 1 - (x/M) ] =
// = x * log(x/M) + M - x
// where M = E[X] = n*p (or = lambda), for x, M > 0
//
// in a manner that should be stable (with small relative error)
// for all x and M=np. In particular for x/np close to 1, direct
// evaluation fails, and evaluation is based on the Taylor series
// of log((1+v)/(1-v)) with v = (x-M)/(x+M) = (x-np)/(x+np).
//
// NaN and ErrDomain are returned when x or np is not finite or np is zero.
func bd0(x, np float64) (float64, error) {
if notFinite(x) || notFinite(np) || np == 0 {
return math.NaN(), ErrDomain
}
// x is within about 20% of np: use the series instead of the direct formula.
if math.Abs(x-np) < 0.1*(x+np) {
v := (x - np) / (x + np) // might underflow to 0
s := (x - np) * v // s using v -- change by MM
if math.Abs(s) < min {
return s, nil
}
ej := 2 * x * v
v = v * v
for j := 1; j < 1000; j++ {
// Taylor series; 1000: no infinite loop
// as |v| < .1, v^2000 is "zero"
ej *= v // = v^(2j+1)
s1 := s + ej/float64((j<<1)+1)
if s1 == s { // last term was effectively 0
return s1, nil
}
s = s1
}
}
/* else: | x - np | is not too small */
// Also reached if the series has not converged after 999 terms.
return x*math.Log(x/np) + np - x, nil
}
var (
// sfErrHalves holds precomputed stirlerr values for the arguments
// 0, 0.5, 1.0, 1.5, ..., 14.5, 15.0; entry i belongs to the argument i/2.
sfErrHalves = [31]float64{
0.0, // n=0 - wrong, place holder only
0.1534264097200273452913848, // 0.5
0.0810614667953272582196702, // 1.0
0.0548141210519176538961390, // 1.5
0.0413406959554092940938221, // 2.0
0.03316287351993628748511048, // 2.5
0.02767792568499833914878929, // 3.0
0.02374616365629749597132920, // 3.5
0.02079067210376509311152277, // 4.0
0.01848845053267318523077934, // 4.5
0.01664469118982119216319487, // 5.0
0.01513497322191737887351255, // 5.5
0.01387612882307074799874573, // 6.0
0.01281046524292022692424986, // 6.5
0.01189670994589177009505572, // 7.0
0.01110455975820691732662991, // 7.5
0.010411265261972096497478567, // 8.0
0.009799416126158803298389475, // 8.5
0.009255462182712732917728637, // 9.0
0.008768700134139385462952823, // 9.5
0.008330563433362871256469318, // 10.0
0.007934114564314020547248100, // 10.5
0.007573675487951840794972024, // 11.0
0.007244554301320383179543912, // 11.5
0.006942840107209529865664152, // 12.0
0.006665247032707682442354394, // 12.5
0.006408994188004207068439631, // 13.0
0.006171712263039457647532867, // 13.5
0.005951370112758847735624416, // 14.0
0.005746216513010115682023589, // 14.5
0.005554733551962801371038690, // 15.0
}
// logSqrt2Pi is log(sqrt(2π)), computed once at package initialisation.
logSqrt2Pi = math.Log(math.Sqrt(2 * math.Pi))
)
// stirlerr(n) = log(n!) - log( sqrt(2*pi*n)*(n/e)^n )
// = log Gamma(n+1) - 1/2 * [log(2*pi) + log(n)] - n*[log(n) - 1]
// = log Gamma(n+1) - (n + 1/2) * log(n) + n - log(2*pi)/2
//
// i.e. the error of Stirling's approximation to log(n!).
//
// For n <= 15 the value comes from the sfErrHalves table when 2n is an
// integer and from math.Lgamma otherwise; for larger n a truncated series in
// 1/n is used, with fewer terms the larger n is.
// NOTE(review): a negative n with integral 2n would index sfErrHalves out of
// range; callers are presumably expected to pass n >= 0.
func stirlerr(n float64) float64 {
// Coefficients of the series in 1/n.
const (
S0 = 1. / 12.
S1 = 1. / 360.
S2 = 1. / 1260.
S3 = 1. / 1680.
S4 = 1. / 1188.
)
var nn float64
if n <= 15.0 {
// nn = 2n is the table index when n is a multiple of 0.5.
nn = n + n
if nn == math.Floor(nn) {
return sfErrHalves[int(nn)]
}
lg, _ := math.Lgamma(n + 1)
return lg - (n+0.5)*math.Log(n) + n - logSqrt2Pi
}
// From here on nn holds n squared.
nn = n * n
switch {
case n > 500:
return ((S0 - S1/nn) / n)
case n > 80:
return ((S0 - (S1-S2/nn)/nn) / n)
case n > 35:
return ((S0 - (S1-(S2-S3/nn)/nn)/nn) / n)
default: // 15 < n <= 35
return (S0 - (S1-(S2-(S3-S4/nn)/nn)/nn)/nn) / n
}
}

Resources