My code was heavily bottlenecked by bilinear interpolation, so I wrote a version (ScaleBlerpI) that does not use floating-point math. This is already 1.5 times faster (1.85 after Edit 1 below), but I am wondering how I could make it even faster.
Any hints are appreciated.
func ScaleBlerpI(src, dst *ValueFieldI) {
mx := uint64((src.Width - 1) * math.MaxUint32 / dst.Width)
my := uint64((src.Height - 1) * math.MaxUint32 / dst.Height)
for y := uint64(0); y < uint64(dst.Height); y++ {
for x := uint64(0); x < uint64(dst.Width); x++ {
gx := (x * mx) >> 32 // eq. / math.MaxUint32
tx := (x * mx) & math.MaxUint32 // eq. % (math.MaxUint32 + 1) or % 2^32
gy := (y * my) >> 32
ty := (y * my) & math.MaxUint32
srcX, srcY := int(gx), int(gy)
rgba00 := src.GetComponent(srcX, srcY)
rgba10 := src.GetComponent(srcX+1, srcY)
rgba01 := src.GetComponent(srcX, srcY+1)
rgba11 := src.GetComponent(srcX+1, srcY+1)
result := []uint32{
blerpI(rgba00[0], rgba10[0], rgba01[0], rgba11[0], tx, ty),
blerpI(rgba00[1], rgba10[1], rgba01[1], rgba11[1], tx, ty),
blerpI(rgba00[2], rgba10[2], rgba01[2], rgba11[2], tx, ty),
}
dst.SetComponent(int(x), int(y), result)
}
}
}
func lerpI(s, e uint32, f uint64) uint32 {
// basically s * (1 - f) + e * f
return uint32(
(uint64(s)*(math.MaxUint32-f) + uint64(e)*f) /
math.MaxUint32)
}
func blerpI(c00, c10, c01, c11 uint32, tx, ty uint64) uint32 {
return lerpI(
lerpI(c00, c10, tx),
lerpI(c01, c11, tx),
ty,
)
}
type ValueFieldI struct {
Width, Height int
ComponentSize int
Values []uint32
}
func (vf *ValueFieldI) GetComponent(x, y int) []uint32 {
componentIdx := x + y*vf.Width
return vf.Values[componentIdx*vf.ComponentSize : componentIdx*vf.ComponentSize+vf.ComponentSize]
}
func (vf *ValueFieldI) SetComponent(x, y int, c []uint32) {
copy(vf.GetComponent(x, y), c)
}
Profiling has shown me that most of the time is spent in blerpI, src.GetComponent and dst.SetComponent.
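One direction the profile suggests (a rough sketch, not part of the original code): since src.GetComponent, dst.SetComponent and the per-pixel result slice dominate, the inner loop can index the Values slices directly and hoist the y-dependent work out of the x loop. This sketch assumes ComponentSize is 3 and reuses mx/my, blerpI and ValueFieldI exactly as defined above.
// Hypothetical variant for comparison only: same math as ScaleBlerpI, but it
// indexes src.Values/dst.Values directly instead of slicing per pixel.
// Assumes ComponentSize == 3; reuses blerpI and ValueFieldI from above.
func ScaleBlerpIFlat(src, dst *ValueFieldI) {
    mx := uint64((src.Width - 1) * math.MaxUint32 / dst.Width)
    my := uint64((src.Height - 1) * math.MaxUint32 / dst.Height)
    for y := uint64(0); y < uint64(dst.Height); y++ {
        gy := (y * my) >> 32
        ty := (y * my) & math.MaxUint32
        rowTop := int(gy) * src.Width * 3 // start of source row gy
        rowBot := rowTop + src.Width*3    // start of source row gy+1
        dstRow := int(y) * dst.Width * 3
        for x := uint64(0); x < uint64(dst.Width); x++ {
            gx := (x * mx) >> 32
            tx := (x * mx) & math.MaxUint32
            i00 := rowTop + int(gx)*3 // top-left texel, channel 0
            i01 := rowBot + int(gx)*3 // bottom-left texel, channel 0
            di := dstRow + int(x)*3
            for c := 0; c < 3; c++ {
                dst.Values[di+c] = blerpI(
                    src.Values[i00+c], src.Values[i00+3+c],
                    src.Values[i01+c], src.Values[i01+3+c],
                    tx, ty)
            }
        }
    }
}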
Edit 1
Replaced
// basically s * (1 - f) + e * f
return uint32(
(uint64(s)*(math.MaxUint32-f) + uint64(e)*f) /
math.MaxUint32)
With
// basically s + f*(e-s)
return s + uint32((f*(uint64(e)-uint64(s)))>>32)
The integer version is now 1.85 times faster.
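A quick numeric check of this formula (not from the original post) against plain floating-point interpolation; note that the uint64(e)-uint64(s) subtraction assumes e >= s, since it would wrap otherwise:
package main

import (
    "fmt"
    "math"
)

func lerpI(s, e uint32, f uint64) uint32 {
    // basically s + f*(e-s), with f a fraction scaled to 0..2^32-1
    return s + uint32((f*(uint64(e)-uint64(s)))>>32)
}

func main() {
    s, e := uint32(1000), uint32(2000)
    // f maxes out at 2^32-1, so t=1 lands one short of e (1999 here), just as
    // tx and ty never reach exactly 1.0 inside ScaleBlerpI.
    for _, t := range []float64{0, 0.25, 0.5, 0.75, 1} {
        f := uint64(t * float64(math.MaxUint32))
        fmt.Printf("t=%.2f fixed=%d float=%.0f\n", t, lerpI(s, e, f), float64(s)+t*float64(e-s))
    }
}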
Edit 2
Benchmark:
func BenchmarkBlerpIRand(b *testing.B) {
src := &ValueFieldI{
Width: 37,
Height: 37,
ComponentSize: 3,
Values: make([]uint32, 37*37*3),
}
for i := range src.Values {
src.Values[i] = rand.Uint32()
}
dst := &ValueFieldI{
Width: 37 * 8,
Height: 37 * 8,
ComponentSize: 3,
Values: make([]uint32, 37*8*37*8*3),
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
ScaleBlerpI(src, dst)
}
}
I need to form a header (8 bits) using a version (4 bits), a count (3 bits) and an identifier (1 bit). How can I achieve this in Go? For example:
version: 1 (0001)
count: 3 (011)
identifier: 1 (1)
Header: 00010111 (23)
I'm doing the following, which works, but there's a lot of cumbersome code. How can I do this more efficiently?
const (
VersionSize binary.Bits = 4
countSize binary.Bits = 3
IdentifierSize binary.Bits = 1
)
type header struct {
version uint8
count uint8
identifier uint8
}
func main() {
headerObj := &header{version: 1, count: 3, identifier: 1}
headerBytes := headerObj.encode()
// prints [23]
fmt.Println(headerBytes)
}
func (h *header) encode() []byte {
var header []byte
vercountIdBinary := toBinary(h.version, VersionSize) + toBinary(h.count, countSize) + toBinary(h.identifier, IdentifierSize)
vercountIdByte, _ := strconv.ParseInt(vercountIdBinary, 2, 8)
header = append(header, byte(vercountIdByte))
return header
}
func toBinary(value interface{}, bitSize binary.Bits) string {
format := "%0" + strconv.Itoa(int(bitSize)) + "b"
return fmt.Sprintf(format, value)
}
Packing and unpacking bits into a number can be achieved simply with bit masking and shifting.
For example, to pack bits into a number: mask and assign the first field, then shift the result left by the bit size of the next field (to make room for it). Then mask the second field and "add" it using bitwise OR. Then shift again by the size of the third field, and repeat.
To unpack: mask the packed value with the mask of the last field, and you have the last number. Shift the data right by the size of the field you just decoded, mask with the next field's mask (working in reverse order), and you have that number. Repeat until all fields are decoded.
For example, the following packs identifier into the most significant bits, count in the middle, and version into the least significant bits; you may use the opposite layout by packing the fields in reverse order:
const (
BitsVersion = 4
BitsCount = 3
BitsId = 1
)
const (
MaskVersion = 1<<BitsVersion - 1
MaskCount = 1<<BitsCount - 1
MaskId = 1<<BitsId - 1
)
type header struct {
version uint8
count uint8
identifier uint8
}
func (h *header) ToByte() uint8 {
var b uint8
b = h.identifier & MaskId
b <<= BitsCount
b |= h.count & MaskCount
b <<= BitsVersion
b |= h.version & MaskVersion
return b
}
func (h *header) ParseByte(b uint8) {
h.version = b & MaskVersion
b >>= BitsVersion
h.count = b & MaskCount
b >>= BitsCount
h.identifier = b & MaskId
}
Testing it:
h := &header{
version: 3,
count: 2,
identifier: 1,
}
fmt.Printf("%+v\n", h)
b := h.ToByte()
h2 := &header{}
h2.ParseByte(b)
fmt.Printf("%+v\n", h2)
Which will output (try it on the Go Playground):
&{version:3 count:2 identifier:1}
&{version:3 count:2 identifier:1}
Note: the above example encodes the fields in id-count-version order. The order of the fields doesn't matter as long as packing and unpacking work with the same order. If you need the reverse order (version-count-id), simply reverse the order in which the fields are packed / unpacked. Here's how to do that:
func (h *header) ToByte() uint8 {
var b uint8
b = h.version & MaskVersion
b <<= BitsCount
b |= h.count & MaskCount
b <<= BitsId
b |= h.identifier & MaskId
return b
}
func (h *header) ParseByte(b uint8) {
h.identifier = b & MaskId
b >>= BitsId
h.count = b & MaskCount
b >>= BitsCount
h.version = b & MaskVersion
}
This outputs the same. Try this one on the Go Playground.
Note that if you have to do this with multiple data, targeting an io.Writer stream, you may use the github.com/icza/bitio library (disclosure: I'm the author).
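For illustration only, here's a minimal sketch of writing and reading the same example header as a bit stream with bitio; it assumes the package's NewWriter/WriteBits and NewReader/ReadBits signatures, and error handling is omitted for brevity:
package main

import (
    "bytes"
    "fmt"

    "github.com/icza/bitio"
)

func main() {
    buf := &bytes.Buffer{}
    w := bitio.NewWriter(buf)
    w.WriteBits(1, 4) // version (4 bits)
    w.WriteBits(3, 3) // count (3 bits)
    w.WriteBits(1, 1) // identifier (1 bit)
    w.Close()         // flush any partial byte

    fmt.Println(buf.Bytes()) // [23] for the example header above

    r := bitio.NewReader(bytes.NewReader(buf.Bytes()))
    version, _ := r.ReadBits(4)
    count, _ := r.ReadBits(3)
    id, _ := r.ReadBits(1)
    fmt.Println(version, count, id) // 1 3 1
}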
In R, I can calculate a p-value for a hypergeometric distribution by using the phyper() function, where the first value in the returned array is the p-value.
I was wondering whether there is any package in Go / Golang that lets me do this calculation completely within Go.
You should check out:
probab - Probability distribution functions. Bayesian inference. Written in pure Go.
stat - Pure Go implementation of the GSL Statistics library.
gostat - A statistics library for the go language
When I find problems dealing with stats, my second line of attack, after finding that a library does not exist, is to port from the R code. How easy this is varies, since the code may be R, C/C++ or Fortran.
In this case it was pure C, so the port was trivial. Note that the Qhyper() implementation is not an exact port since I have used stirlerr() in place of lgammacor() for the lbeta() implementation. This doesn't seem to make a great deal of difference, but I advise caution if using this lbeta() (and so Qhyper()).
// Direct port of R code from nmath/{phyper,dbinom,stirlerr}.c and {dpq,nmath}.h.
// Code licensed under GPL for that reason (c) Dan Kortschak.
package main
import (
"errors"
"fmt"
"math"
)
func main() {
// Example values come from:
// http://stackoverflow.com/questions/8382806/r-hypergeometric-test-phyper
fmt.Println(Phyper(62, 1998, 5260-1998, 131, true, false))
for x := 0.; x < 10; x++ {
fmt.Println(Phyper(x, 10, 7, 8, true, false))
}
fmt.Println()
for x := 0.; x < 10; x++ {
fmt.Println(Dhyper(x, 10, 7, 8, false))
}
fmt.Println()
for x := 0.; x < 10; x++ {
fmt.Println(Qhyper(x, 10, 7, 8, true, false))
}
}
var ErrDomain = errors.New("hyper: argument out of domain")
const (
epsilon = 2.2204460492503131e-16
min = 2.2250738585072014e-308
)
// Sample of n balls from r red and b black ones; x are red
func Phyper(x, r, b, n float64, lowerTail, logP bool) (float64, error) {
x = math.Floor(x + 1e-7)
r = round(r)
b = round(b)
n = round(n)
if r < 0 || b < 0 || notFinite(r+b) || n < 0 || n > r+b {
return math.NaN(), ErrDomain
}
if x*(r+b) > n*r {
b, r = r, b
x = n - x - 1
lowerTail = !lowerTail
}
if x < 0 {
return dt0(lowerTail, logP), nil
}
if x >= r || x >= n {
return dt1(lowerTail, logP), nil
}
d, err := Dhyper(x, r, b, n, logP)
if err != nil {
return d, err
}
pd := pdhyper(x, r, b, n, logP)
if logP {
return log(d+pd, lowerTail), nil
}
res := d * pd
if lowerTail {
return res, nil
}
// Use 0.5 - p + 0.5 to perhaps gain 1 bit of accuracy
res = 0.5 - res
return res + 0.5, nil
}
func Dhyper(x, r, b, n float64, giveLog bool) (float64, error) {
if negativeOrNotInteger(r) || negativeOrNotInteger(b) || negativeOrNotInteger(n) || n > r+b {
return math.NaN(), ErrDomain
}
if x < 0 {
return 0, nil
}
if x != math.Floor(x) {
return 0, fmt.Errorf("non-integer x = %f", x)
}
x = round(x)
r = round(r)
b = round(b)
n = round(n)
if n < x || r < x || n-x > b {
return 0, nil
}
if n == 0 {
if x == 0 {
return 1, nil
}
return 0, nil
}
p := n / (r + b)
q := (r + b - n) / (r + b)
p1, err := dbinom(x, r, p, q, giveLog)
if err != nil {
return math.NaN(), err
}
p2, err := dbinom(n-x, b, p, q, giveLog)
if err != nil {
return math.NaN(), err
}
p3, err := dbinom(n, r+b, p, q, giveLog)
if err != nil {
return math.NaN(), err
}
if giveLog {
return p1 + p2 - p3, nil
}
return p1 * p2 / p3, nil
}
func Qhyper(p, NR, NB, n float64, lowerTail, logP bool) (float64, error) {
if notFinite(p) || notFinite(NR) || notFinite(NB) || notFinite(n) {
return math.NaN(), ErrDomain
}
NR = round(NR)
NB = round(NB)
N := NR + NB
n = round(n)
if NR < 0 || NB < 0 || n < 0 || n > N {
return math.NaN(), ErrDomain
}
/* Goal: Find xr (= #{red balls in sample}) such that
* phyper(xr, NR,NB, n) >= p > phyper(xr - 1, NR,NB, n)
*/
xstart := math.Max(0, n-NB)
xend := math.Min(n, NR)
if logP {
if p > 0 {
return math.NaN(), ErrDomain
}
if p == 0 { /* upper bound*/
if lowerTail {
return xend, nil
}
return xstart, nil
}
if math.IsInf(p, -1) {
if lowerTail {
return xstart, nil
}
return xend, nil
}
} else { /* !logP */
if p < 0 || p > 1 {
return math.NaN(), ErrDomain
}
if p == 0 {
if lowerTail {
return xstart, nil
}
return xend, nil
}
if p == 1 {
if lowerTail {
return xend, nil
}
return xstart, nil
}
}
xr := xstart
xb := n - xr /* always ( = #{black balls in sample} ) */
smallN := N < 1000 /* won't have underflow in product below */
/* if N is small, term := product.ratio( bin.coef );
otherwise work with its logarithm to protect against underflow */
t1, err := lfastchoose(NR, xr)
if err != nil {
return 0, err
}
t2, err := lfastchoose(NB, xb)
if err != nil {
return 0, err
}
t3, err := lfastchoose(N, n)
if err != nil {
return 0, err
}
term := t1 + t2 - t3
if smallN {
term = math.Exp(term)
}
NR -= xr
NB -= xb
if !lowerTail || logP {
p = qIv(p, lowerTail, logP)
}
p *= 1 - 1000*epsilon /* was 64, but failed on FreeBSD sometimes */
var sum float64
if smallN {
sum = term
} else {
sum = math.Exp(term)
}
for sum < p && xr < xend {
xr++
NB++
if smallN {
term *= (NR / xr) * (xb / NB)
} else {
term += math.Log((NR / xr) * (xb / NB))
}
if smallN {
sum += term
} else {
sum += math.Exp(term)
}
xb--
NR--
}
return xr, nil
}
func lfastchoose(n, k float64) (float64, error) {
lb, err := lbeta(n-k+1, k+1)
if err != nil {
return math.NaN(), err
}
return -math.Log(n+1) - lb, nil
}
func lbeta(a, b float64) (float64, error) {
p := a
q := a
if b < p {
p = b
} /* := min(a,b) */
if b > q {
q = b
} /* := max(a,b) */
/* both arguments must be >= 0 */
if p < 0 {
return math.NaN(), ErrDomain
} else if p == 0 {
return math.Inf(1), nil
} else if notFinite(q) { /* q == +Inf */
return math.Inf(-1), nil // lbeta is -Inf for infinite q, as in R's lbeta.c
}
if p >= 10 {
/* p and q are big. */
corr := stirlerr(p) + stirlerr(q) - stirlerr(p+q)
return math.Log(q)*-0.5 + logSqrt2Pi + corr + (p-0.5)*math.Log(p/(p+q)) + q*math.Log1p(-p/(p+q)), nil
} else if q >= 10 {
/* p is small, but q is big. */
corr := stirlerr(q) - stirlerr(p+q)
return lgamma(p) + corr + p - p*math.Log(p+q) + (q-0.5)*math.Log1p(-p/(p+q)), nil
} else {
/* p and q are small: p <= q < 10. */
/* R change for very small args */
if p < min {
return lgamma(p) + (lgamma(q) - lgamma(p+q)), nil
}
}
return math.Log(math.Gamma(p) * (math.Gamma(q) / math.Gamma(p+q))), nil
}
func lgamma(p float64) float64 {
r, _ := math.Lgamma(p)
return r
}
func qIv(p float64, lowerTail, logP bool) float64 {
if logP {
if lowerTail {
return math.Exp(p)
}
return -math.Expm1(p)
}
if lowerTail {
return p
}
p = 0.5 - p
return p + 0.5
}
// Calculate
//
// phyper (x, r, b, n, TRUE, FALSE)
// [log] ----------------------------------
// dhyper (x, r, b, n, FALSE)
//
// without actually calling phyper. This assumes that
//
// x * (r + b) <= n * r
func pdhyper(x, r, b, n float64, logP bool) float64 {
sum := 0.
term := 1.
for x > 0 && term >= epsilon*sum {
term *= x * (b - n + x) / (n + 1 - x) / (r + 1 - x)
sum += term
x--
}
if logP {
return math.Log1p(sum)
}
return 1 + sum
}
var (
ln2 = math.Log(2)
ln2Pi = math.Log(2 * math.Pi)
)
// x is already a log-probability here (cf. R's R_DT_Log), so the lower-tail
// case returns it unchanged.
func log(x float64, lowerTail bool) float64 {
if lowerTail {
return x
}
if x > -ln2 {
return math.Log(-math.Expm1(x))
}
return math.Log1p(-math.Exp(x))
}
func dbinom(x, n, p, q float64, giveLog bool) (float64, error) {
if p == 0 {
if x == 0 {
return 1, nil
}
return 0, nil
}
if q == 0 {
if x == n {
return 1, nil
}
return 0, nil
}
if x == 0 {
if n == 0 {
return 1, nil
}
if p < 0.1 {
t, err := bd0(n, n*q)
if err != nil {
return math.NaN(), err
}
return exp(-t-n*p, giveLog), nil
}
return exp(n*math.Log(q), giveLog), nil
}
if x == n {
if q < 0.1 {
t, err := bd0(n, n*p)
if err != nil {
return math.NaN(), err
}
return exp(-t-n*q, giveLog), nil
}
return exp(n*math.Log(p), giveLog), nil
}
if x < 0 || x > n {
return 0, nil
}
// n*p or n*q can underflow to zero if n and p or q are small. This
// used to occur in dbeta, and gives NaN as from R 2.3.0.
t1, err := bd0(x, n*p)
if err != nil {
return math.NaN(), err
}
t2, err := bd0(n-x, n*q)
if err != nil {
return math.NaN(), err
}
lc := stirlerr(n) - stirlerr(x) - stirlerr(n-x) - t1 - t2
// f = (M_2PI*x*(n-x))/n; could overflow or underflow
// Upto R 2.7.1:
// lf = log(M_2PI) + log(x) + log(n-x) - log(n);
// -- following is much better for x << n :
lf := ln2Pi + math.Log(x) + math.Log1p(-x/n)
return exp(lc-0.5*lf, giveLog), nil
}
func negativeOrNotInteger(x float64) bool {
return x < 0 || x != math.Floor(x)
}
func notFinite(x float64) bool {
return math.IsNaN(x) || math.IsInf(x, 0)
}
func round(x float64) float64 {
if _, frac := math.Modf(x); frac >= 0.5 {
return math.Ceil(x)
}
return math.Floor(x)
}
func exp(x float64, giveLog bool) float64 {
if giveLog {
return x
}
return math.Exp(x)
}
func dt0(lowerTail, logP bool) float64 {
if lowerTail {
return d0(logP)
}
return d1(logP)
}
func dt1(lowerTail, logP bool) float64 {
if lowerTail {
return d1(logP)
}
return d0(logP)
}
func d0(logP bool) float64 {
if logP {
return math.Inf(-1)
}
return 0
}
func d1(logP bool) float64 {
if logP {
return 0
}
return 1
}
// bd0(x,M) := M * D0(x/M) = M*[ x/M * log(x/M) + 1 - (x/M) ] =
// = x * log(x/M) + M - x
// where M = E[X] = n*p (or = lambda), for x, M > 0
//
// in a manner that should be stable (with small relative error)
// for all x and M=np. In particular for x/np close to 1, direct
// evaluation fails, and evaluation is based on the Taylor series
// of log((1+v)/(1-v)) with v = (x-M)/(x+M) = (x-np)/(x+np).
//
func bd0(x, np float64) (float64, error) {
if notFinite(x) || notFinite(np) || np == 0 {
return math.NaN(), ErrDomain
}
if math.Abs(x-np) < 0.1*(x+np) {
v := (x - np) / (x + np) // might underflow to 0
s := (x - np) * v // s using v -- change by MM
if math.Abs(s) < min {
return s, nil
}
ej := 2 * x * v
v = v * v
for j := 1; j < 1000; j++ {
// Taylor series; 1000: no infinite loop
// as |v| < .1, v^2000 is "zero"
ej *= v // = v^(2j+1)
s1 := s + ej/float64((j<<1)+1)
if s1 == s { // last term was effectively 0
return s1, nil
}
s = s1
}
}
/* else: | x - np | is not too small */
return x*math.Log(x/np) + np - x, nil
}
var (
// error for 0, 0.5, 1.0, 1.5, ..., 14.5, 15.0.
sfErrHalves = [31]float64{
0.0, // n=0 - wrong, place holder only
0.1534264097200273452913848, // 0.5
0.0810614667953272582196702, // 1.0
0.0548141210519176538961390, // 1.5
0.0413406959554092940938221, // 2.0
0.03316287351993628748511048, // 2.5
0.02767792568499833914878929, // 3.0
0.02374616365629749597132920, // 3.5
0.02079067210376509311152277, // 4.0
0.01848845053267318523077934, // 4.5
0.01664469118982119216319487, // 5.0
0.01513497322191737887351255, // 5.5
0.01387612882307074799874573, // 6.0
0.01281046524292022692424986, // 6.5
0.01189670994589177009505572, // 7.0
0.01110455975820691732662991, // 7.5
0.010411265261972096497478567, // 8.0
0.009799416126158803298389475, // 8.5
0.009255462182712732917728637, // 9.0
0.008768700134139385462952823, // 9.5
0.008330563433362871256469318, // 10.0
0.007934114564314020547248100, // 10.5
0.007573675487951840794972024, // 11.0
0.007244554301320383179543912, // 11.5
0.006942840107209529865664152, // 12.0
0.006665247032707682442354394, // 12.5
0.006408994188004207068439631, // 13.0
0.006171712263039457647532867, // 13.5
0.005951370112758847735624416, // 14.0
0.005746216513010115682023589, // 14.5
0.005554733551962801371038690, // 15.0
}
logSqrt2Pi = math.Log(math.Sqrt(2 * math.Pi))
)
// stirlerr(n) = log(n!) - log( sqrt(2*pi*n)*(n/e)^n )
// = log Gamma(n+1) - 1/2 * [log(2*pi) + log(n)] - n*[log(n) - 1]
// = log Gamma(n+1) - (n + 1/2) * log(n) + n - log(2*pi)/2
func stirlerr(n float64) float64 {
const (
S0 = 1. / 12.
S1 = 1. / 360.
S2 = 1. / 1260.
S3 = 1. / 1680.
S4 = 1. / 1188.
)
var nn float64
if n <= 15.0 {
nn = n + n
if nn == math.Floor(nn) {
return sfErrHalves[int(nn)]
}
lg, _ := math.Lgamma(n + 1)
return lg - (n+0.5)*math.Log(n) + n - logSqrt2Pi
}
nn = n * n
switch {
case n > 500:
return ((S0 - S1/nn) / n)
case n > 80:
return ((S0 - (S1-S2/nn)/nn) / n)
case n > 35:
return ((S0 - (S1-(S2-S3/nn)/nn)/nn) / n)
default: // 15 < n <= 35
return (S0 - (S1-(S2-(S3-S4/nn)/nn)/nn)/nn) / n
}
}