func Benchmark_foreach1(b *testing.B) {
var test map[int]int
test = make(map[int]int)
for i := 0; i < 100000; i++ {
test[i] = 1
}
for i := 0; i < b.N; i++ {
for i, _ := range test {
if test[i] != 1 {
panic("ds")
}
}
}
}
func Benchmark_foreach2(b *testing.B) {
var test map[int]int
test = make(map[int]int)
for i := 0; i < 100000; i++ {
test[i] = 1
}
for i := 0; i < b.N; i++ {
for _, v := range test {
if v != 1 {
panic("heh")
}
}
}
}
run with result as below
goos: linux
goarch: amd64
Benchmark_foreach1-2 500 3172323 ns/op
Benchmark_foreach2-2 1000 1707214 ns/op
why is foreach-2 slow?
I think Benchmark_foreach2-2 is about 2 times faster - it requires 1707214 nanoseconds per operation, and first one takes 3172323. So second one is 3172323 / 1707214 = 1.85 times faster.
Reason: second doesn't need to take value from a memory again, it already used value in v variable.
The test[k] statement in BenchmarkForeachK takes time to randomly read the value, so BenchmarkForeachK takes more time than BenchmarkForeachV, 9362945 ns/op versus 4213940 ns/op .
For example,
package main
import "testing"
func testMap() map[int]int {
test := make(map[int]int)
for i := 0; i < 100000; i++ {
test[i] = 1
}
return test
}
func BenchmarkForeachK(b *testing.B) {
test := testMap()
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
for k := range test {
if test[k] != 1 {
panic("eh")
}
}
}
}
func BenchmarkForeachV(b *testing.B) {
test := testMap()
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, v := range test {
if v != 1 {
panic("heh")
}
}
}
}
Output:
$ go test foreach_test.go -bench=.
BenchmarkForeachK-4 200 9362945 ns/op 0 B/op 0 allocs/op
BenchmarkForeachV-4 300 4213940 ns/op 0 B/op 0 allocs/op
Related
I'm benchmarking unmarshaling from string to int and uint with this code:
package main
import (
"strconv"
"testing"
)
func BenchmarkUnmarshalInt(b *testing.B) {
for i := 0; i < b.N; i++ {
UnmarshalInt("123456")
}
}
func BenchmarkUnmarshalUint(b *testing.B) {
for i := 0; i < b.N; i++ {
UnmarshalUint("123456")
}
}
func UnmarshalInt(v string) int {
i, _ := strconv.Atoi(v)
return i
}
func UnmarshalUint(v string) uint {
i, _ := strconv.ParseUint(v, 10, 64)
return uint(i)
}
Result:
Running tool: C:\Go\bin\go.exe test -benchmem -run=^$ myBench/main -bench .
goos: windows
goarch: amd64
pkg: myBench/main
BenchmarkUnmarshalInt-8 99994166 11.7 ns/op 0 B/op 0 allocs/op
BenchmarkUnmarshalUint-8 54550413 21.0 ns/op 0 B/op 0 allocs/op
Is it possible that the second (uint) is almost twice as slow as the first (int)?
Yes, it's possible. strconv.Atoi has a fast path when the input string length is less than 19 (or 10 if int is 32 bit). This allows it to be a lot faster because it doesn't need to check for overflow.
If you change your test number to "1234567890123456789" (assuming 64 bit int), then your int benchmark is slightly slower than the uint benchmark because the fast path can't be used. On my machine, it takes 37.6 ns/op for the signed version vs 31.5 ns/op for the unsigned version.
Here's the modified benchmark code (note I added a variable that sums up the parsed results, just in case the compiler got clever and optimized it away).
package main
import (
"fmt"
"strconv"
"testing"
)
const X = "1234567890123456789"
func BenchmarkUnmarshalInt(b *testing.B) {
var T int
for i := 0; i < b.N; i++ {
T += UnmarshalInt(X)
}
fmt.Println(T)
}
func BenchmarkUnmarshalUint(b *testing.B) {
var T uint
for i := 0; i < b.N; i++ {
T += UnmarshalUint(X)
}
fmt.Println(T)
}
func UnmarshalInt(v string) int {
i, _ := strconv.Atoi(v)
return i
}
func UnmarshalUint(v string) uint {
i, _ := strconv.ParseUint(v, 10, 64)
return uint(i)
}
For reference, the code for strconv.Atoi in the standard library is currently as follows:
func Atoi(s string) (int, error) {
const fnAtoi = "Atoi"
sLen := len(s)
if intSize == 32 && (0 < sLen && sLen < 10) ||
intSize == 64 && (0 < sLen && sLen < 19) {
// Fast path for small integers that fit int type.
s0 := s
if s[0] == '-' || s[0] == '+' {
s = s[1:]
if len(s) < 1 {
return 0, &NumError{fnAtoi, s0, ErrSyntax}
}
}
n := 0
for _, ch := range []byte(s) {
ch -= '0'
if ch > 9 {
return 0, &NumError{fnAtoi, s0, ErrSyntax}
}
n = n*10 + int(ch)
}
if s0[0] == '-' {
n = -n
}
return n, nil
}
// Slow path for invalid, big, or underscored integers.
i64, err := ParseInt(s, 10, 0)
if nerr, ok := err.(*NumError); ok {
nerr.Func = fnAtoi
}
return int(i64), err
}
Referring to the following benchmarking test codes:
func BenchmarkRuneCountNoDefault(b *testing.B) {
b.StopTimer()
var strings []string
numStrings := 10
for n := 0; n < numStrings; n++{
s := RandStringBytesMaskImprSrc(10)
strings = append(strings, s)
}
jobs := make(chan string)
results := make (chan int)
for i := 0; i < runtime.NumCPU(); i++{
go RuneCountNoDefault(jobs, results)
}
b.StartTimer()
for n := 0; n < b.N; n++ {
go func(){
for n := 0; n < numStrings; n++{
<-results
}
return
}()
for n := 0; n < numStrings; n++{
jobs <- strings[n]
}
}
close(jobs)
}
func RuneCountNoDefault(jobs chan string, results chan int){
for{
select{
case j, ok := <-jobs:
if ok{
results <- utf8.RuneCountInString(j)
} else {
return
}
}
}
}
func BenchmarkRuneCountWithDefault(b *testing.B) {
b.StopTimer()
var strings []string
numStrings := 10
for n := 0; n < numStrings; n++{
s := RandStringBytesMaskImprSrc(10)
strings = append(strings, s)
}
jobs := make(chan string)
results := make (chan int)
for i := 0; i < runtime.NumCPU(); i++{
go RuneCountWithDefault(jobs, results)
}
b.StartTimer()
for n := 0; n < b.N; n++ {
go func(){
for n := 0; n < numStrings; n++{
<-results
}
return
}()
for n := 0; n < numStrings; n++{
jobs <- strings[n]
}
}
close(jobs)
}
func RuneCountWithDefault(jobs chan string, results chan int){
for{
select{
case j, ok := <-jobs:
if ok{
results <- utf8.RuneCountInString(j)
} else {
return
}
default: //DIFFERENCE
}
}
}
//https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-golang
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
const (
letterIdxBits = 6 // 6 bits to represent a letter index
letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
letterIdxMax = 63 / letterIdxBits // # of letter indices fitting in 63 bits
)
var src = rand.NewSource(time.Now().UnixNano())
func RandStringBytesMaskImprSrc(n int) string {
b := make([]byte, n)
// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
if remain == 0 {
cache, remain = src.Int63(), letterIdxMax
}
if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
b[i] = letterBytes[idx]
i--
}
cache >>= letterIdxBits
remain--
}
return string(b)
}
When I benchmarked both the functions where one function, RuneCountNoDefault has no default clause in the select and the other, RuneCountWithDefault has a default clause, I'm getting the following benchmark:
BenchmarkRuneCountNoDefault-4 200000 8910 ns/op
BenchmarkRuneCountWithDefault-4 5 277798660 ns/op
Checking the cpuprofile generated by the tests, I noticed that the function with the default clause spends a lot of time in the following channel operations:
Why having a default clause in the goroutine's select makes it slower?
I'm using Go version 1.10 for windows/amd64
The Go Programming Language
Specification
Select statements
If one or more of the communications can proceed, a single one that
can proceed is chosen via a uniform pseudo-random selection.
Otherwise, if there is a default case, that case is chosen. If there
is no default case, the "select" statement blocks until at least one
of the communications can proceed.
Modifying your benchmark to count the number of proceed and default cases taken:
$ go test default_test.go -bench=.
goos: linux
goarch: amd64
BenchmarkRuneCountNoDefault-4 300000 4108 ns/op
BenchmarkRuneCountWithDefault-4 10 209890782 ns/op
--- BENCH: BenchmarkRuneCountWithDefault-4
default_test.go:90: proceeds 114
default_test.go:91: defaults 128343308
$
While other cases were unable to proceed, the default case was taken 128343308 times in 209422470, (209890782 - 114*4108), nanoseconds or 1.63 nanoseconds per default case. If you do something small a large number of times, it adds up.
default_test.go:
package main
import (
"math/rand"
"runtime"
"sync/atomic"
"testing"
"time"
"unicode/utf8"
)
func BenchmarkRuneCountNoDefault(b *testing.B) {
b.StopTimer()
var strings []string
numStrings := 10
for n := 0; n < numStrings; n++ {
s := RandStringBytesMaskImprSrc(10)
strings = append(strings, s)
}
jobs := make(chan string)
results := make(chan int)
for i := 0; i < runtime.NumCPU(); i++ {
go RuneCountNoDefault(jobs, results)
}
b.StartTimer()
for n := 0; n < b.N; n++ {
go func() {
for n := 0; n < numStrings; n++ {
<-results
}
return
}()
for n := 0; n < numStrings; n++ {
jobs <- strings[n]
}
}
close(jobs)
}
func RuneCountNoDefault(jobs chan string, results chan int) {
for {
select {
case j, ok := <-jobs:
if ok {
results <- utf8.RuneCountInString(j)
} else {
return
}
}
}
}
var proceeds ,defaults uint64
func BenchmarkRuneCountWithDefault(b *testing.B) {
b.StopTimer()
var strings []string
numStrings := 10
for n := 0; n < numStrings; n++ {
s := RandStringBytesMaskImprSrc(10)
strings = append(strings, s)
}
jobs := make(chan string)
results := make(chan int)
for i := 0; i < runtime.NumCPU(); i++ {
go RuneCountWithDefault(jobs, results)
}
b.StartTimer()
for n := 0; n < b.N; n++ {
go func() {
for n := 0; n < numStrings; n++ {
<-results
}
return
}()
for n := 0; n < numStrings; n++ {
jobs <- strings[n]
}
}
close(jobs)
b.Log("proceeds", atomic.LoadUint64(&proceeds))
b.Log("defaults", atomic.LoadUint64(&defaults))
}
func RuneCountWithDefault(jobs chan string, results chan int) {
for {
select {
case j, ok := <-jobs:
atomic.AddUint64(&proceeds, 1)
if ok {
results <- utf8.RuneCountInString(j)
} else {
return
}
default: //DIFFERENCE
atomic.AddUint64(&defaults, 1)
}
}
}
//https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-golang
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
const (
letterIdxBits = 6 // 6 bits to represent a letter index
letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
letterIdxMax = 63 / letterIdxBits // # of letter indices fitting in 63 bits
)
var src = rand.NewSource(time.Now().UnixNano())
func RandStringBytesMaskImprSrc(n int) string {
b := make([]byte, n)
// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
if remain == 0 {
cache, remain = src.Int63(), letterIdxMax
}
if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
b[i] = letterBytes[idx]
i--
}
cache >>= letterIdxBits
remain--
}
return string(b)
}
Playground: https://play.golang.org/p/DLnAY0hovQG
I'm looking for the most efficient way to tell whether a byte slice is a float.
This is to be done on huge datasets, so performance is key.
Tried approaches:
strconv.ParseFloat
regexp.Match
CheckNumber - home rolled function using IsNumber + looking at whether the byte slice contains a ..
func CheckNumber(p []byte) bool {
r := string(p)
sep := 0
for _, b := range r {
if unicode.IsNumber(b) {
continue
}
if b == rune('.') {
if sep > 0 {
return false
}
sep++
continue
}
return false
}
return true
}
The benchmark code:
func BenchmarkFloatStrconv(b *testing.B) {
p := []byte("15.34234234234")
for i := 0; i < b.N; i++ {
_, err := strconv.ParseFloat(string(p), 64)
if err != nil {
log.Fatalf("NaN")
}
}
}
func BenchmarkFloatRegex(b *testing.B) {
p := []byte("15.34234234234")
r := `[-+]?[0-9]*\.?[0-9]`
c, _ := regexp.Compile(r)
for i := 0; i < b.N; i++ {
ok := c.Match(p)
if !ok {
log.Fatalf("NaN")
}
}
}
func BenchmarkCheckNumber(b *testing.B) {
p := []byte("15.34234234234")
for i := 0; i < b.N; i++ {
ok := CheckNumber(p)
if !ok {
log.Fatalf("NaN")
}
}
}
Benchmark results:
BenchmarkFloatStrconv-8 20000000 85.8 ns/op 16 B/op 1 allocs/op
BenchmarkFloatRegex-8 5000000 252 ns/op 0 B/op 0 allocs/op
BenchmarkCheckNumber-8 20000000 64.3 ns/op 0 B/op 0 allocs/op
Am I doing the different solutions fairness?
Are there better solutions?
Edit: thanks to pointers from Adrian and icza, this avoids converting to strings/runes
func CheckNumberNoStringConvert(r []byte) bool {
sep := 0
for i := range r {
if r[i] >= 48 && r[i] <= 57 {
continue
}
if r[i] == 46 {
if sep > 0 {
return false
}
sep++
continue
}
return false
}
return true
}
and performs quite well ;-)
BenchmarkCheckNumberNoStringConvert-8 200000000 8.55 ns/op 0 B/op 0 allocs/op
For a simple real (floating-point) number (no scientific or engineering floating-point format, no group separators),
func IsReal(n []byte) bool {
if len(n) > 0 && n[0] == '-' {
n = n[1:]
}
if len(n) == 0 {
return false
}
var point bool
for _, c := range n {
if '0' <= c && c <= '9' {
continue
}
if c == '.' && len(n) > 1 && !point {
point = true
continue
}
return false
}
return true
}
Benchmark:
$ go test -run=! -bench=. -benchmem -cpu=1 real_test.go
goos: linux
goarch: amd64
BenchmarkIsReal 100000000 20.8 ns/op 0 B/op 0 allocs/op
BenchmarkFloatStrconv 20000000 101 ns/op 16 B/op 1 allocs/op
BenchmarkFloatRegex 5000000 284 ns/op 0 B/op 0 allocs/op
BenchmarkCheckNumber 20000000 73.0 ns/op 0 B/op 0 allocs/op
PASS
ok command-line-arguments 7.380s
real_test.go:
package main
import (
"log"
"regexp"
"strconv"
"testing"
"unicode"
)
func IsReal(n []byte) bool {
if len(n) > 0 && n[0] == '-' {
n = n[1:]
}
if len(n) == 0 {
return false
}
var point bool
for _, c := range n {
if '0' <= c && c <= '9' {
continue
}
if c == '.' && len(n) > 1 && !point {
point = true
continue
}
return false
}
return true
}
func BenchmarkIsReal(b *testing.B) {
p := []byte("15.34234234234")
for i := 0; i < b.N; i++ {
ok := IsReal(p)
if !ok {
log.Fatalf("NaN")
}
}
}
func CheckNumber(p []byte) bool {
r := string(p)
sep := 0
for _, b := range r {
if unicode.IsNumber(b) {
continue
}
if b == rune('.') {
if sep > 0 {
return false
}
sep++
continue
}
return false
}
return true
}
func BenchmarkFloatStrconv(b *testing.B) {
p := []byte("15.34234234234")
for i := 0; i < b.N; i++ {
_, err := strconv.ParseFloat(string(p), 64)
if err != nil {
log.Fatalf("NaN")
}
}
}
func BenchmarkFloatRegex(b *testing.B) {
p := []byte("15.34234234234")
r := `[-+]?[0-9]*\.?[0-9]`
c, _ := regexp.Compile(r)
for i := 0; i < b.N; i++ {
ok := c.Match(p)
if !ok {
log.Fatalf("NaN")
}
}
}
func BenchmarkCheckNumber(b *testing.B) {
p := []byte("15.34234234234")
for i := 0; i < b.N; i++ {
ok := CheckNumber(p)
if !ok {
log.Fatalf("NaN")
}
}
}
I took upon it as a challenge for myself to rewrite this as some kind of state machine synthesizing the collective input from everyone here :)
func Validate(b []byte) bool {
for i := range b {
switch {
case b[i] >= '0' && b[i] <= '9':
continue
case b[i] == '.':
if len(b) == 1 {
return false
}
if len(b) > i {
return fractional(b[i+1:])
}
return true
case i == 0 && b[i] == '-':
if len(b) == 1 {
return false
}
continue
default:
return false
}
}
return true
}
func fractional(b []byte) bool {
for i := range b {
switch {
case b[i] >= '0' && b[i] <= '9':
continue
case b[i] == 'e' || b[i] == 'E':
if len(b[:i]) == 0 {
return false
}
if len(b) > i+1 {
return scientific(b[i+1:])
}
return false
default:
return false
}
}
return true
}
func scientific(b []byte) bool {
for i := range b {
switch {
case b[i] >= '0' && b[i] <= '9':
continue
case i == 0 && b[i] == '-':
if len(b) == 1 {
return false
}
continue
default:
return false
}
}
return true
}
It seems to work on a few different number formats:
type v struct {
Input []byte
Expected bool
}
func TestPermutations(t *testing.T) {
b := []v{
v{[]byte("123.456"), true},
v{[]byte("123"), true},
v{[]byte("123."), true},
v{[]byte(".123"), true},
v{[]byte("12.1e12"), true},
v{[]byte("12.1e-12"), true},
v{[]byte("-123.456"), true},
v{[]byte("-123"), true},
v{[]byte("-123."), true},
v{[]byte("-.123"), true},
v{[]byte("-12.1e12"), true},
v{[]byte("-12.1e-12"), true},
v{[]byte(".1e-12"), true},
v{[]byte(".e-12"), false},
v{[]byte(".e"), false},
v{[]byte("e"), false},
v{[]byte("abcdef"), false},
v{[]byte("-"), false},
v{[]byte("."), false},
}
for _, test := range b {
ok := Validate(test.Input)
if ok != test.Expected {
t.Errorf("could not handle case %s", test.Input)
}
}
}
and perform quite well on the original benchmark:
BenchmarkValidate-8 100000000 13.0 ns/op 0 B/op 0 allocs/op
Benchmark code:
func BenchmarkValidate(b *testing.B) {
p := []byte("15.1234567890")
for i := 0; i < b.N; i++ {
ok := Validate(p)
if !ok {
log.Fatalf("problem")
}
}
}
so I have the Quicksort algorithm implemented with concurrency (the one without as well). Now I wanted to compare the times. I wrote this:
func benchmarkConcurrentQuickSort(size int, b *testing.B) {
A := RandomArray(size)
var wg sync.WaitGroup
b.ResetTimer()
ConcurrentQuicksort(A, 0, len(A)-1, &wg)
wg.Wait()
}
func BenchmarkConcurrentQuickSort500(b *testing.B) {
benchmarkConcurrentQuickSort(500, b)
}
func BenchmarkConcurrentQuickSort1000(b *testing.B) {
benchmarkConcurrentQuickSort(1000, b)
}
func BenchmarkConcurrentQuickSort5000(b *testing.B) {
benchmarkConcurrentQuickSort(5000, b)
}
func BenchmarkConcurrentQuickSort10000(b *testing.B) {
benchmarkConcurrentQuickSort(10000, b)
}
func BenchmarkConcurrentQuickSort20000(b *testing.B) {
benchmarkConcurrentQuickSort(20000, b)
}
func BenchmarkConcurrentQuickSort1000000(b *testing.B) {
benchmarkConcurrentQuickSort(1000000, b)
}
The results are like this:
C:\projects\go\src\github.com\frynio\mysort>go test -bench=.
BenchmarkConcurrentQuickSort500-4 2000000000 0.00 ns/op
BenchmarkConcurrentQuickSort1000-4 2000000000 0.00 ns/op
BenchmarkConcurrentQuickSort5000-4 2000000000 0.00 ns/op
BenchmarkConcurrentQuickSort10000-4 2000000000 0.00 ns/op
BenchmarkConcurrentQuickSort20000-4 2000000000 0.00 ns/op
BenchmarkConcurrentQuickSort1000000-4 30 49635266 ns/op
PASS
ok github.com/frynio/mysort 8.342s
I can believe the last one, but I definitely think that sorting 500-element array takes longer than 1ns. What am i doing wrong? I am pretty sure that RandomArray returns array of wanted size, as we can see in the last benchmark. Why does it print out the 0.00 ns?
func RandomArray(n int) []int {
a := []int{}
for i := 0; i < n; i++ {
a = append(a, rand.Intn(500))
}
return a
}
// ConcurrentPartition - ConcurrentQuicksort function for partitioning the array (randomized choice of a pivot)
func ConcurrentPartition(A []int, p int, r int) int {
index := rand.Intn(r-p) + p
pivot := A[index]
A[index] = A[r]
A[r] = pivot
x := A[r]
j := p - 1
i := p
for i < r {
if A[i] <= x {
j++
tmp := A[j]
A[j] = A[i]
A[i] = tmp
}
i++
}
temp := A[j+1]
A[j+1] = A[r]
A[r] = temp
return j + 1
}
// ConcurrentQuicksort - a concurrent version of a quicksort algorithm
func ConcurrentQuicksort(A []int, p int, r int, wg *sync.WaitGroup) {
if p < r {
q := ConcurrentPartition(A, p, r)
wg.Add(2)
go func() {
ConcurrentQuicksort(A, p, q-1, wg)
wg.Done()
}()
go func() {
ConcurrentQuicksort(A, q+1, r, wg)
wg.Done()
}()
}
}
Package testing
A sample benchmark function looks like this:
func BenchmarkHello(b *testing.B) {
for i := 0; i < b.N; i++ {
fmt.Sprintf("hello")
}
}
The benchmark function must run the target code b.N times. During
benchmark execution, b.N is adjusted until the benchmark function
lasts long enough to be timed reliably.
I don't see a benchmark loop in your code. Try
func benchmarkConcurrentQuickSort(size int, b *testing.B) {
A := RandomArray(size)
var wg sync.WaitGroup
b.ResetTimer()
for i := 0; i < b.N; i++ {
ConcurrentQuicksort(A, 0, len(A)-1, &wg)
wg.Wait()
}
}
Output:
BenchmarkConcurrentQuickSort500-4 10000 122291 ns/op
BenchmarkConcurrentQuickSort1000-4 5000 221154 ns/op
BenchmarkConcurrentQuickSort5000-4 1000 1225230 ns/op
BenchmarkConcurrentQuickSort10000-4 500 2568024 ns/op
BenchmarkConcurrentQuickSort20000-4 300 5808130 ns/op
BenchmarkConcurrentQuickSort1000000-4 1 1371751710 ns/op
Can someone please point at a more efficient version of the following
b:=make([]byte,0,sizeTotal)
b=append(b,size...)
b=append(b,contentType...)
b=append(b,lenCallbackid...)
b=append(b,lenTarget...)
b=append(b,lenAction...)
b=append(b,lenContent...)
b=append(b,callbackid...)
b=append(b,target...)
b=append(b,action...)
b=append(b,content...)
every variable is a byte slice apart from size sizeTotal
Update:
Code:
type Message struct {
size uint32
contentType uint8
callbackId string
target string
action string
content string
}
var res []byte
var b []byte = make([]byte,0,4096)
func (m *Message)ToByte()[]byte{
callbackIdIntLen:=len(m.callbackId)
targetIntLen := len(m.target)
actionIntLen := len(m.action)
contentIntLen := len(m.content)
lenCallbackid:=make([]byte,4)
binary.LittleEndian.PutUint32(lenCallbackid, uint32(callbackIdIntLen))
callbackid := []byte(m.callbackId)
lenTarget := make([]byte,4)
binary.LittleEndian.PutUint32(lenTarget, uint32(targetIntLen))
target:=[]byte(m.target)
lenAction := make([]byte,4)
binary.LittleEndian.PutUint32(lenAction, uint32(actionIntLen))
action := []byte(m.action)
lenContent:= make([]byte,4)
binary.LittleEndian.PutUint32(lenContent, uint32(contentIntLen))
content := []byte(m.content)
sizeTotal:= 21+callbackIdIntLen+targetIntLen+actionIntLen+contentIntLen
size := make([]byte,4)
binary.LittleEndian.PutUint32(size, uint32(sizeTotal))
b=b[:0]
b=append(b,size...)
b=append(b,byte(m.contentType))
b=append(b,lenCallbackid...)
b=append(b,lenTarget...)
b=append(b,lenAction...)
b=append(b,lenContent...)
b=append(b,callbackid...)
b=append(b,target...)
b=append(b,action...)
b=append(b,content...)
res = b
return b
}
func FromByte(bytes []byte)(*Message){
size :=binary.LittleEndian.Uint32(bytes[0:4])
contentType :=bytes[4:5][0]
lenCallbackid:=binary.LittleEndian.Uint32(bytes[5:9])
lenTarget :=binary.LittleEndian.Uint32(bytes[9:13])
lenAction :=binary.LittleEndian.Uint32(bytes[13:17])
lenContent :=binary.LittleEndian.Uint32(bytes[17:21])
callbackid := string(bytes[21:21+lenCallbackid])
target:= string(bytes[21+lenCallbackid:21+lenCallbackid+lenTarget])
action:= string(bytes[21+lenCallbackid+lenTarget:21+lenCallbackid+lenTarget+lenAction])
content:=string(bytes[size-lenContent:size])
return &Message{size,contentType,callbackid,target,action,content}
}
Benchs:
func BenchmarkMessageToByte(b *testing.B) {
m:=NewMessage(uint8(3),"agsdggsdasagdsdgsgddggds","sometarSFAFFget","somFSAFSAFFSeaction","somfasfsasfafsejsonzhit")
for n := 0; n < b.N; n++ {
m.ToByte()
}
}
func BenchmarkMessageFromByte(b *testing.B) {
m:=NewMessage(uint8(1),"sagdsgaasdg","soSASFASFASAFSFASFAGmetarget","adsgdgsagdssgdsgd","agsdsdgsagdsdgasdg").ToByte()
for n := 0; n < b.N; n++ {
FromByte(m)
}
}
func BenchmarkStringToByte(b *testing.B) {
for n := 0; n < b.N; n++ {
_ = []byte("abcdefghijklmnoqrstuvwxyz")
}
}
func BenchmarkStringFromByte(b *testing.B) {
s:=[]byte("abcdefghijklmnoqrstuvwxyz")
for n := 0; n < b.N; n++ {
_ = string(s)
}
}
func BenchmarkUintToByte(b *testing.B) {
for n := 0; n < b.N; n++ {
i:=make([]byte,4)
binary.LittleEndian.PutUint32(i, uint32(99))
}
}
func BenchmarkUintFromByte(b *testing.B) {
i:=make([]byte,4)
binary.LittleEndian.PutUint32(i, uint32(99))
for n := 0; n < b.N; n++ {
binary.LittleEndian.Uint32(i)
}
}
Bench results:
BenchmarkMessageToByte 10000000 280 ns/op
BenchmarkMessageFromByte 10000000 293 ns/op
BenchmarkStringToByte 50000000 55.1 ns/op
BenchmarkStringFromByte 50000000 49.7 ns/op
BenchmarkUintToByte 1000000000 2.14 ns/op
BenchmarkUintFromByte 2000000000 1.71 ns/op
Provided memory is already allocated, a sequence of x=append(x,a...) is rather efficient in Go.
In your example, the initial allocation (make) probably costs more than the sequence of appends. It depends on the size of the fields. Consider the following benchmark:
package main
import (
"testing"
)
const sizeTotal = 25
var res []byte // To enforce heap allocation
func BenchmarkWithAlloc(b *testing.B) {
a := []byte("abcde")
for i := 0; i < b.N; i++ {
x := make([]byte, 0, sizeTotal)
x = append(x, a...)
x = append(x, a...)
x = append(x, a...)
x = append(x, a...)
x = append(x, a...)
res = x // Make sure x escapes, and is therefore heap allocated
}
}
func BenchmarkWithoutAlloc(b *testing.B) {
a := []byte("abcde")
x := make([]byte, 0, sizeTotal)
for i := 0; i < b.N; i++ {
x = x[:0]
x = append(x, a...)
x = append(x, a...)
x = append(x, a...)
x = append(x, a...)
x = append(x, a...)
res = x
}
}
On my box, the result is:
testing: warning: no tests to run
PASS
BenchmarkWithAlloc 10000000 116 ns/op 32 B/op 1 allocs/op
BenchmarkWithoutAlloc 50000000 24.0 ns/op 0 B/op 0 allocs/op
Systematically reallocating the buffer (even a small one) makes this benchmark at least 5 times slower.
So your best hope to optimize this code it to make sure you do not reallocate a buffer for each packet you build. On the contrary, you should keep your buffer, and reuse it for each marshalling operation.
You can reset a slice while keeping its underlying buffer allocated with the following statement:
x = x[:0]
I looked carefully at that and made the following benchmarks.
package append
import "testing"
func BenchmarkAppend(b *testing.B) {
as := 1000
a := make([]byte, as)
s := make([]byte, 0, b.N*as)
for i := 0; i < b.N; i++ {
s = append(s, a...)
}
}
func BenchmarkCopy(b *testing.B) {
as := 1000
a := make([]byte, as)
s := make([]byte, 0, b.N*as)
for i := 0; i < b.N; i++ {
copy(s[i*as:(i+1)*as], a)
}
}
The results are
grzesiek#klapacjusz ~/g/s/t/append> go test -bench . -benchmem
testing: warning: no tests to run
PASS
BenchmarkAppend 10000000 202 ns/op 1000 B/op 0 allocs/op
BenchmarkCopy 10000000 201 ns/op 1000 B/op 0 allocs/op
ok test/append 4.564s
If the totalSize is big enough then your code makes no memory allocations. It copies only the amount of bytes it needs to copy. It is perfectly fine.