I'm looking for the most efficient way to tell whether a byte slice is a float.
This is to be done on huge datasets, so performance is key.
Tried approaches:
strconv.ParseFloat
regexp.Match
CheckNumber - home rolled function using IsNumber + looking at whether the byte slice contains a ..
// CheckNumber reports whether p is a plain decimal number: ASCII digits with
// at most one '.' separator, for example "15", "15.34", ".5" or "15.".
//
// The digit test still goes through unicode.IsNumber but is restricted to
// ASCII. On its own IsNumber also accepts runes such as '²', '½' or '٣', which
// no float parser understands. Input that contains no digit at all ("" or ".")
// is rejected.
func CheckNumber(p []byte) bool {
	digits, sep := 0, 0
	for _, r := range string(p) {
		switch {
		case r <= unicode.MaxASCII && unicode.IsNumber(r):
			digits++
		case r == '.':
			// A second separator can never be part of one number.
			if sep > 0 {
				return false
			}
			sep++
		default:
			return false
		}
	}
	return digits > 0
}
The benchmark code:
// BenchmarkFloatStrconv measures float detection done by fully parsing the
// input with strconv.ParseFloat. The []byte-to-string conversion happens
// inside the loop, so its cost is part of every iteration.
func BenchmarkFloatStrconv(b *testing.B) {
	input := []byte("15.34234234234")
	for n := 0; n < b.N; n++ {
		if _, err := strconv.ParseFloat(string(input), 64); err != nil {
			log.Fatalf("NaN")
		}
	}
}
// BenchmarkFloatRegex measures float detection with a precompiled regular
// expression.
//
// The pattern is anchored with ^ and $ and requires one or more trailing
// digits. Without the anchors Match succeeds on any input that merely
// contains a digit, so the benchmark would not be measuring a real float
// check. MustCompile is used so that a broken pattern fails loudly instead of
// leaving a nil *Regexp behind.
func BenchmarkFloatRegex(b *testing.B) {
	p := []byte("15.34234234234")
	re := regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+$`)
	// Keep the one-time compilation out of the measured region.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if !re.Match(p) {
			log.Fatalf("NaN")
		}
	}
}
// BenchmarkCheckNumber measures CheckNumber on a fixed, valid decimal input
// and aborts the run if the input is ever rejected.
func BenchmarkCheckNumber(b *testing.B) {
	input := []byte("15.34234234234")
	for n := 0; n < b.N; n++ {
		if !CheckNumber(input) {
			log.Fatalf("NaN")
		}
	}
}
Benchmark results:
BenchmarkFloatStrconv-8 20000000 85.8 ns/op 16 B/op 1 allocs/op
BenchmarkFloatRegex-8 5000000 252 ns/op 0 B/op 0 allocs/op
BenchmarkCheckNumber-8 20000000 64.3 ns/op 0 B/op 0 allocs/op
Am I comparing the different solutions fairly?
Are there better solutions?
Edit: thanks to pointers from Adrian and icza, this avoids converting to strings/runes
// CheckNumberNoStringConvert reports whether r is a plain decimal number:
// ASCII digits with at most one '.' separator. It works directly on the bytes,
// with no string or rune conversion.
//
// At least one digit is required, so "" and "." are rejected. Signs and
// exponents are not accepted.
func CheckNumberNoStringConvert(r []byte) bool {
	digits := 0
	sawSep := false
	for _, c := range r {
		switch {
		case '0' <= c && c <= '9':
			digits++
		case c == '.' && !sawSep:
			sawSep = true
		default:
			// Second separator or any other byte.
			return false
		}
	}
	return digits > 0
}
and performs quite well ;-)
BenchmarkCheckNumberNoStringConvert-8 200000000 8.55 ns/op 0 B/op 0 allocs/op
For a simple real (floating-point) number (no scientific or engineering floating-point format, no group separators),
// IsReal reports whether n is a simple real number: an optional leading '-',
// then ASCII digits with at most one '.'. No exponent and no group separators
// are accepted. A lone "." (with or without the sign) and empty input are
// rejected.
func IsReal(n []byte) bool {
	body := n
	if len(body) != 0 && body[0] == '-' {
		body = body[1:]
	}
	switch len(body) {
	case 0:
		return false
	case 1:
		// A single remaining byte has to be a digit; a bare '.' is not a number.
		return body[0] >= '0' && body[0] <= '9'
	}
	seenPoint := false
	for i := 0; i < len(body); i++ {
		switch c := body[i]; {
		case c >= '0' && c <= '9':
		case c == '.' && !seenPoint:
			seenPoint = true
		default:
			return false
		}
	}
	return true
}
Benchmark:
$ go test -run=! -bench=. -benchmem -cpu=1 real_test.go
goos: linux
goarch: amd64
BenchmarkIsReal 100000000 20.8 ns/op 0 B/op 0 allocs/op
BenchmarkFloatStrconv 20000000 101 ns/op 16 B/op 1 allocs/op
BenchmarkFloatRegex 5000000 284 ns/op 0 B/op 0 allocs/op
BenchmarkCheckNumber 20000000 73.0 ns/op 0 B/op 0 allocs/op
PASS
ok command-line-arguments 7.380s
real_test.go:
package main
import (
"log"
"regexp"
"strconv"
"testing"
"unicode"
)
// IsReal reports whether n is a simple real number: an optional leading '-',
// then ASCII digits with at most one '.'. No exponent and no group separators
// are accepted. A lone "." (with or without the sign) and empty input are
// rejected.
func IsReal(n []byte) bool {
	body := n
	if len(body) != 0 && body[0] == '-' {
		body = body[1:]
	}
	switch len(body) {
	case 0:
		return false
	case 1:
		// A single remaining byte has to be a digit; a bare '.' is not a number.
		return body[0] >= '0' && body[0] <= '9'
	}
	seenPoint := false
	for i := 0; i < len(body); i++ {
		switch c := body[i]; {
		case c >= '0' && c <= '9':
		case c == '.' && !seenPoint:
			seenPoint = true
		default:
			return false
		}
	}
	return true
}
// BenchmarkIsReal measures IsReal on a fixed, valid decimal input and aborts
// the run if the input is ever rejected.
func BenchmarkIsReal(b *testing.B) {
	input := []byte("15.34234234234")
	for n := 0; n < b.N; n++ {
		if !IsReal(input) {
			log.Fatalf("NaN")
		}
	}
}
// CheckNumber reports whether p is a plain decimal number: ASCII digits with
// at most one '.' separator, for example "15", "15.34", ".5" or "15.".
//
// The digit test still goes through unicode.IsNumber but is restricted to
// ASCII. On its own IsNumber also accepts runes such as '²', '½' or '٣', which
// no float parser understands. Input that contains no digit at all ("" or ".")
// is rejected.
func CheckNumber(p []byte) bool {
	digits, sep := 0, 0
	for _, r := range string(p) {
		switch {
		case r <= unicode.MaxASCII && unicode.IsNumber(r):
			digits++
		case r == '.':
			// A second separator can never be part of one number.
			if sep > 0 {
				return false
			}
			sep++
		default:
			return false
		}
	}
	return digits > 0
}
// BenchmarkFloatStrconv measures float detection done by fully parsing the
// input with strconv.ParseFloat. The []byte-to-string conversion happens
// inside the loop, so its cost is part of every iteration.
func BenchmarkFloatStrconv(b *testing.B) {
	input := []byte("15.34234234234")
	for n := 0; n < b.N; n++ {
		if _, err := strconv.ParseFloat(string(input), 64); err != nil {
			log.Fatalf("NaN")
		}
	}
}
// BenchmarkFloatRegex measures float detection with a precompiled regular
// expression.
//
// The pattern is anchored with ^ and $ and requires one or more trailing
// digits. Without the anchors Match succeeds on any input that merely
// contains a digit, so the benchmark would not be measuring a real float
// check. MustCompile is used so that a broken pattern fails loudly instead of
// leaving a nil *Regexp behind.
func BenchmarkFloatRegex(b *testing.B) {
	p := []byte("15.34234234234")
	re := regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+$`)
	// Keep the one-time compilation out of the measured region.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if !re.Match(p) {
			log.Fatalf("NaN")
		}
	}
}
// BenchmarkCheckNumber measures CheckNumber on a fixed, valid decimal input
// and aborts the run if the input is ever rejected.
func BenchmarkCheckNumber(b *testing.B) {
	input := []byte("15.34234234234")
	for n := 0; n < b.N; n++ {
		if !CheckNumber(input) {
			log.Fatalf("NaN")
		}
	}
}
I took upon it as a challenge for myself to rewrite this as some kind of state machine synthesizing the collective input from everyone here :)
// Validate reports whether b holds a decimal floating-point literal made of
//
//   - an optional leading '-',
//   - integer digits and/or a '.' followed by fractional digits, with at
//     least one digit overall ("123", "123.", ".5" and "1.5" are all valid),
//   - an optional exponent: 'e' or 'E', an optional '-', and at least one digit.
//
// An exponent must directly follow a digit, so "1e5" and "1.5e-3" are accepted
// while "e5", ".e5" and "1.e5" are not. A leading '+', hexadecimal floats and
// the special values "Inf"/"NaN" are not accepted. Empty input is invalid.
func Validate(b []byte) bool {
	if len(b) > 0 && b[0] == '-' {
		b = b[1:]
	}
	if len(b) == 0 {
		// "" and "-" carry no digits.
		return false
	}
	for i, c := range b {
		switch {
		case '0' <= c && c <= '9':
			continue
		case c == '.':
			rest := b[i+1:]
			if i == 0 {
				// No integer digits: the fraction has to supply the first
				// digit, which rules out "." and ".e5".
				return len(rest) > 0 && '0' <= rest[0] && rest[0] <= '9' && fractional(rest)
			}
			return fractional(rest)
		case (c == 'e' || c == 'E') && i > 0:
			// Exponent straight after the integer digits, e.g. "1e5".
			return scientific(b[i+1:])
		default:
			return false
		}
	}
	return true
}

// fractional validates what follows the decimal point: zero or more digits,
// optionally followed by an exponent. The exponent marker is only accepted
// after at least one fractional digit.
func fractional(b []byte) bool {
	for i, c := range b {
		switch {
		case '0' <= c && c <= '9':
			continue
		case c == 'e' || c == 'E':
			return i > 0 && scientific(b[i+1:])
		default:
			return false
		}
	}
	return true
}

// scientific validates what follows the exponent marker: an optional '-' and
// then one or more digits. An empty exponent or a lone sign is invalid.
func scientific(b []byte) bool {
	if len(b) > 0 && b[0] == '-' {
		b = b[1:]
	}
	if len(b) == 0 {
		return false
	}
	for _, c := range b {
		if c < '0' || c > '9' {
			return false
		}
	}
	return true
}
It seems to work on a few different number formats:
// v is one table entry for a validation test: an input byte slice and the
// boolean result expected for it.
type v struct {
Input []byte
Expected bool
}
// TestPermutations runs Validate over a table of plain, signed, fractional and
// exponent-form inputs, plus several malformed ones, and reports every entry
// whose result differs from the expectation.
func TestPermutations(t *testing.T) {
	cases := []v{
		{[]byte("123.456"), true},
		{[]byte("123"), true},
		{[]byte("123."), true},
		{[]byte(".123"), true},
		{[]byte("12.1e12"), true},
		{[]byte("12.1e-12"), true},
		{[]byte("-123.456"), true},
		{[]byte("-123"), true},
		{[]byte("-123."), true},
		{[]byte("-.123"), true},
		{[]byte("-12.1e12"), true},
		{[]byte("-12.1e-12"), true},
		{[]byte(".1e-12"), true},
		{[]byte(".e-12"), false},
		{[]byte(".e"), false},
		{[]byte("e"), false},
		{[]byte("abcdef"), false},
		{[]byte("-"), false},
		{[]byte("."), false},
	}
	for _, tc := range cases {
		if got := Validate(tc.Input); got != tc.Expected {
			t.Errorf("could not handle case %s", tc.Input)
		}
	}
}
and perform quite well on the original benchmark:
BenchmarkValidate-8 100000000 13.0 ns/op 0 B/op 0 allocs/op
Benchmark code:
// BenchmarkValidate measures Validate on a fixed, valid decimal input and
// aborts the run if the input is ever rejected.
func BenchmarkValidate(b *testing.B) {
	input := []byte("15.1234567890")
	for n := 0; n < b.N; n++ {
		if !Validate(input) {
			log.Fatalf("problem")
		}
	}
}
Related
Given below code
package main
import (
"fmt"
"sort"
"strings"
)
// main groups a fixed sample word list into anagram classes and prints the
// result.
func main() {
	words := []string{"eat", "tea", "tan", "ate", "nat", "bat"}
	fmt.Println(groupAnagrams(words))
}
// groupAnagrams partitions s into groups of words that are anagrams of each
// other. Two words land in the same group when their characters, sorted, form
// the same string. Each group is sorted, and the groups are returned in the
// order of their sorted-character signatures. An empty input yields nil.
func groupAnagrams(s []string) (out [][]string) {
	groups := make(map[string][]string)
	for _, word := range s {
		letters := strings.Split(word, "")
		sort.Strings(letters)
		signature := strings.Join(letters, "")
		groups[signature] = append(groups[signature], word)
	}

	// Map iteration order is random; sort the signatures for stable output.
	signatures := make([]string, 0, len(groups))
	for signature := range groups {
		signatures = append(signatures, signature)
	}
	sort.Strings(signatures)

	for _, signature := range signatures {
		members := groups[signature]
		sort.Strings(members)
		out = append(out, members)
	}
	return out
}
And its tests here https://play.golang.org/p/k8F1-FAC_au
can you help figuring the complexity ?
In my understanding, and without checking thoroughly the documentation.
for _, v := range s { // o(n)
sort.Strings(keys) //o(log n)
x := strings.Split(v, "") / anagram := strings.Join(x, "") //o(n)
Are those correct ? Am i missing some ? How to compute the total ?
Do you account for total allocations when computing the complexity of a code ?
(not an answer, more like a formatted comment)
You get to choose what counts as "1 operation".
For example : in your for _, v := range s { ... } loop, I wouldn't count the processing of one single v value :
x := strings.Split(v, "")
sort.Strings(x)
anagram := strings.Join(x, "")
items, ok := tmp[anagram]
if ok {
items = append(items, v)
tmp[anagram] = items
continue
}
tmp[anagram] = []string{v}
as "1 operation". More like something that depends on len(v).
So the length of the items in your starting set will probably appear in your end formula.
this is not an answer, but, little insight as to anyone else having to deal with such things. may that help you.
I slightly revised things here and there, then tried a Gödel-numbering-inspired scheme as described at https://stackoverflow.com/a/396077/11892070
-- main.go --
package main
import (
"fmt"
"sort"
"strings"
)
// main hashes a few sample words, including non-Latin ones, and prints each
// word with its hash and the error reported for it.
func main() {
	scratch := make(map[rune]int)
	for _, word := range []string{"tan", "nat", "⌘", "日本語", "語日本"} {
		hash, err := hashWord(word, scratch)
		fmt.Println(word, "=>", hash, "err=", err)
	}
}
// groupAnagramsUsingSort groups the words of s by their sorted-character
// signature. tmp and out are caller-supplied scratch storage: tmp is emptied,
// every element of out is truncated to length zero, and out itself is then
// truncated before being refilled. Groups are appended in map iteration order,
// so the order of the result is not deterministic.
func groupAnagramsUsingSort(s []string, tmp map[string][]string, out [][]string) [][]string {
	for key := range tmp {
		delete(tmp, key)
	}
	for i := range out {
		out[i] = out[i][:0]
	}
	out = out[:0]

	for _, word := range s {
		letters := strings.Split(word, "")
		sort.Strings(letters)
		signature := strings.Join(letters, "")
		if members, seen := tmp[signature]; seen {
			tmp[signature] = append(members, word)
		} else {
			tmp[signature] = []string{word}
		}
	}

	for _, members := range tmp {
		out = append(out, members)
	}
	return out
}
// groupAnagramsUsingHash groups the words of s by the integer hashWord
// computes for them. tmp and out are caller-supplied scratch storage: tmp is
// emptied, every element of out is truncated to length zero, and out itself is
// then truncated before being refilled. Groups are appended in map iteration
// order.
//
// The error from hashWord is discarded, so every word it rejects is filed
// under whatever hash value it returns alongside the error.
func groupAnagramsUsingHash(s []string, tmp map[int][]string, out [][]string) [][]string {
	for key := range tmp {
		delete(tmp, key)
	}
	for i := range out {
		out[i] = out[i][:0]
	}
	out = out[:0]

	// One frequency map is reused for every word.
	scratch := map[rune]int{}
	for _, word := range s {
		key, _ := hashWord(word, scratch)
		if members, seen := tmp[key]; seen {
			tmp[key] = append(members, word)
		} else {
			tmp[key] = []string{word}
		}
	}

	for _, members := range tmp {
		out = append(out, members)
	}
	return out
}
// primes assigns one distinct prime to each lowercase letter: index 0 is 'a',
// index 25 is 'z'.
var primes = []int{2, 41, 37, 47, 3, 67, 71, 23, 5, 101, 61, 17, 19, 13, 31, 43, 97, 29, 11, 7, 73, 83, 79, 89, 59, 53}

// ErrNonASCII is returned for any rune that has no entry in primes, i.e.
// anything outside 'a'..'z'.
var ErrNonASCII = fmt.Errorf("non ascii letter detected")

// getFrequencyMap empties freq, then counts the occurrences of each letter of
// word in it. It returns ErrNonASCII (and a nil map) as soon as it meets a
// rune that cannot index primes. The upper bound uses >= so that the rune just
// past 'z' is rejected here instead of causing an out-of-range index later.
func getFrequencyMap(word string, freq map[rune]int) (map[rune]int, error) {
	for k := range freq {
		delete(freq, k)
	}
	for _, r := range word {
		if r < 'a' || int(r-'a') >= len(primes) {
			return nil, ErrNonASCII
		}
		freq[r]++
	}
	return freq, nil
}

// hashWord returns the product of the primes assigned to the letters of word,
// each prime taken once per occurrence. The result does not depend on letter
// order, so anagrams share a hash. freq is scratch space that is cleared and
// reused. On a rune outside 'a'..'z' it returns -1 and ErrNonASCII.
//
// The product is a plain int, so long words can overflow and wrap around.
// Anagrams still agree with each other, but distinct words may then collide.
func hashWord(word string, freq map[rune]int) (int, error) {
	freq, err := getFrequencyMap(word, freq)
	if err != nil {
		return -1, err
	}
	product := 1
	for letter, count := range freq {
		p := primes[letter-'a']
		// Multiply by the letter's own prime once per occurrence. Squaring
		// the running product instead would make the result depend on map
		// iteration order.
		for ; count > 0; count-- {
			product *= p
		}
	}
	return product, nil
}
-- main_test.go --
package main
import (
"reflect"
"sort"
"testing"
)
// expectation is one table-driven case: a list of input words and the groups
// of words wanted back for it.
type expectation struct {
input []string
want [][]string
}
// expectations is the shared case table: a mixed word list, a list where only
// some words pair up, a single empty string, and a single one-letter word.
var expectations = []expectation{
	{
		input: []string{"eat", "tea", "tan", "ate", "nat", "bat"},
		want: [][]string{
			{"ate", "eat", "tea"},
			{"bat"},
			{"nat", "tan"},
		},
	},
	{
		input: []string{"eaft", "tea", "taen", "ate", "nate", "batf"},
		want: [][]string{
			{"batf"},
			{"eaft"},
			{"tea", "ate"},
			{"taen", "nate"},
		},
	},
	{
		input: []string{""},
		want: [][]string{
			{""},
		},
	},
	{
		input: []string{"a"},
		want: [][]string{
			{"a"},
		},
	},
}
// TestUsingSort checks groupAnagramsUsingSort against the shared case table.
// The function returns groups in map order, so both the result and the wanted
// value are brought into the same order before they are compared group by
// group.
func TestUsingSort(t *testing.T) {
	// normalize sorts every group, then orders the groups by length and
	// finally by their first element.
	normalize := func(groups [][]string) {
		for _, group := range groups {
			sort.Strings(group)
		}
		sort.Slice(groups, func(i, j int) bool {
			return len(groups[i]) < len(groups[j])
		})
		sort.Slice(groups, func(i, j int) bool {
			return len(groups[i]) > 0 && len(groups[j]) > 0 && groups[i][0] < groups[j][0]
		})
	}

	tmp := map[string][]string{}
	out := [][]string{}
	for _, tc := range expectations {
		out = groupAnagramsUsingSort(tc.input, tmp, out)
		if len(out) != len(tc.want) {
			t.Fatalf("unexpected output,\nwanted=%#v\ngot =%#v\n", tc.want, out)
		}
		normalize(out)
		normalize(tc.want)
		for i := range out {
			if !reflect.DeepEqual(out[i], tc.want[i]) {
				t.Fatalf("unexpected output,\nwanted=%#v\ngot =%#v\n", tc.want, out)
			}
		}
	}
}
// TestUsingHash checks groupAnagramsUsingHash against the shared case table.
// The function returns groups in map order, so both the result and the wanted
// value are brought into the same order before they are compared group by
// group.
func TestUsingHash(t *testing.T) {
	// normalize sorts every group, then orders the groups by length and
	// finally by their first element.
	normalize := func(groups [][]string) {
		for _, group := range groups {
			sort.Strings(group)
		}
		sort.Slice(groups, func(i, j int) bool {
			return len(groups[i]) < len(groups[j])
		})
		sort.Slice(groups, func(i, j int) bool {
			return len(groups[i]) > 0 && len(groups[j]) > 0 && groups[i][0] < groups[j][0]
		})
	}

	tmp := map[int][]string{}
	out := [][]string{}
	for _, tc := range expectations {
		out = groupAnagramsUsingHash(tc.input, tmp, out)
		if len(out) != len(tc.want) {
			t.Fatalf("unexpected output,\nwanted=%#v\ngot =%#v\n", tc.want, out)
		}
		normalize(out)
		normalize(tc.want)
		for i := range out {
			if !reflect.DeepEqual(out[i], tc.want[i]) {
				t.Fatalf("unexpected output,\nwanted=%#v\ngot =%#v\n", tc.want, out)
			}
		}
	}
}
// BenchmarkUsingSort runs groupAnagramsUsingSort over every entry of the
// shared case table per iteration, reusing one scratch map and one output
// slice throughout, and reports allocations.
func BenchmarkUsingSort(b *testing.B) {
	scratch := map[string][]string{}
	groups := [][]string{}
	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		for _, tc := range expectations {
			groups = groupAnagramsUsingSort(tc.input, scratch, groups)
			_ = groups
		}
	}
}
// BenchmarkUsingHash runs groupAnagramsUsingHash over every entry of the
// shared case table per iteration, reusing one scratch map and one output
// slice throughout, and reports allocations.
func BenchmarkUsingHash(b *testing.B) {
	scratch := map[int][]string{}
	groups := [][]string{}
	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		for _, tc := range expectations {
			groups = groupAnagramsUsingHash(tc.input, scratch, groups)
			_ = groups
		}
	}
}
Benchmark result
$ go test -bench=. -v .
=== RUN TestUsingSort
--- PASS: TestUsingSort (0.00s)
=== RUN TestUsingHash
--- PASS: TestUsingHash (0.00s)
goos: linux
goarch: amd64
BenchmarkUsingSort
BenchmarkUsingSort-4 344438 3315 ns/op 787 B/op 29 allocs/op
BenchmarkUsingHash
BenchmarkUsingHash-4 410810 2911 ns/op 496 B/op 17 allocs/op
PASS
ok _/home/clementauger/tmp 2.408s
I'm benchmarking unmarshaling from string to int and uint with this code:
package main
import (
"strconv"
"testing"
)
// BenchmarkUnmarshalInt measures UnmarshalInt on a short decimal string. The
// parsed value is discarded.
func BenchmarkUnmarshalInt(b *testing.B) {
	for n := b.N; n > 0; n-- {
		UnmarshalInt("123456")
	}
}
// BenchmarkUnmarshalUint measures UnmarshalUint on a short decimal string. The
// parsed value is discarded.
func BenchmarkUnmarshalUint(b *testing.B) {
	for n := b.N; n > 0; n-- {
		UnmarshalUint("123456")
	}
}
// UnmarshalInt parses v as a base-10 int with strconv.Atoi. The error is
// deliberately dropped: the caller simply receives whatever value Atoi
// returned alongside it.
func UnmarshalInt(v string) int {
	parsed, _ := strconv.Atoi(v)
	return parsed
}
// UnmarshalUint parses v as a base-10 uint.
//
// bitSize 0 asks strconv.ParseUint to range-check against uint itself, so the
// conversion below can never truncate. With a fixed bitSize of 64, a value
// above the uint range would be silently cut down on platforms where uint is
// 32 bits wide.
//
// The error is deliberately dropped: the caller receives whatever value
// ParseUint returned alongside it.
func UnmarshalUint(v string) uint {
	parsed, _ := strconv.ParseUint(v, 10, 0)
	return uint(parsed)
}
Result:
Running tool: C:\Go\bin\go.exe test -benchmem -run=^$ myBench/main -bench .
goos: windows
goarch: amd64
pkg: myBench/main
BenchmarkUnmarshalInt-8 99994166 11.7 ns/op 0 B/op 0 allocs/op
BenchmarkUnmarshalUint-8 54550413 21.0 ns/op 0 B/op 0 allocs/op
Is it possible that the second (uint) is almost twice as slow as the first (int)?
Yes, it's possible. strconv.Atoi has a fast path when the input string length is less than 19 (or 10 if int is 32 bit). This allows it to be a lot faster because it doesn't need to check for overflow.
If you change your test number to "1234567890123456789" (assuming 64 bit int), then your int benchmark is slightly slower than the uint benchmark because the fast path can't be used. On my machine, it takes 37.6 ns/op for the signed version vs 31.5 ns/op for the unsigned version.
Here's the modified benchmark code (note I added a variable that sums up the parsed results, just in case the compiler got clever and optimized it away).
package main
import (
"fmt"
"strconv"
"testing"
)
// X is the benchmark input: a 19-digit decimal string.
const X = "1234567890123456789"
// BenchmarkUnmarshalInt measures UnmarshalInt on X. Every result is added to
// a running total that is printed at the end, so the calls cannot be
// optimized away.
func BenchmarkUnmarshalInt(b *testing.B) {
	total := 0
	for n := 0; n < b.N; n++ {
		total += UnmarshalInt(X)
	}
	fmt.Println(total)
}
// BenchmarkUnmarshalUint measures UnmarshalUint on X. Every result is added to
// a running total that is printed at the end, so the calls cannot be
// optimized away.
func BenchmarkUnmarshalUint(b *testing.B) {
	total := uint(0)
	for n := 0; n < b.N; n++ {
		total += UnmarshalUint(X)
	}
	fmt.Println(total)
}
// UnmarshalInt parses v as a base-10 int with strconv.Atoi. The error is
// deliberately dropped: the caller simply receives whatever value Atoi
// returned alongside it.
func UnmarshalInt(v string) int {
	parsed, _ := strconv.Atoi(v)
	return parsed
}
// UnmarshalUint parses v as a base-10 uint.
//
// bitSize 0 asks strconv.ParseUint to range-check against uint itself, so the
// conversion below can never truncate. With a fixed bitSize of 64, a value
// above the uint range would be silently cut down on platforms where uint is
// 32 bits wide.
//
// The error is deliberately dropped: the caller receives whatever value
// ParseUint returned alongside it.
func UnmarshalUint(v string) uint {
	parsed, _ := strconv.ParseUint(v, 10, 0)
	return uint(parsed)
}
For reference, the code for strconv.Atoi in the standard library is currently as follows:
// Atoi parses s as a base-10 integer and returns it as an int. Short inputs
// are handled by an inline loop; everything else is delegated to
// ParseInt(s, 10, 0). Errors are reported as *NumError with Func set to
// "Atoi".
func Atoi(s string) (int, error) {
const fnAtoi = "Atoi"
sLen := len(s)
// Inputs of 1-9 bytes (32-bit int) or 1-18 bytes (64-bit int) take the
// inline loop below.
if intSize == 32 && (0 < sLen && sLen < 10) ||
intSize == 64 && (0 < sLen && sLen < 19) {
// Fast path for small integers that fit int type.
s0 := s
// Strip one leading sign; a sign with nothing after it is a syntax error.
if s[0] == '-' || s[0] == '+' {
s = s[1:]
if len(s) < 1 {
return 0, &NumError{fnAtoi, s0, ErrSyntax}
}
}
n := 0
for _, ch := range []byte(s) {
// ch is a byte, so anything below '0' wraps to a large value and the
// single "> 9" comparison rejects every non-digit.
ch -= '0'
if ch > 9 {
return 0, &NumError{fnAtoi, s0, ErrSyntax}
}
n = n*10 + int(ch)
}
if s0[0] == '-' {
n = -n
}
return n, nil
}
// Slow path for invalid, big, or underscored integers.
i64, err := ParseInt(s, 10, 0)
// Relabel the error so it names Atoi as the failing function.
if nerr, ok := err.(*NumError); ok {
nerr.Func = fnAtoi
}
return int(i64), err
}
I tested simple string concatenation in Go with “+” and bytes.Buffer (both “WriteString” and “Write(bytes)”. The result shows that “+” is much slower than the other two, which makes sense.
However, when I use the three ways to implement Fibonacci-like string concatenation (i.e. a, b, ab, bab, abbab, bababbab, abbabbababbab), “+” performs the best. The sample codes and the benchmarking results are shown as follows.
String “+”
// Fibonacci returns the n-th term of the string sequence that starts "a", "b"
// and continues with each term being the concatenation of the two before it
// ("ab", "bab", "abbab", ...). Terms are built with the + operator. For n < 1
// the result is the empty string.
func Fibonacci(n int) string {
	switch n {
	case 1:
		return "a"
	case 2:
		return "b"
	}
	older, newer := "a", "b"
	term := ""
	for step := 3; step <= n; step++ {
		term = older + newer
		older, newer = newer, term
	}
	return term
}
bytes.Buffer (WriteString)
// Fibonacci returns, in a bytes.Buffer, the n-th term of the string sequence
// that starts "a", "b" and continues with each term being the concatenation of
// the two before it. Three buffers hold the two previous terms and the current
// one. Every step resets them and refills them from the String() form of the
// others. For n < 1 the returned buffer is empty.
func Fibonacci(n int) bytes.Buffer {
	var term, older, newer bytes.Buffer
	older.WriteString("a")
	newer.WriteString("b")

	switch n {
	case 1:
		term.WriteString("a")
	case 2:
		term.WriteString("b")
	default:
		for step := 3; step <= n; step++ {
			term.Reset()
			term.WriteString(older.String())
			term.WriteString(newer.String())

			older.Reset()
			older.WriteString(newer.String())

			newer.Reset()
			newer.WriteString(term.String())
		}
	}
	return term
}
the benchmarking results
I believe it is the overhead of bytes.Buffer.String() that make this happen. But I could not figure out how to use bytes.Buffer correctly in this case. Or how could I modify my code to avoid the problem? Hints, sample codes, or explanations are all appreciated. Many thanks in advance!
In Go, use the testing package for benchmarks.
Write reasonably efficient Go functions. Don't perform unnecessary conversions. Minimize allocations and copies. And so on. Allow for non-ASCII characters, for example Chinese characters. Allow for strings of more than one character. Consider using a byte slice. For example,
// fibonacciN returns the Fibonacci number reached after n steps of the
// sequence 0, 1, 1, 2, 3, ... where one step yields 0 (so fibonacciN(1) == 0
// and fibonacciN(2) == 1). fibonacciN(0) is 0. Stepping stops early once the
// uint64 addition wraps around.
func fibonacciN(n int) uint64 {
	var result uint64
	cur, next := uint64(0), uint64(1)
	for step := 0; step < n; step++ {
		result = cur
		cur, next = next, cur+next
		if cur > next {
			// next wrapped past the uint64 range.
			break
		}
	}
	return result
}
// Fibonacci returns the n-th term of the string sequence whose first two terms
// are a and b and whose later terms are each the concatenation of the two
// before them. n <= 0 yields "".
//
// The whole result is allocated once. The third term (a+b) is placed at the
// end of the buffer, and every later term is completed by copying the term two
// steps back, which already sits at the tail, in front of the previous term.
func Fibonacci(a, b string, n int) string {
	if n <= 0 {
		return ""
	}
	if n == 1 {
		return a
	}
	if n == 2 {
		return b
	}

	// termLen is the byte length of term k.
	termLen := func(k int) int {
		return len(a)*int(fibonacciN(k-1)) + len(b)*int(fibonacciN(k))
	}

	buf := make([]byte, termLen(n))
	seed := a + b
	copy(buf[len(buf)-len(seed):], seed)
	for k := 4; k <= n; k++ {
		end := len(buf) - termLen(k-2)
		start := len(buf) - termLen(k)
		copy(buf[start:end], buf[end:])
	}
	return string(buf)
}
Benchmark functions. For example, with n = 20,
$ go test fib_test.go -bench=. -benchmem
goos: linux
goarch: amd64
BenchmarkPeterSO-8 1000000 1851 ns/op 13568 B/op 2 allocs/op
BenchmarkPlus-8 500000 2493 ns/op 18832 B/op 18 allocs/op
BenchmarkBuffer-8 100000 12773 ns/op 90256 B/op 60 allocs/op
PASS
$
fib_test.go:
package main
import (
"bytes"
"testing"
)
// benchN is the sequence index the benchmarks in this file generate.
var benchN = 20
// fibonacciN returns the Fibonacci number reached after n steps of the
// sequence 0, 1, 1, 2, 3, ... where one step yields 0 (so fibonacciN(1) == 0
// and fibonacciN(2) == 1). fibonacciN(0) is 0. Stepping stops early once the
// uint64 addition wraps around.
func fibonacciN(n int) uint64 {
	var result uint64
	cur, next := uint64(0), uint64(1)
	for step := 0; step < n; step++ {
		result = cur
		cur, next = next, cur+next
		if cur > next {
			// next wrapped past the uint64 range.
			break
		}
	}
	return result
}
// FibonacciPeterSO returns the n-th term of the string sequence whose first
// two terms are a and b and whose later terms are each the concatenation of
// the two before them. n <= 0 yields "".
//
// The whole result is allocated once. The third term (a+b) is placed at the
// end of the buffer, and every later term is completed by copying the term two
// steps back, which already sits at the tail, in front of the previous term.
func FibonacciPeterSO(a, b string, n int) string {
	if n <= 0 {
		return ""
	}
	if n == 1 {
		return a
	}
	if n == 2 {
		return b
	}

	// termLen is the byte length of term k.
	termLen := func(k int) int {
		return len(a)*int(fibonacciN(k-1)) + len(b)*int(fibonacciN(k))
	}

	buf := make([]byte, termLen(n))
	seed := a + b
	copy(buf[len(buf)-len(seed):], seed)
	for k := 4; k <= n; k++ {
		end := len(buf) - termLen(k-2)
		start := len(buf) - termLen(k)
		copy(buf[start:end], buf[end:])
	}
	return string(buf)
}
// BenchmarkPeterSO measures FibonacciPeterSO for term benchN of the "a"/"b"
// sequence. The result is discarded.
func BenchmarkPeterSO(b *testing.B) {
	for n := b.N; n > 0; n-- {
		FibonacciPeterSO("a", "b", benchN)
	}
}
// FibonacciPlus returns the n-th term of the string sequence that starts "a",
// "b" and continues with each term being the concatenation of the two before
// it. Terms are built with the + operator. For n < 1 the result is the empty
// string.
func FibonacciPlus(n int) string {
	switch n {
	case 1:
		return "a"
	case 2:
		return "b"
	}
	older, newer := "a", "b"
	term := ""
	for step := 3; step <= n; step++ {
		term = older + newer
		older, newer = newer, term
	}
	return term
}
// BenchmarkPlus measures FibonacciPlus for term benchN. The result is
// discarded.
func BenchmarkPlus(b *testing.B) {
	for n := b.N; n > 0; n-- {
		FibonacciPlus(benchN)
	}
}
// FibonacciBuffer returns, in a bytes.Buffer, the n-th term of the string
// sequence that starts "a", "b" and continues with each term being the
// concatenation of the two before it. Three buffers hold the two previous
// terms and the current one. Every step resets them and refills them from the
// String() form of the others. For n < 1 the returned buffer is empty.
func FibonacciBuffer(n int) bytes.Buffer {
	var term, older, newer bytes.Buffer
	older.WriteString("a")
	newer.WriteString("b")

	switch n {
	case 1:
		term.WriteString("a")
	case 2:
		term.WriteString("b")
	default:
		for step := 3; step <= n; step++ {
			term.Reset()
			term.WriteString(older.String())
			term.WriteString(newer.String())

			older.Reset()
			older.WriteString(newer.String())

			newer.Reset()
			newer.WriteString(term.String())
		}
	}
	return term
}
// BenchmarkBuffer measures FibonacciBuffer for term benchN. The result is
// discarded.
func BenchmarkBuffer(b *testing.B) {
	for n := b.N; n > 0; n-- {
		FibonacciBuffer(benchN)
	}
}
// testN is the highest sequence index the tests check; it starts out equal to benchN.
var testN = benchN
// TestPeterSO compares FibonacciPeterSO (seeded with "a" and "b") against
// FibonacciPlus for every index from 0 through testN.
func TestPeterSO(t *testing.T) {
	for n := 0; n <= testN; n++ {
		want, got := FibonacciPlus(n), FibonacciPeterSO("a", "b", n)
		if got != want {
			t.Errorf("want: %s got: %s", want, got)
		}
	}
}
bytes.Buffer (or the newer and faster strings.Builder) wins over simple + string concatenation if you want to append "many" values, and obtain the result once in the end, because intermediate allocations are not needed compared to using + multiple times.
And you are not using bytes.Buffer this way: you just write one string into it, and you obtain its content and you reset it. That's just a roundtrip which turns out to be an overhead.
The problem here is that generating the Fibonacci string you are seeking, that requires prepending text to the buffer, not appending to it. And bytes.Buffer only supports appending to it, so using it like this is not a good fit at all.
Generating reverse with bytes.Buffer
Note that a prepend operation is basically an append operation if you generate the reverse of a string. Which means if we first would generate the reverse of the result, we could use bytes.Buffer to perform an append when otherwise a prepend would be needed. Of course the appended string would have to also be the reverse of what otherwise would be prepended.
And of course when we're done, we have to reverse the result to get what we originally wanted.
Also note that when building result in an iterative way, the successive intermediate result is the concatenation of the previous and the one before that. So to obtain the nth result, we can simply append the substring of what we already have! This is a nice optimization.
Here's how it would look like:
// FibonacciReverseBuf returns the n-th term of the string sequence "a", "b",
// "ab", "bab", ... (each term is the two previous terms concatenated). It
// builds the term reversed in a bytes.Buffer, so the "prepend" each step needs
// becomes an append, and reverses the bytes once at the end.
//
// n <= 0 yields "". Negative n is handled explicitly because the loop below
// never runs for it and the seed "ba" would otherwise come back reversed as
// "ab".
func FibonacciReverseBuf(n int) string {
	switch {
	case n <= 0:
		return ""
	case n == 1:
		return "a"
	case n == 2:
		return "b"
	}
	// buf starts as the reversed third term. prev2 is the length of the term
	// three steps back, so len-prev2 is the length of the term two steps back.
	// Reversed, that term is exactly the prefix of buf appended here.
	prev, prev2 := 1, 1
	buf := bytes.NewBufferString("ba")
	for i := 3; i < n; i++ {
		buf.Write(buf.Bytes()[:buf.Len()-prev2])
		prev2, prev = prev, prev+prev2
	}
	// Reverse in place to obtain the term in its natural order.
	b := buf.Bytes()
	for i, j := 0, len(b)-1; i < j; i, j = i+1, j-1 {
		b[i], b[j] = b[j], b[i]
	}
	return string(b)
}
Generating reverse with []byte and append()
Also note that since we're only appending, we can just as easily use a []byte and use the builtin append() function:
// FibonacciReverse returns the n-th term of the string sequence "a", "b",
// "ab", "bab", ... (each term is the two previous terms concatenated). It
// builds the term reversed in a byte slice with append, so the "prepend" each
// step needs becomes an append, and reverses the bytes once at the end.
//
// n <= 0 yields "". Negative n is handled explicitly because the loop below
// never runs for it and the seed "ba" would otherwise come back reversed as
// "ab".
func FibonacciReverse(n int) string {
	switch {
	case n <= 0:
		return ""
	case n == 1:
		return "a"
	case n == 2:
		return "b"
	}
	// b starts as the reversed third term. prev2 is the length of the term
	// three steps back, so len-prev2 is the length of the term two steps back.
	// Reversed, that term is exactly the prefix of b appended here.
	prev, prev2 := 1, 1
	b := []byte("ba")
	for i := 3; i < n; i++ {
		b = append(b, b[:len(b)-prev2]...)
		prev2, prev = prev, prev+prev2
	}
	// Reverse in place to obtain the term in its natural order.
	for i, j := 0, len(b)-1; i < j; i, j = i+1, j-1 {
		b[i], b[j] = b[j], b[i]
	}
	return string(b)
}
Preallocating and using copy() in a single []byte
Still, using append() may cause reallocations, because we don't know how big the buffer (the result) will be. So we start with a small buffer, and append() will increase it as needed. Also append() requires slice value (slice header) assignments. And we also have to reverse the result.
A much faster solution would be to get rid of those cons.
First let's calculate how big the result will be (this is essentially calculating the Fibonacci numbers), and allocate the required byte slice in one step.
If we do so, we can do the "prepend" operations by copying parts of our buffer (which is a []byte) to specific positions. So no append(), no reallocations, no reversing.
This is how it will look like:
// Fibonacci returns the n-th term of the string sequence "a", "b", "ab",
// "bab", ... (each term is the two previous terms concatenated).
//
// The term lengths are computed first, the result is allocated in one piece,
// and each later term is completed by copying the term two steps back (already
// present at the tail of the buffer) in front of the previous term. No append
// and no final reversal are needed.
//
// n <= 0 yields "". Without that guard a negative n would reach
// make([]int, n) and panic.
func Fibonacci(n int) string {
	switch {
	case n <= 0:
		return ""
	case n == 1:
		return "a"
	case n == 2:
		return "b"
	}
	// fibs[i] is the length of term i+1.
	fibs := make([]int, n)
	fibs[0], fibs[1] = 1, 1
	for i := 2; i < n; i++ {
		fibs[i] = fibs[i-1] + fibs[i-2]
	}

	l := fibs[n-1]
	b := make([]byte, l)
	// Term 3 ("ab") occupies the last two bytes.
	b[l-2], b[l-1] = 'a', 'b'
	for i := 3; i < n; i++ {
		// copy stops at the shorter operand, i.e. after fibs[i-2] bytes.
		copy(b[l-fibs[i]:], b[l-fibs[i-2]:])
	}
	return string(b)
}
Testing the output
To test if the above functions give the result we expect them to give, we may use the following testing function:
// TestFibonacci checks the first eight terms (indices 0 through 7) of the
// sequence against every implementation under test and reports each mismatch
// with the implementation's name.
func TestFibonacci(t *testing.T) {
	type impl struct {
		name string
		f    func(int) string
	}
	impls := []impl{
		{"FibonacciReverseBuf", FibonacciReverseBuf},
		{"FibonacciReverse", FibonacciReverse},
		{"Fibonacci", Fibonacci},
	}

	type termCase struct {
		n   int
		exp string
	}
	cases := []termCase{
		{0, ""},
		{1, "a"},
		{2, "b"},
		{3, "ab"},
		{4, "bab"},
		{5, "abbab"},
		{6, "bababbab"},
		{7, "abbabbababbab"},
	}

	for _, tc := range cases {
		for _, im := range impls {
			got := im.f(tc.n)
			if got == tc.exp {
				continue
			}
			t.Errorf("%s: Expected: %s, got: %s, n: %d",
				im.name, tc.exp, got, tc.n)
		}
	}
}
Benchmark results
Benchmarking with n = 20:
BenchmarkFibonacciReverseBuf-4 200000 10739 ns/op 18024 B/op 10 allocs/op
BenchmarkFibonacciReverse-4 100000 13208 ns/op 28864 B/op 10 allocs/op
BenchmarkFibonacci-4 500000 3383 ns/op 13728 B/op 3 allocs/op
BenchmarkPeterSO-4 300000 4417 ns/op 13568 B/op 2 allocs/op
BenchmarkPlus-4 200000 6072 ns/op 18832 B/op 18 allocs/op
BenchmarkBuffer-4 50000 29608 ns/op 90256 B/op 60 allocs/op
We can see that this use of bytes.Buffer was much better than yours. Still, using concatenation was faster, because there aren't many concatenations here, they are small ones, and that doesn't require reversing in the end.
On the other hand my Fibonacci() solution outperformed all other presented solutions.
// Benchmark_foreach1 measures iterating a 100000-entry map by key and looking
// each value up again through the map.
//
// Building the map is setup, not the thing being measured, so the timer is
// reset once the map is filled. The inner loop uses its own variable name
// instead of shadowing the iteration counter.
func Benchmark_foreach1(b *testing.B) {
	const size = 100000
	test := make(map[int]int, size)
	for i := 0; i < size; i++ {
		test[i] = 1
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for k := range test {
			if test[k] != 1 {
				panic("ds")
			}
		}
	}
}
// Benchmark_foreach2 measures iterating a 100000-entry map by value, using the
// value the range clause already delivers.
//
// Building the map is setup, not the thing being measured, so the timer is
// reset once the map is filled.
func Benchmark_foreach2(b *testing.B) {
	const size = 100000
	test := make(map[int]int, size)
	for i := 0; i < size; i++ {
		test[i] = 1
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for _, v := range test {
			if v != 1 {
				panic("heh")
			}
		}
	}
}
run with result as below
goos: linux
goarch: amd64
Benchmark_foreach1-2 500 3172323 ns/op
Benchmark_foreach2-2 1000 1707214 ns/op
why is foreach-2 slow?
I think Benchmark_foreach2-2 is about 2 times faster - it requires 1707214 nanoseconds per operation, and first one takes 3172323. So second one is 3172323 / 1707214 = 1.85 times faster.
Reason: the second benchmark does not need to look the value up in the map again; the range loop has already placed it in the variable v.
The test[k] statement in BenchmarkForeachK takes time to randomly read the value, so BenchmarkForeachK takes more time than BenchmarkForeachV, 9362945 ns/op versus 4213940 ns/op .
For example,
package main
import "testing"
// testMap returns a fresh map holding the keys 0 through 99999, each mapped
// to 1.
func testMap() map[int]int {
	m := map[int]int{}
	for key := 0; key != 100000; key++ {
		m[key] = 1
	}
	return m
}
// BenchmarkForeachK measures ranging over the keys of the test map and reading
// each value back through a map lookup. Map construction is excluded from the
// timing.
func BenchmarkForeachK(b *testing.B) {
	m := testMap()
	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		for key := range m {
			if m[key] == 1 {
				continue
			}
			panic("eh")
		}
	}
}
// BenchmarkForeachV measures ranging over the values of the test map directly,
// with no extra lookup. Map construction is excluded from the timing.
func BenchmarkForeachV(b *testing.B) {
	m := testMap()
	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		for _, value := range m {
			if value == 1 {
				continue
			}
			panic("heh")
		}
	}
}
Output:
$ go test foreach_test.go -bench=.
BenchmarkForeachK-4 200 9362945 ns/op 0 B/op 0 allocs/op
BenchmarkForeachV-4 300 4213940 ns/op 0 B/op 0 allocs/op
I want to check if a string is numeric.
For example:
"abcd123" should return false.
"1.4" or "240" should return true.
I thought about using ParseInt and ParseFloat (from the strconv package), but am not sure if that is the right way.
I was thinking of using strconv ParseInt and ParseFloat but not sure
if that is the right way.
Well, it's certainly a right way.
You don't need to use ParseInt, though. ParseFloat will do the job.
// isNumeric reports whether strconv.ParseFloat can parse s as a 64-bit float
// without returning an error.
func isNumeric(s string) bool {
	if _, err := strconv.ParseFloat(s, 64); err != nil {
		return false
	}
	return true
}
See an example here: https://play.golang.org/p/D53HRS-KIL
If you need to convert the string to a floating-point number strconv.ParseFloat is the first choice.
Here you just need to know that there is only "0123456789" and maximum one '.' in your string, here for me isNumDot is 12x faster than isNumeric, see:
Consider this (1.7 seconds) - optimized for performance:
// isNumDot reports whether s consists only of the ASCII digits 0-9 with at
// most one '.', and contains at least one digit. Requiring a digit means ""
// and "." are rejected. Signs, exponents and whitespace are rejected.
func isNumDot(s string) bool {
	digits := 0
	dotFound := false
	// Only ASCII can match, so indexing bytes avoids decoding runes.
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == '.':
			if dotFound {
				return false
			}
			dotFound = true
		case '0' <= c && c <= '9':
			digits++
		default:
			return false
		}
	}
	return digits > 0
}
and this (21.7 seconds - doing more extra works "converts the string to a floating-point number"):
// isNumeric reports whether strconv.ParseFloat can parse s as a 64-bit float
// without returning an error.
func isNumeric(s string) bool {
	if _, err := strconv.ParseFloat(s, 64); err != nil {
		return false
	}
	return true
}
Try it:
package main
import (
"fmt"
"strconv"
"time"
)
// isNumDot reports whether s consists only of the ASCII digits 0-9 with at
// most one '.', and contains at least one digit. Requiring a digit means ""
// and "." are rejected. Signs, exponents and whitespace are rejected.
func isNumDot(s string) bool {
	digits := 0
	dotFound := false
	// Only ASCII can match, so indexing bytes avoids decoding runes.
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == '.':
			if dotFound {
				return false
			}
			dotFound = true
		case '0' <= c && c <= '9':
			digits++
		default:
			return false
		}
	}
	return digits > 0
}
// isNumeric reports whether strconv.ParseFloat can parse s as a 64-bit float
// without returning an error.
func isNumeric(s string) bool {
	if _, err := strconv.ParseFloat(s, 64); err != nil {
		return false
	}
	return true
}
// main prints the isNumDot verdict for four sample inputs (expected: true,
// false, false, false) and then times both checkers with benchmark.
func main() {
	for _, sample := range []string{"240", "abcd123", "0.4.", "240 "} {
		fmt.Println(isNumDot(sample))
	}
	benchmark(isNumDot)
	benchmark(isNumeric)
}
// benchmark calls f on four fixed inputs, 100000000 rounds in total, then
// prints the elapsed wall-clock time followed by the last combined result.
// The inputs are joined with ||, so a round stops at the first input f
// accepts.
func benchmark(f func(string) bool) {
	var res bool
	start := time.Now()
	for round := 100000000; round > 0; round-- {
		res = f("a 240") || f("abcd123") || f("0.4.") || f("240 ")
	}
	fmt.Println(time.Since(start))
	fmt.Println(res)
}
output:
true
false
false
false
1.7822s
false
21.723s
false
Using the benchmark (isNumDot is faster than isNumeric):
BenchmarkIsNumDot-8 34117197 31.2 ns/op 0 B/op 0 allocs/op
BenchmarkIsNumeric-8 1931089 630 ns/op 192 B/op 4 allocs/op
// r = isNumDot("2.22")
BenchmarkIsNumDot-8 102849996 11.4 ns/op 0 B/op 0 allocs/op
BenchmarkIsNumeric-8 21994874 48.5 ns/op 0 B/op 0 allocs/op
// r = isNumDot("a 240")
BenchmarkIsNumDot-8 256610877 4.58 ns/op 0 B/op 0 allocs/op
BenchmarkIsNumeric-8 8962381 140 ns/op 48 B/op 1 allocs/op
The benchmark:
package main
import (
"testing"
)
// r is a package-level variable for benchmarks to store their result in.
var r bool
// BenchmarkIsNumDot measures isNumDot over four fixed inputs per iteration and
// stores the combined result in the package-level r.
func BenchmarkIsNumDot(b *testing.B) {
	for n := 0; n < b.N; n++ {
		r = isNumDot("a 240") || isNumDot("abcd123") || isNumDot("0.4.") || isNumDot("240 ")
	}
}
// BenchmarkIsNumeric measures isNumeric over four fixed inputs per iteration
// and stores the combined result in the package-level r.
func BenchmarkIsNumeric(b *testing.B) {
	for n := 0; n < b.N; n++ {
		r = isNumeric("a 240") || isNumeric("abcd123") || isNumeric("0.4.") || isNumeric("240 ")
	}
}
I tried to comment on Adrian's answer but I guess I don't have enough reputation points. Building on his excellent response, here is a variation using PCRE. Some brief explanation on the symbols if you are unfamiliar with regular expressions:
"^" matches the start of input (i.e. beginning of your string)
"$" matches the end of input (i.e. the end of your string)
"()" are grouping operators
"*" matches 0 or more
"+" matches 1 or more
"?" matches exactly 0 or 1
"\d" is a character class which represents the character values 0 through 9
So, the following requires at least one leading digit (so ".5" is rejected), permits a trailing decimal point as in "0.", and accepts everything else that is normally written as a plain decimal value. You can experiment with this a bit.
// floatPattern matches one or more digits, optionally followed by a '.' and
// zero or more digits. It is compiled once at package initialisation.
var floatPattern = regexp.MustCompile(`^\d+(\.\d*)?$`)

// isFloat reports whether s is an unsigned decimal number with at least one
// leading digit ("12", "0.", "1.5"). Input such as ".5", "-1" or "" is
// rejected.
//
// regexp.MatchString returns (bool, error), so it cannot be returned directly
// from a function that yields a single bool. Matching against the precompiled
// pattern has no error to handle and avoids recompiling on every call.
func isFloat(s string) bool {
	return floatPattern.MatchString(s)
}
Naturally, if you are calling this function to validate data, it should be cleaned:
str := strings.TrimSpace(someString)
if isFloat(str) {
...
}
That only works on ASCII characters. If you are dealing with UTF8 or another multi-byte character set (MBCS), it can be done with regexp but more work would be required and perhaps another approach altogether.
You can use the strconv.Atoi function for check integer values, and the strconv.ParseFloat for float values. Below is an example:
package main
import (
"fmt"
"strconv"
)
// main shows the two standard-library checks side by side: strconv.Atoi for
// an integer string and strconv.ParseFloat for a float string. It prints one
// line per sample saying whether the parse succeeded.
func main() {
	v1 := "14"
	_, intErr := strconv.Atoi(v1)
	if intErr != nil {
		fmt.Printf("%q is not a number.\n", v1)
	} else {
		fmt.Printf("%q looks like a number.\n", v1)
	}

	v2 := "1.4"
	_, floatErr := strconv.ParseFloat(v2, 64)
	if floatErr != nil {
		fmt.Printf("%q is not a float.\n", v2)
	} else {
		fmt.Printf("%q looks like a float.\n", v2)
	}
}
/* Output:
"14" looks like a number.
"1.4" looks like a float.
*/
You can check it on the Go Playground.
All the answers are valid, but there's another option not yet suggested:
re := regexp.MustCompile(`^[0-9]+(\.[0-9]+)?$`)
isNum := re.Match([]byte("ab123"))
Playground demo
I hit this in a high-throughput system today and did a benchmark of the three suggestions. Results:
BenchmarkNumDot-4: 657966132: 18.2 ns/op
BenchmarkNumeric-4: 49575919: 226 ns/op
BenchmarkRegexp-4: 18817201: 628 ns/op
Code follows since the playground does not support benchmarking.
package main
import (
"regexp"
"strconv"
"testing"
)
// BenchmarkNumDot measures isNumDot on four fixed inputs per iteration:
// letters, an integer, a decimal, and a string with several dots. Results are
// discarded.
func BenchmarkNumDot(b *testing.B) {
	for n := 0; n < b.N; n++ {
		isNumDot("abc")
		isNumDot("123")
		isNumDot("12.34")
		isNumDot("1.2.3.4")
	}
}
// BenchmarkNumeric measures isNumeric on four fixed inputs per iteration:
// letters, an integer, a decimal, and a string with several dots. Results are
// discarded.
func BenchmarkNumeric(b *testing.B) {
	for n := 0; n < b.N; n++ {
		isNumeric("abc")
		isNumeric("123")
		isNumeric("12.34")
		isNumeric("1.2.3.4")
	}
}
// BenchmarkRegexp measures isNumReg on four fixed inputs per iteration, using
// one pattern compiled before the loop. Results are discarded.
func BenchmarkRegexp(b *testing.B) {
	pattern := regexp.MustCompile(`^[0-9]+(\.[0-9]+)?$`)
	for n := 0; n < b.N; n++ {
		isNumReg("abc", pattern)
		isNumReg("123", pattern)
		isNumReg("12.34", pattern)
		isNumReg("1.2.3.4", pattern)
	}
}
// isNumDot reports whether s consists only of the ASCII digits 0-9 with at
// most one '.', and contains at least one digit. Requiring a digit means ""
// and "." are rejected. Signs, exponents and whitespace are rejected.
func isNumDot(s string) bool {
	digits := 0
	dotFound := false
	// Only ASCII can match, so indexing bytes avoids decoding runes.
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == '.':
			if dotFound {
				return false
			}
			dotFound = true
		case '0' <= c && c <= '9':
			digits++
		default:
			return false
		}
	}
	return digits > 0
}
// isNumeric reports whether strconv.ParseFloat can parse s as a 64-bit float
// without returning an error.
func isNumeric(s string) bool {
	if _, err := strconv.ParseFloat(s, 64); err != nil {
		return false
	}
	return true
}
// isNumReg reports whether s matches the compiled pattern re. It uses
// MatchString so the string is matched directly, without first copying it
// into a new byte slice on every call.
func isNumReg(s string, re *regexp.Regexp) bool {
	return re.MatchString(s)
}