I just read some short tutorials of Go and wrote a simple sieve program. Sieve uses the sieve algorithm to print all the prime numbers smaller than 10000, which creates a lot of goroutines. I got the correct results but the program is very slow (5 seconds on my machine). I also wrote a Lua script and a Python script which implement the same algorithm, and both run a lot faster (about 1 second each on my machine).
Note that the purpose is to get an idea of goroutine performance compared with coroutines in other languages, for example Lua. The implementation is very inefficient; some comments pointed out that it's not the correct way to implement a Sieve of Eratosthenes. Yes, that's intentional. Some other replies pointed out that the slowness is caused by print I/O, so I commented out the print lines.
My question is why my sieve program implemented in Go is so slow?
Here is the code:
package main
import (
"fmt"
"sync"
)
type Sieve struct {
id int;
msg_queue chan int;
wg *sync.WaitGroup;
}
// NewSieve builds a sieve stage with the given id, an unbuffered inbox,
// and a WaitGroup pre-armed with one pending Done (consumed by run).
func NewSieve(id int) *Sieve {
	wg := &sync.WaitGroup{}
	wg.Add(1)
	return &Sieve{
		id:        id,
		msg_queue: make(chan int),
		wg:        wg,
	}
}
// run is the body of one sieve stage. The first value received on
// msg_queue becomes this stage's prime; 0 is the shutdown sentinel.
// Every later value not divisible by the prime is forwarded to a lazily
// created next stage. On shutdown the sentinel is forwarded downstream
// and this stage waits for the rest of the chain to finish before the
// deferred Done releases its own WaitGroup.
func (sieve *Sieve) run() {
	defer sieve.wg.Done()
	myprime := <-sieve.msg_queue
	if myprime == 0 {
		// Sentinel arrived before any prime: no downstream stage exists.
		return
	}
	// fmt.Printf("Sieve (%d) is for prime number %d.\n", sieve.id, myprime)
	next_sieve := NewSieve(sieve.id + 1)
	go next_sieve.run()
	for {
		number := <-sieve.msg_queue
		if number == 0 {
			// Forward the sentinel, then wait for downstream stages to drain.
			next_sieve.msg_queue <- number;
			next_sieve.wg.Wait()
			return
		} else if number % myprime != 0 {
			// fmt.Printf("id: %d, number: %d, myprime: %d, number mod myprime: %d\n", sieve.id, number, myprime, number % myprime)
			next_sieve.msg_queue <- number
		}
	}
}
// driver feeds the integers 2..10000 into the first sieve stage, then
// sends the 0 sentinel and waits for the whole chain to shut down.
func driver() {
	head := NewSieve(2)
	go head.run()
	for v := 2; v <= 10000; v++ {
		head.msg_queue <- v
	}
	head.msg_queue <- 0 // sentinel: tells every stage to exit
	head.wg.Wait()
}
// main runs the goroutine-chain sieve benchmark.
func main() {
	driver()
}
As a comparison, here is the code of sieve.lua
-- sieve is one coroutine stage: the first number it receives becomes
-- its prime; every later number not divisible by that prime is passed
-- on to a lazily created next stage.
-- Fix: the original used "//" for the commented-out prints, which is
-- not Lua comment syntax ("--" is) and makes the file fail to load.
function sieve(id)
    local myprime = coroutine.yield()
    -- print(string.format("Sieve (%d) is for prime number %d", id, myprime))
    local next_sieve = coroutine.create(sieve)
    coroutine.resume(next_sieve, id + 1)
    while true do
        local number = coroutine.yield()
        if number % myprime ~= 0 then
            -- print(string.format("id: %d, number: %d, myprime: %d, number mod myprime: %d", id, number, myprime, number % myprime))
            coroutine.resume(next_sieve, number)
        end
    end
end
-- driver pushes the integers 2..10000 through the coroutine chain.
-- The dead "local n" declaration was removed: the numeric for statement
-- declares its own loop-local n, so the outer one was never used.
function driver()
    local first = coroutine.create(sieve)
    coroutine.resume(first, 2)
    for n = 2, 10000 do
        coroutine.resume(first, n)
    end
end

driver()
Meaningless microbenchmarks produce meaningless results.
You are timing print I/O.
You are incurring go routine and channel overhead for a small amount of work.
Here is a prime number sieve program in Go.
Output:
$ go version
go version devel +46be01f4e0 Sun Oct 13 01:48:30 2019 +0000 linux/amd64
$ go build sumprimes.go && time ./sumprimes
5736396
29.96µs
real 0m0.001s
user 0m0.001s
sys 0m0.000s
sumprimes.go:
package main
import (
"fmt"
"time"
)
const (
	prime    = 0x00 // sieve cell value: the index still marks an odd prime
	notprime = 0xFF // sieve cell value: the index is known composite (or 1)
)

// oddPrimes runs a sieve of Eratosthenes over the odd numbers up to n.
// The result has one byte per odd number: cell k describes 2k+1 and
// equals prime iff 2k+1 is an odd prime.
func oddPrimes(n uint64) []uint8 {
	cells := make([]uint8, (n+1)/2)
	cells[0] = notprime // 1 is not prime
	for p := uint64(3); p*p <= n; {
		// Cross off p*p, p*(p+2), ... — all remaining odd multiples of p.
		for c := p * p; c <= n; c += 2 * p {
			cells[c/2] = notprime
		}
		// Advance p to the next odd number still marked prime.
		for p += 2; cells[p/2] == notprime; p += 2 {
		}
	}
	return cells
}
// sumPrimes returns the sum of all primes less than or equal to n.
func sumPrimes(n uint64) uint64 {
	var total uint64
	if n >= 2 {
		total = 2 // the only even prime
	}
	for idx, mark := range oddPrimes(n) {
		if mark == prime {
			total += 2*uint64(idx) + 1 // cell idx represents the odd number 2*idx+1
		}
	}
	return total
}
// main times sumPrimes up to 10000 and prints the sum followed by the
// elapsed wall-clock time.
func main() {
	start := time.Now()
	var n uint64 = 10000
	sum := sumPrimes(n)
	fmt.Println(sum)
	fmt.Println(time.Since(start))
}
Most of the time is spent in fmt.Printf.
Taking out the line:
fmt.Printf("id: %d, number: %d, myprime: %d, number mod myprime: %d\n", sieve.id, number, myprime, number%myprime)
reduces runtime from ~5.4 seconds to ~0.64 seconds on one test I ran.
Taking out the unnecessary sync.WaitGroups reduces the time a bit further, to ~0.48 seconds. See the version without sync.WaitGroup here. You're still doing a lot of channel operations, which languages with yield-value-from-coroutine operators do not need (though they have their own issues instead). This is not a good way to implement primality testing.
Related
For this task i need to find min sum in list of numbers. Then i must print number that have min sum. This must be done with Mutex and WaitGroups. I can't find where is the mistake or why is output different.
Logic: read n with Scanf and make a vector of length n. Then create a function that computes the digit sum of a number, and a second function that, in one for loop, hands that work out to goroutines.
I run this code a few times, and sometimes give different answer for same input.
Input:
3
13
12
11
Output:
Sometimes 12
Sometimes 11
package main
import (
"fmt"
"math"
"runtime"
"sync"
)
var wg sync.WaitGroup
var mutex sync.Mutex
var vector []int // input numbers read from stdin
var i int        // next index for a worker to claim (guarded by mutex)
var n int        // how many numbers were read
var firstsum int // smallest digit sum found so far — NOTE(review): written by worker goroutines without the mutex held; data race
var p int        // temp sum — NOTE(review): also written by every worker concurrently; data race
var index_result int // index of the element whose digit sum is smallest
// sumanajmanjih returns the sum of the decimal digits of broj,
// ignoring its sign.
func sumanajmanjih(broj int) int {
	value := int(math.Abs(float64(broj)))
	total := 0
	for {
		total += value % 10
		value /= 10
		if value <= 0 {
			break
		}
	}
	return total
}
func glavna(rg int) {
var index int
firstsum = sumanajmanjih(vector[0])
for {
mutex.Lock()
if i == n {
mutex.Unlock()
break
} else {
index = i
i += 1
mutex.Unlock()
}
fmt.Printf("Procesor %d radni indeks %d\n", rg, index)
p = sumanajmanjih(vector[index])
if p < firstsum {
firstsum = p
index_result = index
}
}
wg.Done()
}
// main reads n and then n integers, fans out one worker goroutine per
// logical CPU to find the element with the smallest digit sum, and
// prints that element.
func main() {
	fmt.Scanf("%d", &n)
	vector = make([]int, n)
	for i := 0; i < n; i++ {
		fmt.Scanf("%d", &vector[i])
	}
	fmt.Println(vector)
	brojGR := runtime.NumCPU() // number of worker goroutines
	wg.Add(brojGR)
	for rg := 0; rg < brojGR; rg++ {
		go glavna(rg)
	}
	wg.Wait()
	fmt.Println(vector[index_result])
}
Not a full answer to your question, but a few suggestions to make code more readable and stable:
Use English language for names - glavna, brojGR are hard to understand
Add comments to code explaining intent
Try to avoid shared/global variables, especially for concurrent code. glavna(rg) is executed concurrently, and you assign global i and p inside that function, that is a race condition. Sends all the data in and out into function explicitly as argument or function result.
Mutex easily can lock the code, and it is complicated to debug. Simplify its usage. Often defer mutex.Unlock() in the next line after Lock() is good enough.
So I implemented the following prime finding algorithm in go.
primes = []
Assume all numbers are primes (vacuously true)
check = 2
if check is still assumed to be prime append it to primes
multiply check by each prime less than or equal to its minimum factor and
eliminate results from assumed primes.
increment check by 1 and repeat 4 thru 6 until check > limit.
Here is my serial implementation:
package main
import(
"fmt"
"time"
)
// numWithMinFactor pairs a number with its smallest prime factor.
// The zero value (number == 0) is used by findPrimes to mean
// "not seen yet", i.e. the index is still presumed prime.
type numWithMinFactor struct {
	number int
	minfactor int
}
// pow returns base raised to the non-negative integer power by naive
// repeated multiplication (overflow is not checked).
func pow(base int, power int) int {
	product := 1
	for left := power; left > 0; left-- {
		product *= base
	}
	return product
}
// process marks composites produced by check: for each known prime p up
// to check's minimal factor, it records check.number*p in minFactors
// with p as that product's minimal factor. Products above top are
// skipped, and the i+1==len(primes) guard stops the loop before the
// condition would index past the end of primes.
func process(check numWithMinFactor, primes []int, top int, minFactors []numWithMinFactor) {
	var n int
	for i := 0; primes[i] <= check.minfactor; i++ {
		n = check.number * primes[i]
		if n > top {
			break;
		}
		minFactors[n] = numWithMinFactor{n, primes[i]}
		if i+1 == len(primes) {
			break;
		}
	}
}
// findPrimes returns all primes in [2, top], serially. A number whose
// minFactors entry is still the zero value when reached has not been
// produced as any smaller number's multiple, so it is prime; process
// then marks its relevant multiples.
//
// Fix: the original outer loop counter "power" was incremented but
// never read, so the loop collapses to a single counted loop on check.
func findPrimes(top int) []int {
	primes := []int{}
	minFactors := make([]numWithMinFactor, top+2)
	for check := 2; check <= top; check++ {
		if minFactors[check].number == 0 {
			// Never marked as a multiple of anything smaller: prime.
			primes = append(primes, check)
			minFactors[check] = numWithMinFactor{check, check}
		}
		process(minFactors[check], primes, top, minFactors)
	}
	return primes
}
// main benchmarks findPrimes up to one million and reports the time.
func main() {
	fmt.Println("Welcome to prime finder!")
	start := time.Now()
	fmt.Println(findPrimes(1000000))
	elapsed := time.Since(start)
	// Bug fix: Println does not interpret format verbs — the original
	// printed the literal text "%s". Printf is required here.
	fmt.Printf("Finding primes took %s\n", elapsed)
}
This runs great producing all the primes <1,000,000 in about 63ms (mostly printing) and primes <10,000,000 in 600ms on my pc. Now I figure none of the numbers check such that 2^n < check <= 2^(n+1) have factors > 2^n so I can do all the multiplications and elimination for each check in that range in parallel once I have primes up to 2^n. And my parallel implementation is as follows:
package main
import(
"fmt"
"time"
"sync"
)
// numWithMinFactor pairs a number with its smallest prime factor.
// The zero value (number == 0) means "not seen yet" in minFactors.
type numWithMinFactor struct {
	number int
	minfactor int
}
// pow computes base**power for non-negative power via repeated
// multiplication; overflow is not checked.
func pow(base int, power int) int {
	acc := 1
	for step := 0; step < power; step++ {
		acc *= base
	}
	return acc
}
// process is the goroutine body of the parallel sieve: identical to the
// serial version but signals wg when finished. It records check.number*p
// in minFactors for each known prime p up to check's minimal factor.
//
// NOTE(review): many process goroutines write minFactors concurrently
// while findPrimes keeps reading minFactors[check] before wg.Wait() is
// reached — a data race (visible under -race) as well as a lot of
// goroutine overhead for very little work per task.
func process(check numWithMinFactor, primes []int, top int, minFactors []numWithMinFactor, wg *sync.WaitGroup) {
	defer wg.Done()
	var n int
	for i := 0; primes[i] <= check.minfactor; i++ {
		n = check.number * primes[i]
		if n > top {
			break;
		}
		minFactors[n] = numWithMinFactor{n, primes[i]}
		if i+1 == len(primes) {
			break;
		}
	}
}
// findPrimes (parallel variant): primes are discovered serially in
// order, but the composite-marking for each check is launched as its
// own goroutine; wg.Wait() happens only at each power-of-two boundary,
// relying on numbers in (2^k, 2^(k+1)] having no factor above 2^k.
//
// NOTE(review): one goroutine per number means the scheduling overhead
// dwarfs the tiny amount of work per task, and minFactors[check] is
// read here while earlier goroutines may still be writing minFactors —
// a data race.
func findPrimes(top int) []int {
	primes := []int{}
	minFactors := make([]numWithMinFactor, top+2)
	check := 2
	var wg sync.WaitGroup
	for power := 1; check <= top; power++ {
		for check <= pow(2, power) {
			if minFactors[check].number == 0 {
				// Not marked by any smaller number: prime.
				primes = append(primes, check)
				minFactors[check] = numWithMinFactor{check, check}
			}
			wg.Add(1)
			go process(minFactors[check], primes, top, minFactors, &wg)
			check++
			if check > top {
				break;
			}
		}
		wg.Wait()
	}
	return primes
}
// main benchmarks the parallel findPrimes up to one million.
func main() {
	fmt.Println("Welcome to prime finder!")
	start := time.Now()
	fmt.Println(findPrimes(1000000))
	elapsed := time.Since(start)
	// Bug fix: Println does not interpret %s; Printf is needed so the
	// duration is substituted instead of printing the verb literally.
	fmt.Printf("Finding primes took %s\n", elapsed)
}
Unfortunately not only is this implementation slower running up to 1,000,000 in 600ms and up to 10 million in 6 seconds. My intuition tells me that there is potential for parallelism to improve performance however I clearly haven't been able to achieve that and would greatly appreciate any input on how to improve runtime here, or more specifically any insight as to why the parallel solution is slower.
Additionally the parallel solution consumes more memory relative to the serial solution but that is to be expected; the serial solution can grid up to 1,000,000,000 in about 22 seconds where the parallel solution runs out of memory on my system (32GB ram) going for the same target. But I'm asking about runtime here not memory use, I could for example use the zero value state of the minFactors array rather than a separate isPrime []bool true state but I think it is more readable as is.
I've tried passing a pointer for primes []int but that didn't seem to make a difference, using a channel instead of passing the minFactors array to the process function resulted in big time memory use and a much(10x ish) slower performance. I've re-written this algo a couple times to see if I could iron anything out but no luck. Any insights or suggestions would be much appreciated because I think parallelism could make this faster not 10x slower!
Per #Volker's suggestion I limited the number of goroutines to something less than my PC's available logical processors with the following revision; however, I am still getting runtimes that are 10x slower than the serial implementation.
package main
import(
"fmt"
"time"
"sync"
)
// numWithMinFactor pairs a number with its smallest prime factor;
// the zero value marks an index as "not seen yet" in minFactors.
type numWithMinFactor struct {
	number int
	minfactor int
}
// pow raises base to the given non-negative power iteratively
// (no overflow checking).
func pow(base int, power int) int {
	out := 1
	for rem := power; rem > 0; rem-- {
		out *= base
	}
	return out
}
// process marks the composites check.number*p (for each known prime p
// up to check's minimal factor) in minFactors and signals wg when done.
// Products above top are skipped; the i+1==len(primes) guard prevents
// the loop condition from indexing past the end of primes.
//
// NOTE(review): minFactors is shared with other concurrently running
// process goroutines and with the reader in findPrimes — a data race.
func process(check numWithMinFactor, primes []int, top int, minFactors []numWithMinFactor, wg *sync.WaitGroup) {
	defer wg.Done()
	var n int
	for i := 0; primes[i] <= check.minfactor; i++ {
		n = check.number * primes[i]
		if n > top {
			break;
		}
		minFactors[n] = numWithMinFactor{n, primes[i]}
		if i+1 == len(primes) {
			break;
		}
	}
}
// findPrimes (bounded parallel variant): same scheme as before, but at
// most nlogicalProcessors goroutines are launched between wg.Wait()
// calls, per #Volker's suggestion. It is still one goroutine per
// number, so each task's work remains tiny relative to the overhead.
func findPrimes(top int) []int {
	primes := []int{}
	minFactors := make([]numWithMinFactor, top+2)
	check := 2
	nlogicalProcessors := 20 // cap on goroutines in flight per batch
	var wg sync.WaitGroup
	var twoPow int
	for power := 1; check <= top; power++ {
		twoPow = pow(2, power)
		for check <= twoPow {
			// Launch up to nlogicalProcessors marker goroutines, then wait.
			for nLogicalProcessorsInUse := 0; nLogicalProcessorsInUse < nlogicalProcessors; nLogicalProcessorsInUse++ {
				if minFactors[check].number == 0 {
					// Not marked by any smaller number: prime.
					primes = append(primes, check)
					minFactors[check] = numWithMinFactor{check, check}
				}
				wg.Add(1)
				go process(minFactors[check], primes, top, minFactors, &wg)
				check++
				if check > top {
					break;
				}
				if check > twoPow {
					break;
				}
			}
			wg.Wait()
			if check > top {
				break;
			}
		}
	}
	return primes
}
// main benchmarks the bounded parallel findPrimes up to ten million.
func main() {
	fmt.Println("Welcome to prime finder!")
	start := time.Now()
	fmt.Println(findPrimes(10000000))
	elapsed := time.Since(start)
	// Bug fix: the original used Println with a %s verb, which prints
	// the verb literally; Printf performs the substitution.
	fmt.Printf("Finding primes took %s\n", elapsed)
}
tldr; Why is my parallel implementation slower than serial implementation how do I make it faster?
Per #mh-cbon's suggestion I made larger jobs for parallel processing, resulting in the following code.
package main
import(
"fmt"
"time"
"sync"
)
// pow returns base^power (power >= 0) via simple repeated
// multiplication; overflow is not checked.
func pow(base int, power int) int {
	value := 1
	for exp := power; exp > 0; exp-- {
		value *= base
	}
	return value
}
// process marks the multiples check*p as composite — for each known
// prime p up to check's minimal factor — by recording p as the minimal
// factor of the product in minFactors. Products above top stop the
// loop, and the i == len(primes) guard keeps the loop condition from
// indexing past the end of primes.
func process(check int, primes []int, top int, minFactors []int) {
	i := 0
	for primes[i] <= minFactors[check] {
		product := check * primes[i]
		if product > top {
			break
		}
		minFactors[product] = primes[i]
		i++
		if i == len(primes) {
			break
		}
	}
}
// processRange applies process to every value in [start, end] and
// signals the WaitGroup once the whole chunk is done.
func processRange(start int, end int, primes []int, top int, minFactors []int, wg *sync.WaitGroup) {
	defer wg.Done()
	for v := start; v <= end; v++ {
		process(v, primes, top, minFactors)
	}
}
// findPrimes (chunked parallel variant): within each power-of-two
// window, newly discovered primes are recorded serially, and the
// composite-marking work is handed to goroutines in chunks of roughly
// (2^k / 2) / nlogicalProcessors numbers each, so every goroutine does
// a meaningful amount of work.
func findPrimes(top int) []int {
	primes := []int{}
	minFactors := make([]int, top+2)
	check := 2
	nlogicalProcessors := 10 // worker goroutines per window
	var wg sync.WaitGroup
	var twoPow int
	var start int
	var end int
	var stepSize int
	var stepsTaken int
	for power := 1; check <= top; power++ {
		twoPow = pow(2, power)
		// NOTE(review): this first stepSize value is overwritten two
		// lines below without ever being read — dead code.
		stepSize = (twoPow - start) / nlogicalProcessors
		stepsTaken = 0
		stepSize = (twoPow / 2) / nlogicalProcessors
		for check <= twoPow {
			start = check
			end = check + stepSize
			if stepSize == 0 {
				end = twoPow
			}
			if stepsTaken == nlogicalProcessors-1 {
				// The last chunk of the window takes everything left.
				end = twoPow
			}
			if end > top {
				end = top
			}
			// Record any primes in the chunk serially before goroutines
			// are allowed to touch it.
			for check <= end {
				if minFactors[check] == 0 {
					primes = append(primes, check)
					minFactors[check] = check
				}
				check++
			}
			wg.Add(1)
			go processRange(start, end, primes, top, minFactors, &wg)
			if check > top {
				break;
			}
			if check > twoPow {
				break;
			}
			stepsTaken++
		}
		wg.Wait()
		if check > top {
			break;
		}
	}
	return primes
}
// main benchmarks the chunked parallel findPrimes up to one million.
func main() {
	fmt.Println("Welcome to prime finder!")
	start := time.Now()
	fmt.Println(findPrimes(1000000))
	elapsed := time.Since(start)
	// Bug fix: Println does not process the %s verb; use Printf.
	fmt.Printf("Finding primes took %s\n", elapsed)
}
This runs at a similar speed to the serial implementation.
So I did eventually get a parallel version of the code to run slightly faster than the serial version. following suggestions from #mh-cbon (See above). However this implementation did not result in vast improvements relative to the serial implementation (50ms to 10 million compared to 75ms serially) Considering that allocating and writing an []int 0:10000000 takes 25ms I'm not disappointed by these results. As #Volker stated "such stuff often is not limited by CPU but by memory bandwidth." which I believe is the case here.
I would still love to see any additional improvements however I am somewhat satisfied with what I've gained here.
Serial code running up to 2 billion 19.4 seconds
Parallel code running up to 2 billion 11.1 seconds
Initializing []int{0:2Billion} 4.5 seconds
I have a small Golang program that calculates the ith Fibonacci number; however, it appears to overflow for some large numbers, even when the array is changed to type int64. Why is this happening?
package main
import "fmt"
// main prints the 555th Fibonacci number; the value overflows int and
// comes out negative, which is what the question is about.
func main() {
	fib(555) //prints a negative number
}
// fib iterates the Fibonacci recurrence num times starting from the
// pair (0, 1) and prints the final second element. Plain int
// arithmetic is used, so large num silently wraps around (overflows).
func fib(num int) {
	a, b := 0, 1
	for step := 0; step < num; step++ {
		a, b = b, a+b
	}
	fmt.Println(b)
}
The Fibonacci sequence gets very large, very fast. You need to use the math/big package in order to calculate integers this large. Translating your algorithm gives us:
queue := []*big.Int{big.NewInt(0), big.NewInt(1)}
for i := 0; i < num; i++ {
next := new(big.Int).Add(queue[0], queue[1])
queue[0] = queue[1]
queue[1] = next
}
or more concisely:
for i := 0; i < num; i++ {
queue[0].Add(queue[0], queue[1])
queue[0], queue[1] = queue[1], queue[0]
}
https://play.golang.org/p/udIITdDPfrY
Which will output the following number with 555 as the input:
70411399558423479787498867358975911087747238266614004739546108921832817803452035228895708644544982403856194431208467
(this is off by 1 from the expected 555th Fibonacci number, since it's 0 indexed)
Because the 555th fibonacci number is
43516638122555047989641805373140394725407202037260729735885664398655775748034950972577909265605502785297675867877570
which is much too large even for an int64.
How can I generate a stream of unique random number in Go?
I want to guarantee there are no duplicate values in array a using math/rand and/or standard Go library utilities.
// RandomNumberGenerator returns a *rand.Rand seeded from the current
// wall-clock time in nanoseconds.
func RandomNumberGenerator() *rand.Rand {
	source := rand.NewSource(time.Now().UnixNano())
	return rand.New(source)
}
rng := RandomNumberGenerator()
N := 10000
for i := 0; i < N; i++ {
a[i] = rng.Int()
}
There are questions and solutions on how to generate a series of random number in Go, for example, here.
But I would like to generate a series of random numbers that does not duplicate previous values. Is there a standard/recommended way to achieve this in Go?
My guess is to (1) use permutation or to (2) keep track of previously generated numbers and regenerate a value if it's been generated before.
But solution (1) sounds like overkill if I only want a few number and (2) sounds very time consuming if I end up generating a long series of random numbers due to collision, and I guess it's also very memory-consuming.
Use Case: To benchmark a Go program with 10K, 100K, 1M pseudo-random number that has no duplicates.
You should absolutely go with approach 2. Let's assume you're running on a 64-bit machine, and thus generating 63-bit integers (64 bits, but rand.Int never returns negative numbers). Even if you generate 4 billion numbers, there's still only a 1 in 4 billion chance that any given number will be a duplicate. Thus, you'll almost never have to regenerate, and almost never have to regenerate twice.
Try, for example:
// UniqueRand hands out random non-negative ints, guaranteeing that no
// value is returned twice for the lifetime of the generator.
type UniqueRand struct {
	generated map[int]bool // set of values already handed out
}

// Int returns a random int not previously returned by this UniqueRand.
// Robustness fix: the tracking map is allocated lazily, so a zero-value
// UniqueRand is usable — the original panicked with "assignment to
// entry in nil map" unless the caller remembered to initialize the map.
func (u *UniqueRand) Int() int {
	if u.generated == nil {
		u.generated = make(map[int]bool)
	}
	for {
		candidate := rand.Int()
		if !u.generated[candidate] {
			u.generated[candidate] = true
			return candidate
		}
	}
}
I had similar task to pick elements from initial slice by random uniq index. So from slice with 10k elements get 1k random uniq elements.
Here is simple head on solution:
import (
"time"
"math/rand"
)
// getRandomElements picks up to 1000 distinct random elements from
// array (fewer if the array is smaller).
//
// Robustness fix: the count is clamped to len(array). Without the
// clamp, an input shorter than 1000 elements made randomIndex spin
// forever looking for an unused index that does not exist.
func getRandomElements(array []string) []string {
	result := make([]string, 0)
	existingIndexes := make(map[int]struct{}, 0)
	randomElementsCount := 1000
	if randomElementsCount > len(array) {
		randomElementsCount = len(array)
	}
	for i := 0; i < randomElementsCount; i++ {
		randomIndex := randomIndex(len(array), existingIndexes)
		result = append(result, array[randomIndex])
	}
	return result
}
// randomIndex returns a uniformly random index in [0, size) that is not
// yet present in existingIndexes, recording it there before returning.
// It loops (spins) until an unused index comes up, so the caller must
// guarantee that fewer than size indexes are already taken.
//
// NOTE(review): rand.Seed runs on every call here. Seeding belongs at
// program start (since Go 1.20 the global source seeds itself), and
// re-seeding from the wall clock on each call is wasted work that can
// even make the generator repeat when calls land on the same clock tick.
func randomIndex(size int, existingIndexes map[int]struct{}) int {
	rand.Seed(time.Now().UnixNano())
	for {
		randomIndex := rand.Intn(size)
		_, exists := existingIndexes[randomIndex]
		if !exists {
			existingIndexes[randomIndex] = struct{}{}
			return randomIndex
		}
	}
}
I see two reasons for wanting this. You want to test a random number generator, or you want unique random numbers.
You're Testing A Random Number Generator
My first question is why? There's plenty of solid random number generators available. Don't write your own, it's basically dabbling in cryptography and that's never a good idea. Maybe you're testing a system that uses a random number generator to generate random output?
There's a problem: there's no guarantee random numbers are unique. They're random. There's always a possibility of collision. Testing that random output is unique is incorrect.
Instead, you want to test the results are distributed evenly. To do this I'll reference another answer about how to test a random number generator.
You Want Unique Random Numbers
From a practical perspective you don't need guaranteed uniqueness, but to make collisions so unlikely that it's not a concern. This is what UUIDs are for. They're 128 bit Universally Unique IDentifiers. There's a number of ways to generate them for particular scenarios.
UUIDv4 is basically just a 122 bit random number which has some ungodly small chance of a collision. Let's approximate it.
n = how many random numbers you'll generate
M = size of the keyspace (2^122 for a 122 bit random number)
P = probability of collision
P = n^2/2M
Solving for n...
n = sqrt(2MP)
Setting P to something absurd like 1e-12 (one in a trillion), we find you can generate about 3.2 trillion UUIDv4s with a 1 in a trillion chance of collision. You're 1000 times more likely to win the lottery than have a collision in 3.2 trillion UUIDv4s. I think that's acceptable.
Here's a UUIDv4 library in Go to use and a demonstration of generating 1 million unique random 128 bit values.
package main
import (
"fmt"
"github.com/frankenbeanies/uuid4"
)
// main generates 1,000,001 UUIDv4 values as raw bytes to demonstrate
// collision-free unique random IDs.
func main() {
	for i := 0; i <= 1000000; i++ {
		uuid := uuid4.New().Bytes()
		// use the uuid
		_ = uuid // fix: Go rejects an unused local variable, so the snippet as posted did not compile
	}
}
you can generate a unique random number with len(12) using UnixNano in golang time package :
uniqueNumber:=time.Now().UnixNano()/(1<<22)
println(uniqueNumber)
it's always random :D
1- Fast positive and negative int32 unique pseudo random numbers in 296ms using std lib:
package main
import (
"fmt"
"math/rand"
"time"
)
// main generates n unique random int32 values (every other draw is
// negated), counting duplicate draws, then reports the elapsed time
// and how many stored values are positive.
func main() {
	const n = 1000000
	rand.Seed(time.Now().UTC().UnixNano())
	duplicate := 0
	mp := make(map[int32]struct{}, n) // set of values produced so far
	var r int32
	t := time.Now()
	for i := 0; i < n; {
		r = rand.Int31()
		if i&1 == 0 {
			r = -r // alternate sign by parity of the count
		}
		if _, ok := mp[r]; ok {
			duplicate++ // seen before: retry without advancing i
		} else {
			mp[r] = zero
			i++
		}
	}
	fmt.Println(time.Since(t))
	fmt.Println("len: ", len(mp))
	fmt.Println("duplicate: ", duplicate)
	positive := 0
	for k := range mp {
		if k > 0 {
			positive++
		}
	}
	fmt.Println(`n=`, n, `positive=`, positive)
}

// zero is the empty struct value stored in the set above.
var zero = struct{}{}
output:
296.0169ms
len: 1000000
duplicate: 118
n= 1000000 positive= 500000
2- Just fill the map[int32]struct{}:
for i := int32(0); i < n; i++ {
m[i] = zero
}
When reading it is not in order in Go:
for k := range m {
fmt.Print(k, " ")
}
And this just takes 183ms for 1000000 unique numbers, no duplicate (The Go Playground):
package main
import (
"fmt"
"time"
)
// main fills a map with the consecutive keys 0..n-1 — unique by
// construction, no randomness needed — and reports the insert time.
func main() {
	const n = 1000000
	m := make(map[int32]struct{}, n)
	t := time.Now()
	for i := int32(0); i < n; i++ {
		m[i] = zero
	}
	fmt.Println(time.Since(t))
	fmt.Println("len: ", len(m))
	// for k := range m {
	// fmt.Print(k, " ")
	// }
}

// zero is the empty struct stored as the map's value.
var zero = struct{}{}
3- Here is the simple but slow (this takes 22s for 200000 unique numbers), so you may generate and save it to a file once:
package main
import "time"
import "fmt"
import "math/rand"
// main is the "simple but slow" variant: every candidate random value
// is checked against all previously stored values by linear scan, so
// the whole run is quadratic (about 22s for 200000 values per the
// surrounding text).
func main() {
	dup := 0
	t := time.Now()
	const n = 200000
	rand.Seed(time.Now().UTC().UnixNano())
	var a [n]int32
	var exist bool
	for i := 0; i < n; {
		r := rand.Int31()
		exist = false
		// Linear scan over everything generated so far — the quadratic part.
		for j := 0; j < i; j++ {
			if a[j] == r {
				dup++
				fmt.Println(dup)
				exist = true
				break
			}
		}
		if !exist {
			a[i] = r
			i++
		}
	}
	fmt.Println(time.Since(t))
}
Temporary workaround based on #joshlf's answer
// UniqueRand generates random ints, remembering every value it has
// returned so no value repeats.
type UniqueRand struct {
	generated map[int]bool // set of values already returned
	rng *rand.Rand //underlying random number generator
	scope int // exclusive upper bound for generated values; <= 0 means unlimited
}
// NewUniqueRand returns a UniqueRand producing unique values less than N.
// If N <= 0 the scope is unlimited.
// If N > 0 every value lies in [0, N): rng.Int() is non-negative, so
// the modulo in Int never yields a negative result (the original
// comment's "(-scope, +scope)" was inaccurate).
// Once N distinct values have been handed out, Int returns -1 from then on.
func NewUniqueRand(N int) *UniqueRand {
	s1 := rand.NewSource(time.Now().UnixNano())
	r1 := rand.New(s1)
	return &UniqueRand{
		generated: map[int]bool{},
		rng:       r1,
		scope:     N,
	}
}
// Int returns a random value not returned before, restricted to
// [0, scope) when scope > 0. Once the scope is exhausted it returns -1.
func (u *UniqueRand) Int() int {
	if u.scope > 0 && len(u.generated) >= u.scope {
		return -1 // every value in [0, scope) has already been used
	}
	for {
		candidate := u.rng.Int()
		if u.scope > 0 {
			candidate %= u.scope
		}
		if !u.generated[candidate] {
			u.generated[candidate] = true
			return candidate
		}
	}
}
Client side code
// TestSetGet2 draws N unique random values (scope 2N) for several XOR
// masks, building input slices for the benchmark code that follows.
func TestSetGet2(t *testing.T) {
	const N = 10000
	for _, mask := range []int{0, -1, 0x555555, 0xaaaaaa, 0x333333, 0xcccccc, 0x314159} {
		rng := NewUniqueRand(2 * N)
		a := make([]int, N)
		for i := 0; i < N; i++ {
			a[i] = (rng.Int() ^ mask) << 1
		}
		//Benchmark Code
	}
}
I imported the math library in my program, and I was trying to find the minimum of three numbers in the following way:
v1[j+1] = math.Min(v1[j]+1, math.Min(v0[j+1]+1, v0[j]+cost))
where v1 is declared as:
t := "stackoverflow"
v1 := make([]int, len(t)+1)
However, when I run my program I get the following error:
./levenshtein_distance.go:36: cannot use int(v0[j + 1] + 1) (type int) as type float64 in argument to math.Min
I thought it was weird because I have another program where I write
fmt.Println(math.Min(2,3))
and that program outputs 2 without complaining.
so I ended up casting the values as float64, so that math.Min could work:
v1[j+1] = math.Min(float64(v1[j]+1), math.Min(float64(v0[j+1]+1), float64(v0[j]+cost)))
With this approach, I got the following error:
./levenshtein_distance.go:36: cannot use math.Min(int(v1[j] + 1), math.Min(int(v0[j + 1] + 1), int(v0[j] + cost))) (type float64) as type int in assignment
so to get rid of the problem, I just casted the result back to int
I thought this was extremely inefficient and hard to read:
v1[j+1] = int(math.Min(float64(v1[j]+1), math.Min(float64(v0[j+1]+1), float64(v0[j]+cost))))
I also wrote a small minInt function, but I think this should be unnecessary because the other programs that make use of math.Min work just fine when taking integers, so I concluded this has to be a problem of my program and not the library per se.
Is there anything that I'm doing terrible wrong?
Here's a program that you can use to reproduce the issues above, line 36 specifically:
package main
import (
"math"
)
// main demonstrates LevenshteinDistance on two sample strings
// (the result is computed but not printed).
func main() {
	LevenshteinDistance("stackoverflow", "stackexchange")
}
// LevenshteinDistance returns the edit distance between s and t using
// the classic two-row dynamic-programming formulation.
//
// NOTE(review): math.Min only accepts float64, which forces three
// float64 conversions plus an int conversion on every inner-loop cell;
// a small integer min helper would avoid them all.
func LevenshteinDistance(s string, t string) int {
	if s == t {
		return 0
	}
	if len(s) == 0 {
		return len(t)
	}
	if len(t) == 0 {
		return len(s)
	}
	// v0 holds the previous row of distances, v1 the row being built.
	v0 := make([]int, len(t)+1)
	v1 := make([]int, len(t)+1)
	for i := 0; i < len(v0); i++ {
		v0[i] = i
	}
	for i := 0; i < len(s); i++ {
		v1[0] = i + 1
		for j := 0; j < len(t); j++ {
			cost := 0
			if s[i] != t[j] {
				cost = 1
			}
			v1[j+1] = int(math.Min(float64(v1[j]+1), math.Min(float64(v0[j+1]+1), float64(v0[j]+cost))))
		}
		// Current row becomes the previous row for the next iteration.
		for j := 0; j < len(v0); j++ {
			v0[j] = v1[j]
		}
	}
	return v1[len(t)]
}
Until Go 1.18 a one-off function was the standard way; for example, the stdlib's sort.go does it near the top of the file:
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
You might still want or need to use this approach so your code works on Go versions below 1.18!
Starting with Go 1.18, you can write a generic min function which is just as efficient at run time as the hand-coded single-type version, but works with any type with < and > operators:
// min returns the smaller of a and b for any ordered type (integers,
// floats, strings). cmp.Ordered (standard library, Go 1.21+) replaces
// the golang.org/x/exp/constraints dependency used in the original —
// the two constraints have identical type sets.
func min[T cmp.Ordered](a, b T) T {
	if a < b {
		return a
	}
	return b
}
// main demonstrates the generic min on ints, floats, and strings.
func main() {
	fmt.Println(min(1, 2))
	fmt.Println(min(1.5, 2.5))
	fmt.Println(min("Hello", "世界"))
}
There's been discussion of updating the stdlib to add generic versions of existing functions, but if that happens it won't be until a later version.
math.Min(2, 3) happened to work because numeric constants in Go are untyped. Beware of treating float64s as a universal number type in general, though, since integers above 2^53 will get rounded if converted to float64.
There is no built-in min or max function for integers, but it’s simple to write your own. Thanks to support for variadic functions we can even compare more integers with just one call:
// MinOf returns the smallest of the given ints.
// At least one argument is required (it panics on zero arguments).
func MinOf(vars ...int) int {
	smallest := vars[0]
	for _, v := range vars[1:] {
		if v < smallest {
			smallest = v
		}
	}
	return smallest
}
Usage:
MinOf(3, 9, 6, 2)
Similarly here is the max function:
// MaxOf returns the largest of the given ints.
// At least one argument is required (it panics on zero arguments).
func MaxOf(vars ...int) int {
	largest := vars[0]
	for _, v := range vars[1:] {
		if v > largest {
			largest = v
		}
	}
	return largest
}
For example,
package main
import "fmt"
// min reports the smaller of two ints.
func min(x, y int) int {
	if y < x {
		return y
	}
	return x
}
// main computes one cell of the Levenshtein recurrence using the
// integer min helper and prints it.
func main() {
	t := "stackoverflow"
	v0 := make([]int, len(t)+1)
	v1 := make([]int, len(t)+1)
	cost := 1
	j := 0
	v1[j+1] = min(v1[j]+1, min(v0[j+1]+1, v0[j]+cost))
	fmt.Println(v1[j+1])
}
Output:
1
Though the question is quite old, maybe my package imath can be helpful for someone who does not like reinventing the wheel. There are a few functions for finding the minimum of two integers: ix.Min (for int), i8.Min (for int8), ux.Min (for uint) and so on. The package can be obtained with go get, imported into your project by URL, and its functions referred to as typeabbreviation.FuncName, for example:
package main
import (
"fmt"
"<Full URL>/go-imath/ix"
)
// main demonstrates ix.Min from the third-party go-imath package.
func main() {
	a, b := 45, -42
	fmt.Println(ix.Min(a, b)) // Output: -42
}
As the accepted answer states, with the introduction of generics in go 1.18 it's now possible to write a generic function that provides min/max for different numeric types (there is not one built into the language). And with variadic arguments we can support comparing 2 elements or a longer list of elements.
// Min returns the smallest of one or more ordered values; it panics if
// called with no arguments. cmp.Ordered (standard library, Go 1.21+)
// replaces the golang.org/x/exp/constraints dependency — the two
// constraints have identical type sets.
func Min[T cmp.Ordered](args ...T) T {
	min := args[0]
	for _, x := range args {
		if x < min {
			min = x
		}
	}
	return min
}
// Max returns the largest of one or more ordered values; it panics if
// called with no arguments. cmp.Ordered (standard library, Go 1.21+)
// replaces the golang.org/x/exp/constraints dependency — the two
// constraints have identical type sets.
func Max[T cmp.Ordered](args ...T) T {
	max := args[0]
	for _, x := range args {
		if x > max {
			max = x
		}
	}
	return max
}
example calls:
Max(1, 2) // 2
Max(4, 5, 3, 1, 2) // 5
Could use https://github.com/pkg/math:
import (
"fmt"
"github.com/pkg/math"
)
// main demonstrates Min from the third-party github.com/pkg/math
// package, which works directly on ints.
func main() {
	a, b := 45, -42
	fmt.Println(math.Min(a, b)) // Output: -42
}
Since the issue has already been resolved, I would like to add a few words. Always remember that the math package in Golang operates on float64. You can use type conversion to cast int into a float64. Keep in mind to account for type ranges. For example, you cannot fit a float64 into an int16 if the number exceeds the limit for int16 which is 32767. Last but not least, if you convert a float into an int in Golang, the decimal points get truncated without any rounding.
If you want the minimum of a set of N integers you can use (assuming N > 0):
import "sort"
// min returns the smallest element of set (set must be non-empty) by
// sorting a copy ascending and taking the first element.
//
// Fix: the original called sort.Slice on the caller's slice, silently
// reordering it — a surprising side effect for a read-only-sounding
// helper. Sorting a copy keeps the caller's data untouched while the
// less function works exactly as before.
func min(set []int) int {
	sorted := make([]int, len(set))
	copy(sorted, set)
	sort.Slice(sorted, func(i, j int) bool {
		return sorted[i] < sorted[j]
	})
	return sorted[0]
}
Where the second argument to min function is your less function, that is, the function that decides when an element i of the passed slice is less than an element j
Check it out here in Go Playground: https://go.dev/play/p/lyQYlkwKrsA