I am trying to write a program that pauses for random intervals of time that are decimal numbers.
Here is the program that is not working:
package main
import (
"fmt"
"math/rand"
"time"
)
var test int
func main() {
intervalGenerate()
}
func intervalGenerate() {
var randint float64
rand.Seed(time.Now().UnixNano())
randInterval := randFloats(3, 7, 1)
randint = randInterval[0]
duration := time.Duration(randint) * time.Second
fmt.Println("Sleeping for", duration)
time.Sleep(duration)
fmt.Println("Resuming")
}
func randFloats(min, max float64, n int) []float64 {
res := make([]float64, n)
for i := range res {
res[i] = min + rand.Float64()*(max-min)
}
return res
}
Currently a random number is generated between 3 and 7 (including decimals) but Sleep rounds to the nearest round number.
From what I understand the reason this is failing is because Sleep takes Duration, which is an Int64:
func Sleep(d Duration)
Is there a way to sleep a program for fractions of a second?
Go Playground:
https://play.golang.org/p/z-dnDBnUfxr
Use time.Millisecond, time.Microsecond, or time.Nanosecond depending on the level of granularity you need.
// sleep for 2.5 seconds
milliseconds := 2500
time.Sleep(time.Duration(milliseconds) * time.Millisecond)
Related
So I implemented the following prime finding algorithm in go.
primes = []
Assume all numbers are primes (vacuously true)
check = 2
if check is still assumed to be prime append it to primes
multiply check by each prime less than or equal to its minimum factor and
eliminate results from assumed primes.
increment check by 1 and repeat 4 thru 6 until check > limit.
Here is my serial implementation:
package main
import(
"fmt"
"time"
)
type numWithMinFactor struct {
number int
minfactor int
}
func pow(base int, power int) int{
result := 1
for i:=0;i<power;i++{
result*=base
}
return result
}
func process(check numWithMinFactor,primes []int,top int,minFactors []numWithMinFactor){
var n int
for i:=0;primes[i]<=check.minfactor;i++{
n = check.number*primes[i]
if n>top{
break;
}
minFactors[n] = numWithMinFactor{n,primes[i]}
if i+1 == len(primes){
break;
}
}
}
func findPrimes(top int) []int{
primes := []int{}
minFactors := make([]numWithMinFactor,top+2)
check := 2
for power:=1;check <= top;power++{
if minFactors[check].number == 0{
primes = append(primes,check)
minFactors[check] = numWithMinFactor{check,check}
}
process(minFactors[check],primes,top,minFactors)
check++
}
return primes
}
func main(){
fmt.Println("Welcome to prime finder!")
start := time.Now()
fmt.Println(findPrimes(1000000))
elapsed := time.Since(start)
fmt.Println("Finding primes took %s", elapsed)
}
This runs great producing all the primes <1,000,000 in about 63ms (mostly printing) and primes <10,000,000 in 600ms on my pc. Now I figure none of the numbers check such that 2^n < check <= 2^(n+1) have factors > 2^n so I can do all the multiplications and elimination for each check in that range in parallel once I have primes up to 2^n. And my parallel implementation is as follows:
package main
import(
"fmt"
"time"
"sync"
)
type numWithMinFactor struct {
number int
minfactor int
}
func pow(base int, power int) int{
result := 1
for i:=0;i<power;i++{
result*=base
}
return result
}
func process(check numWithMinFactor,primes []int,top int,minFactors []numWithMinFactor, wg *sync.WaitGroup){
defer wg.Done()
var n int
for i:=0;primes[i]<=check.minfactor;i++{
n = check.number*primes[i]
if n>top{
break;
}
minFactors[n] = numWithMinFactor{n,primes[i]}
if i+1 == len(primes){
break;
}
}
}
func findPrimes(top int) []int{
primes := []int{}
minFactors := make([]numWithMinFactor,top+2)
check := 2
var wg sync.WaitGroup
for power:=1;check <= top;power++{
for check <= pow(2,power){
if minFactors[check].number == 0{
primes = append(primes,check)
minFactors[check] = numWithMinFactor{check,check}
}
wg.Add(1)
go process(minFactors[check],primes,top,minFactors,&wg)
check++
if check>top{
break;
}
}
wg.Wait()
}
return primes
}
func main(){
fmt.Println("Welcome to prime finder!")
start := time.Now()
fmt.Println(findPrimes(1000000))
elapsed := time.Since(start)
fmt.Println("Finding primes took %s", elapsed)
}
Unfortunately not only is this implementation slower running up to 1,000,000 in 600ms and up to 10 million in 6 seconds. My intuition tells me that there is potential for parallelism to improve performance however I clearly haven't been able to achieve that and would greatly appreciate any input on how to improve runtime here, or more specifically any insight as to why the parallel solution is slower.
Additionally the parallel solution consumes more memory relative to the serial solution but that is to be expected; the serial solution can grid up to 1,000,000,000 in about 22 seconds where the parallel solution runs out of memory on my system (32GB ram) going for the same target. But I'm asking about runtime here not memory use, I could for example use the zero value state of the minFactors array rather than a separate isPrime []bool true state but I think it is more readable as is.
I've tried passing a pointer for primes []int but that didn't seem to make a difference, using a channel instead of passing the minFactors array to the process function resulted in big time memory use and a much(10x ish) slower performance. I've re-written this algo a couple times to see if I could iron anything out but no luck. Any insights or suggestions would be much appreciated because I think parallelism could make this faster not 10x slower!
Par #Volker's suggestion I limited the number of processes to somthing less than my pc's available logical processes with the following revision however I am still getting runtimes that are 10x slower than the serial implementation.
package main
import(
"fmt"
"time"
"sync"
)
type numWithMinFactor struct {
number int
minfactor int
}
func pow(base int, power int) int{
result := 1
for i:=0;i<power;i++{
result*=base
}
return result
}
func process(check numWithMinFactor,primes []int,top int,minFactors []numWithMinFactor, wg *sync.WaitGroup){
defer wg.Done()
var n int
for i:=0;primes[i]<=check.minfactor;i++{
n = check.number*primes[i]
if n>top{
break;
}
minFactors[n] = numWithMinFactor{n,primes[i]}
if i+1 == len(primes){
break;
}
}
}
func findPrimes(top int) []int{
primes := []int{}
minFactors := make([]numWithMinFactor,top+2)
check := 2
nlogicalProcessors := 20
var wg sync.WaitGroup
var twoPow int
for power:=1;check <= top;power++{
twoPow = pow(2,power)
for check <= twoPow{
for nLogicalProcessorsInUse := 0 ; nLogicalProcessorsInUse < nlogicalProcessors; nLogicalProcessorsInUse++{
if minFactors[check].number == 0{
primes = append(primes,check)
minFactors[check] = numWithMinFactor{check,check}
}
wg.Add(1)
go process(minFactors[check],primes,top,minFactors,&wg)
check++
if check>top{
break;
}
if check>twoPow{
break;
}
}
wg.Wait()
if check>top{
break;
}
}
}
return primes
}
func main(){
fmt.Println("Welcome to prime finder!")
start := time.Now()
fmt.Println(findPrimes(10000000))
elapsed := time.Since(start)
fmt.Println("Finding primes took %s", elapsed)
}
tldr; Why is my parallel implementation slower than serial implementation how do I make it faster?
Par #mh-cbon's I made larger jobs for parallel processing resulting in the following code.
package main
import(
"fmt"
"time"
"sync"
)
func pow(base int, power int) int{
result := 1
for i:=0;i<power;i++{
result*=base
}
return result
}
func process(check int,primes []int,top int,minFactors []int){
var n int
for i:=0;primes[i]<=minFactors[check];i++{
n = check*primes[i]
if n>top{
break;
}
minFactors[n] = primes[i]
if i+1 == len(primes){
break;
}
}
}
func processRange(start int,end int,primes []int,top int,minFactors []int, wg *sync.WaitGroup){
defer wg.Done()
for start <= end{
process(start,primes,top,minFactors)
start++
}
}
func findPrimes(top int) []int{
primes := []int{}
minFactors := make([]int,top+2)
check := 2
nlogicalProcessors := 10
var wg sync.WaitGroup
var twoPow int
var start int
var end int
var stepSize int
var stepsTaken int
for power:=1;check <= top;power++{
twoPow = pow(2,power)
stepSize = (twoPow-start)/nlogicalProcessors
stepsTaken = 0
stepSize = (twoPow/2)/nlogicalProcessors
for check <= twoPow{
start = check
end = check+stepSize
if stepSize == 0{
end = twoPow
}
if stepsTaken == nlogicalProcessors-1{
end = twoPow
}
if end>top {
end = top
}
for check<=end {
if minFactors[check] == 0{
primes = append(primes,check)
minFactors[check] = check
}
check++
}
wg.Add(1)
go processRange(start,end,primes,top,minFactors,&wg)
if check>top{
break;
}
if check>twoPow{
break;
}
stepsTaken++
}
wg.Wait()
if check>top{
break;
}
}
return primes
}
func main(){
fmt.Println("Welcome to prime finder!")
start := time.Now()
fmt.Println(findPrimes(1000000))
elapsed := time.Since(start)
fmt.Println("Finding primes took %s", elapsed)
}
This runs at a similar speed to the serial implementation.
So I did eventually get a parallel version of the code to run slightly faster than the serial version. following suggestions from #mh-cbon (See above). However this implementation did not result in vast improvements relative to the serial implementation (50ms to 10 million compared to 75ms serially) Considering that allocating and writing an []int 0:10000000 takes 25ms I'm not disappointed by these results. As #Volker stated "such stuff often is not limited by CPU but by memory bandwidth." which I believe is the case here.
I would still love to see any additional improvements however I am somewhat satisfied with what I've gained here.
Serial code running up to 2 billion 19.4 seconds
Parallel code running up to 2 billion 11.1 seconds
Initializing []int{0:2Billion} 4.5 seconds
I am running a go concurrent program with the below two case and observed that It is taking same time irrespective of no of CPU it using while execution.
Case1: When cpuUsed = 1
program took 3m20.973185s.
when I am increasing the no of CPU used.
Case2: when cpuUsed = 8
program took 3m20.9330516s.
Please find the below Go code for more details.
package main
import (
"fmt"
"math/rand"
"runtime"
"sync"
"time"
)
var waitG sync.WaitGroup
var cpuUsed = 1
var maxRandomNums = 1000
func init() {
maxCPU := runtime.NumCPU() //It'll give us the max CPU :)
cpuUsed = 8 //getting same time taken for 1 and 8
runtime.GOMAXPROCS(cpuUsed)
fmt.Printf("Number of CPUs (Total=%d - Used=%d) \n", maxCPU, cpuUsed)
}
func main() {
start := time.Now()
ids := []string{"rotine1", "routine2", "routine3", "routine4"}
waitG.Add(4)
for i := range ids {
go numbers(ids[i])
}
waitG.Wait()
elapsed := time.Since(start)
fmt.Printf("\nprogram took %s. \n", elapsed)
}
func numbers(id string) {
rand.Seed(time.Now().UnixNano())
for i := 1; i <= maxRandomNums; i++ {
time.Sleep(200 * time.Millisecond)
fmt.Printf("%s-%d ", id, rand.Intn(20)+20)
}
waitG.Done()
}
you will find out:
total time (3 min 20s) = 200s = sleep(200ms) * loops(1000)
Let's simplify your code and focus on CPU usage:
Remove the Sleep, which does not use the CPU at all
fmt.Println as a stdio, does not use the CPU
Random number did nothing but introduce uncertainty into the program, remove it
The only code that takes CPU in the goroutine is the "rand.Intn(20)+20", making it a constant addition
Increase the "maxRandomNums"
then your code will be like this, run it again
package main
import (
"fmt"
"runtime"
"sync"
"time"
)
var waitG sync.WaitGroup
var cpuUsed = 1
var maxRandomNums = 1000000000
func init() {
maxCPU := runtime.NumCPU() //It'll give us the max CPU :)
cpuUsed = 8 //getting same time taken for 1 and 8
runtime.GOMAXPROCS(cpuUsed)
fmt.Printf("Number of CPUs (Total=%d - Used=%d) \n", maxCPU, cpuUsed)
}
func main() {
start := time.Now()
ids := []string{"rotine1", "routine2", "routine3", "routine4"}
waitG.Add(4)
for i := range ids {
go numbers(ids[i])
}
waitG.Wait()
elapsed := time.Since(start)
fmt.Printf("\nprogram took %s. \n", elapsed)
}
func numbers(id string) {
// rand.Seed(time.Now().UnixNano())
for i := 1; i <= maxRandomNums; i++ {
// time.Sleep(200 * time.Millisecond)
// fmt.Printf("%s-%d ", id, rand.Intn(20)+20)
_ = i + 20
}
waitG.Done()
}
I have successfully made a synchronous solution without goroutines to findMax of compute calls.
package main
import (
"context"
"fmt"
"math/rand"
"time"
)
func findMax(ctx context.Context, concurrency int) uint64 {
var (
max uint64 = 0
num uint64 = 0
)
for i := 0; i < concurrency; i++ {
num = compute()
if num > max {
max = num
}
}
return max
}
func compute() uint64 {
// NOTE: This is a MOCK implementation of the blocking operation.
time.Sleep(time.Duration(rand.Int63n(100)) * time.Millisecond)
return rand.Uint64()
}
func main() {
maxDuration := 2 * time.Second
concurrency := 10
ctx, cancel := context.WithTimeout(context.Background(), maxDuration)
defer cancel()
max := findMax(ctx, concurrency)
fmt.Println(max)
}
https://play.golang.org/p/lYXRNTDtNCI
When I attempt to use goroutines to use findMax to repeatedly call compute function using as many goroutines until context ctx is canceled by the caller main function. I am getting 0 every time and not the expected max of the grouting compute function calls. I have tried different ways to do it and get deadlock most of the time.
package main
import (
"context"
"fmt"
"math/rand"
"time"
)
func findMax(ctx context.Context, concurrency int) uint64 {
var (
max uint64 = 0
num uint64 = 0
)
for i := 0; i < concurrency; i++ {
select {
case <- ctx.Done():
return max
default:
go func() {
num = compute()
if num > max {
max = num
}
}()
}
}
return max
}
func compute() uint64 {
// NOTE: This is a MOCK implementation of the blocking operation.
time.Sleep(time.Duration(rand.Int63n(100)) * time.Millisecond)
return rand.Uint64()
}
func main() {
maxDuration := 2 * time.Second
concurrency := 10
ctx, cancel := context.WithTimeout(context.Background(), maxDuration)
defer cancel()
max := findMax(ctx, concurrency)
fmt.Println(max)
}
https://play.golang.org/p/3fFFq2xlXAE
Your program has multiple problems:
You are spawning multiple goroutines that are operating on shared variables i.e., max and num leading to data race as they are not protected (eg. by Mutex).
Here num is modified by every worker goroutine but it should have been local to the worker otherwise the computed data could be lost (eg. one worker goroutine computed a result and stored it in num, but right after that a second worker computes and replaces the value of num).
num = compute // Should be "num := compute"
You are not waiting for every goroutine to finish it's computation and it may result in incorrect results as every workers computation wasn't taken into account even though context wasn't cancelled. Use sync.WaitGroup or channels to fix this.
Here's a sample program that addresses most of the issues in your code:
package main
import (
"context"
"fmt"
"math/rand"
"sync"
"time"
)
type result struct {
sync.RWMutex
max uint64
}
func findMax(ctx context.Context, workers int) uint64 {
var (
res = result{}
wg = sync.WaitGroup{}
)
for i := 0; i < workers; i++ {
select {
case <-ctx.Done():
// RLock to read res.max
res.RLock()
ret := res.max
res.RUnlock()
return ret
default:
wg.Add(1)
go func() {
defer wg.Done()
num := compute()
// Lock so that read from res.max and write
// to res.max is safe. Else, data race could
// occur.
res.Lock()
if num > res.max {
res.max = num
}
res.Unlock()
}()
}
}
// Wait for all the goroutine to finish work i.e., all
// workers are done computing and updating the max.
wg.Wait()
return res.max
}
func compute() uint64 {
rnd := rand.Int63n(100)
time.Sleep(time.Duration(rnd) * time.Millisecond)
return rand.Uint64()
}
func main() {
maxDuration := 2 * time.Second
concurrency := 10
ctx, cancel := context.WithTimeout(context.Background(), maxDuration)
defer cancel()
fmt.Println(findMax(ctx, concurrency))
}
As #Brits pointed out in the comments that when context is cancelled make sure that you stop those worker goroutines to stop processing (if possible) because it is not needed anymore.
I tried to compute the integral concurrently, but my program ended up being slower than computing the integral with a normal for loop. What am I doing wrong?
package main
import (
"fmt"
"math"
"sync"
"time"
)
type Result struct {
result float64
lock sync.RWMutex
}
var wg sync.WaitGroup
var result Result
func main() {
now := time.Now()
a := 0.0
b := 1.0
n := 100000.0
deltax := (b - a) / n
wg.Add(int(n))
for i := 0.0; i < n; i++ {
go f(a, deltax, i)
}
wg.Wait()
fmt.Println(deltax * result.result)
fmt.Println(time.Now().Sub(now))
}
func f(a float64, deltax float64, i float64) {
fx := math.Sqrt(a + deltax * (i + 0.5))
result.lock.Lock()
result.result += fx
result.lock.Unlock()
wg.Done()
}
3- For performance gain, you may divide tasks per CPU cores without using lock sync.RWMutex:
+30x Optimizations using channels and runtime.NumCPU(), this takes 2ms on 2 Cores and 993µs on 8 Cores, while Your sample code takes 61ms on 2 Cores and 40ms on 8 Cores:
See this working sample code and outputs:
package main
import (
"fmt"
"math"
"runtime"
"time"
)
func main() {
nCPU := runtime.NumCPU()
fmt.Println("nCPU =", nCPU)
ch := make(chan float64, nCPU)
startTime := time.Now()
a := 0.0
b := 1.0
n := 100000.0
deltax := (b - a) / n
stepPerCPU := n / float64(nCPU)
for start := 0.0; start < n; {
stop := start + stepPerCPU
go f(start, stop, a, deltax, ch)
start = stop
}
integral := 0.0
for i := 0; i < nCPU; i++ {
integral += <-ch
}
fmt.Println(time.Now().Sub(startTime))
fmt.Println(deltax * integral)
}
func f(start, stop, a, deltax float64, ch chan float64) {
result := 0.0
for i := start; i < stop; i++ {
result += math.Sqrt(a + deltax*(i+0.5))
}
ch <- result
}
Output on 2 Cores:
nCPU = 2
2.0001ms
0.6666666685900485
Output on 8 Cores:
nCPU = 8
993µs
0.6666666685900456
Your sample code, Output on 2 Cores:
0.6666666685900424
61.0035ms
Your sample code, Output on 8 Cores:
0.6666666685900415
40.9964ms
2- For good benchmark statistics, use large number of samples (big n):
As you See here using 2 Cores this takes 110ms on 2 Cores, but on this same CPU
using 1 Core this takes 215ms with n := 10000000.0:
With n := 10000000.0 and single goroutine, see this working sample code:
package main
import (
"fmt"
"math"
"time"
)
func main() {
now := time.Now()
a := 0.0
b := 1.0
n := 10000000.0
deltax := (b - a) / n
result := 0.0
for i := 0.0; i < n; i++ {
result += math.Sqrt(a + deltax*(i+0.5))
}
fmt.Println(time.Now().Sub(now))
fmt.Println(deltax * result)
}
Output:
215.0123ms
0.6666666666685884
With n := 10000000.0 and 2 goroutines, see this working sample code:
package main
import (
"fmt"
"math"
"runtime"
"time"
)
func main() {
nCPU := runtime.NumCPU()
fmt.Println("nCPU =", nCPU)
ch := make(chan float64, nCPU)
startTime := time.Now()
a := 0.0
b := 1.0
n := 10000000.0
deltax := (b - a) / n
stepPerCPU := n / float64(nCPU)
for start := 0.0; start < n; {
stop := start + stepPerCPU
go f(start, stop, a, deltax, ch)
start = stop
}
integral := 0.0
for i := 0; i < nCPU; i++ {
integral += <-ch
}
fmt.Println(time.Now().Sub(startTime))
fmt.Println(deltax * integral)
}
func f(start, stop, a, deltax float64, ch chan float64) {
result := 0.0
for i := start; i < stop; i++ {
result += math.Sqrt(a + deltax*(i+0.5))
}
ch <- result
}
Output:
nCPU = 2
110.0063ms
0.6666666666686073
1- There is an optimum point for number of Goroutines, And from this point forward increasing number of Goroutines doesn't reduce program execution time:
On 2 Cores CPU, with the following code, The result is:
nCPU: 1, 2, 4, 8, 16
Time: 2.1601236s, 1.1220642s, 1.1060633s, 1.1140637s, 1.1380651s
As you see from nCPU=1 to nCPU=2 time decrease is big enough but after this point it is not much, so nCPU=2 on 2 Cores CPU is Optimum point for this Sample code, so using nCPU := runtime.NumCPU() is enough here.
package main
import (
"fmt"
"math"
"time"
)
func main() {
nCPU := 2 //2.1601236s#1 1.1220642s#2 1.1060633s#4 1.1140637s#8 1.1380651s#16
fmt.Println("nCPU =", nCPU)
ch := make(chan float64, nCPU)
startTime := time.Now()
a := 0.0
b := 1.0
n := 100000000.0
deltax := (b - a) / n
stepPerCPU := n / float64(nCPU)
for start := 0.0; start < n; {
stop := start + stepPerCPU
go f(start, stop, a, deltax, ch)
start = stop
}
integral := 0.0
for i := 0; i < nCPU; i++ {
integral += <-ch
}
fmt.Println(time.Now().Sub(startTime))
fmt.Println(deltax * integral)
}
func f(start, stop, a, deltax float64, ch chan float64) {
result := 0.0
for i := start; i < stop; i++ {
result += math.Sqrt(a + deltax*(i+0.5))
}
ch <- result
}
Unless the time taken by the activity in the goroutine takes a lot more time than needed to switch contexts, carry out the task and use a mutex to update a value, it would be faster to do it serially.
Take a look at a slightly modified version. All I've done is add a delay of 1 microsecond in the f() function.
package main
import (
"fmt"
"math"
"sync"
"time"
)
type Result struct {
result float64
lock sync.RWMutex
}
var wg sync.WaitGroup
var result Result
func main() {
fmt.Println("concurrent")
concurrent()
result.result = 0
fmt.Println("serial")
serial()
}
func concurrent() {
now := time.Now()
a := 0.0
b := 1.0
n := 100000.0
deltax := (b - a) / n
wg.Add(int(n))
for i := 0.0; i < n; i++ {
go f(a, deltax, i, true)
}
wg.Wait()
fmt.Println(deltax * result.result)
fmt.Println(time.Now().Sub(now))
}
func serial() {
now := time.Now()
a := 0.0
b := 1.0
n := 100000.0
deltax := (b - a) / n
for i := 0.0; i < n; i++ {
f(a, deltax, i, false)
}
fmt.Println(deltax * result.result)
fmt.Println(time.Now().Sub(now))
}
func f(a, deltax, i float64, concurrent bool) {
time.Sleep(1 * time.Microsecond)
fx := math.Sqrt(a + deltax*(i+0.5))
if concurrent {
result.lock.Lock()
result.result += fx
result.lock.Unlock()
wg.Done()
} else {
result.result += fx
}
}
With the delay, the result was as follows (the concurrent version is much faster):
concurrent
0.6666666685900424
624.914165ms
serial
0.6666666685900422
5.609195767s
Without the delay:
concurrent
0.6666666685900428
50.771275ms
serial
0.6666666685900422
749.166µs
As you can see, the longer it takes to complete a task, the more sense it makes to do it concurrently, if possible.
I trying to generate a random number beetwen a min value and a max value,
but seems I'm lost with this, what is wrong?
package main
import (
"crypto/rand"
"encoding/binary"
"fmt"
)
func genRandNum(min, max int8) int {
var num int8
binary.Read(rand.Reader, binary.LittleEndian, &num)
return int(num*(max-min)+min)
}
func main() {
// trying to get a random number beetwen -10 and 10
fmt.Println(genRandNum(-10,10))
}
How about this
func main() {
fmt.Println(genRandNum(-10, 10))
}
func genRandNum(min, max int64) int64 {
// calculate the max we will be using
bg := big.NewInt(max - min)
// get big.Int between 0 and bg
// in this case 0 to 20
n, err := rand.Int(rand.Reader, bg)
if err != nil {
panic(err)
}
// add n to min to support the passed in range
return n.Int64() + min
}
Go play