Parallelism in a Golang loop

I have a project and need to run it on multiple cores of a CPU to get more speed. I have used the OpenMP library in Fortran, but I am not familiar with parallelism in Go. I tried goroutines, but that went wrong, made a mess, and gave me incorrect results. This is my code:
package main
import (
"bufio"
"fmt"
"log"
"math"
"math/rand"
"os"
"time"
)
const (
n_particles int = 2048
n_steps int = 1000000
dt float64 = 1.0
v0 float64 = 0.50
radius float64 = 1.0
f_intensity float64 = 1.8
scale float64 = 32.0
alpha float64 = 1.0 / 36.0
)
var (
x [n_particles + 1]float64
y [n_particles + 1]float64
angles [n_particles + 1]float64
vx [n_particles + 1]float64
vy [n_particles + 1]float64
order [n_steps + 1]float64
)
func main() {
/////randomizer
vstart := time.Now()
rsource := rand.NewSource(time.Now().UnixNano())
randomizer := rand.New(rsource)
for i := 0; i <= n_particles; i++ {
x[i] = (randomizer.Float64()) * scale
y[i] = (randomizer.Float64()) * scale
angles[i] = (randomizer.Float64()) * math.Pi * 2
sin, cos := math.Sincos(angles[i])
vx[i] = v0 * cos
vy[i] = v0 * sin
}
//////main loop
for i := 0; i <= n_steps; i++ {
start := time.Now()
for j := 0; j <= n_particles; j++ {
x[j] = x[j] + (vx[j] * dt)
//x[j] = math.Mod(x[j], scale)
if x[j] < 0.0 {
x[j] = x[j] + scale
}
if x[j] >= scale {
x[j] = x[j] - scale
}
y[j] = y[j] + (vy[j] * dt)
//y[j] = math.Mod(x[j], scale)
if y[j] < 0.0 {
y[j] = y[j] + scale
}
if y[j] >= scale {
y[j] = y[j] - scale
}
}
type intpos struct {
x, y int64
}
adjacencyIndex := make(map[intpos][]int)
////getting each boxes particles
for j := 0; j <= n_particles; j++ {
// . . .
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j])) // getting particle box
adjacencyIndex[intpos{ix, iy}] = append(adjacencyIndex[intpos{ix, iy}], j) // adding particles to boxes
}
/////////
m_angles := angles
Now I want the following loop to run in parallel:
////particle loop - I WANT FOLLOWING LOOP PARALLEL
for j := 0; j <= n_particles; j++ {
sumanglesx := 0.0
sumanglesy := 0.0
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
// fxi = math.Floor(x[j])
// fyi = math.Floor(y[j])
for dx := -1; dx <= 1; dx++ {
for dy := -1; dy <= 1; dy++ {
adjacentParticles := adjacencyIndex[intpos{ix + int64(dx), iy + int64(dy)}]
for _, k := range adjacentParticles {
dist := ((x[k] - x[j]) * (x[k] - x[j])) + ((y[k] - y[j]) * (y[k] - y[j]))
if dist < radius {
sy, sx := math.Sincos(angles[k])
if k <= j {
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
} else {
sx = alpha * sx
sy = alpha * sy
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
}
}
}
}
}
bsource := rand.NewSource(time.Now().UnixNano())
bandomizer := rand.New(bsource)
sumanglesy = sumanglesy
sumanglesx = sumanglesx
r_angles := math.Atan2(sumanglesy, sumanglesx)
}
}
}
I have marked the one loop that should run in parallel.

Here are two approaches to try out: https://play.golang.org/p/O1uB2zzJEC5
package main
import (
"fmt"
"sync"
)
func main() {
waitGroupApproach()
channelApproach()
}
func waitGroupApproach() {
fmt.Println("waitGroupApproach")
var waitgroup sync.WaitGroup
result_table := make([]int, 6, 6)
for j := 0; j <= 5; j++ {
waitgroup.Add(1)
go func(index int) {
fmt.Println(index) // try putting `j` here instead of `index`
result_table[index] = index*2
waitgroup.Done()
}(j) // you have to pass any for-loop variables into the closure as arguments,
// because otherwise all goroutines would likely see the last value of j
// (they will likely run only after the loop has finished)
}
fmt.Println("waiting")
waitgroup.Wait()
// process results further
fmt.Println("finished")
fmt.Println(result_table)
}
func channelApproach() {
fmt.Println("\nchannelApproach")
type intpos struct {
x, y, index int
}
results := make(chan intpos)
// initialize routines
for j := 0; j <= 5; j++ {
go func(index int) {
// do processing
results <- intpos{index*2, index*3, index}
}(j)
}
fmt.Println("Waiting..")
// collect results, iterate the same number of times
result_table := make([]int, 6)
for j := 0; j <= 5; j++ {
r := <- results
// watch out: the order might not be the same as the invocation order,
// which is why I store the index in the results as well
fmt.Println(r.index, r.x, r.y)
result_table[r.index] = r.x
}
fmt.Println("Finished..")
fmt.Println(result_table)
}
I prefer the channel approach because it feels like more idiomatic Go to me, and it makes it easier to handle panics, error conditions, etc.
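Applied to the particle loop in the question, the WaitGroup pattern might look roughly like the sketch below. It is untested and makes a few assumptions: "sync" is added to the imports, the per-particle results go into a separate newAngles slice so that every goroutine writes only its own slot, and the neighbour angles are read from the m_angles snapshot rather than from angles (I assume that is what the snapshot was made for). Any noise term and the copy back into angles would then happen in a single-threaded pass after wg.Wait(). With 2048 particles per step you may also want to hand out chunks of particles to runtime.NumCPU() workers instead of one goroutine per particle, but the race-free structure stays the same.

var wg sync.WaitGroup
newAngles := make([]float64, n_particles+1)
for j := 0; j <= n_particles; j++ {
    wg.Add(1)
    go func(j int) {
        defer wg.Done()
        sumanglesx, sumanglesy := 0.0, 0.0
        ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
        for dx := -1; dx <= 1; dx++ {
            for dy := -1; dy <= 1; dy++ {
                for _, k := range adjacencyIndex[intpos{ix + int64(dx), iy + int64(dy)}] {
                    dist := (x[k]-x[j])*(x[k]-x[j]) + (y[k]-y[j])*(y[k]-y[j])
                    if dist < radius {
                        sy, sx := math.Sincos(m_angles[k])
                        if k > j {
                            sx, sy = alpha*sx, alpha*sy // same alpha weighting as the original else branch
                        }
                        sumanglesx += sx
                        sumanglesy += sy
                    }
                }
            }
        }
        newAngles[j] = math.Atan2(sumanglesy, sumanglesx) // each goroutine writes only its own index j
    }(j)
}
wg.Wait()
// newAngles can now be combined with the random noise and copied into angles sequentially.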

Related

Gaussian Blur implementation generates weird output

I'm trying to implement a Gaussian Blur on golang image.Image objects. (Input and output images omitted here.)
The output image contains some unprocessed borders that correspond to the current implementation decision not to process the edges. That part works as intended, so I can rule out off-by-one errors while iterating through image pixels, which leads me to think that I might have messed up the calculations somehow. I've reviewed this code many times, but I can't find my mistake. I would really appreciate some help and considerations on the implementation that could help me solve the problem. The code is below. If any edits or clarifications are necessary, please let me know!
package main
import (
"image"
"image/color"
"image/draw"
"image/jpeg"
"math"
"os"
)
func main() {
f, err := os.Open("dog.jpeg")
if err != nil {
panic(err)
}
img, err := jpeg.Decode(f)
if err != nil {
panic(err)
}
newImg := gaussianBlur(img, 3)
out, err := os.Create("dog-blurred.jpeg")
if err != nil {
panic(err)
}
err = jpeg.Encode(out, newImg, nil)
if err != nil {
panic(err)
}
}
func applyGaussianFunction(x, y, stdDev float64) float64 {
// eFactor := 1 / (2 * math.Pi * stdDev*stdDev);
ePowNominator := -(x*x + y*y);
ePowDenominator := 2 * stdDev*stdDev;
return math.Pow(math.E, (ePowNominator/ePowDenominator));
}
func generateKernel(radius int) [][]float64 {
size := 1 + (radius * 2);
kernel := make([][]float64, size);
stdDev := math.Max(float64(radius / 2), 1);
sum := float64(0);
for i := 0; i < size; i++ {
kernel[i] = make([]float64, size);
}
for i := -radius; i < radius + 1; i++ {
for j := -radius; j < radius + 1; j++ {
val := applyGaussianFunction(float64(j), float64(i), stdDev);
kernel[i + radius][j + radius] = val;
sum += val;
}
}
for i := 0; i < size; i++ {
for j := 0; j < size; j++ {
kernel[i][j] /= sum;
}
}
return kernel;
}
func makeImageRGBA(src image.Image) *image.RGBA {
b := src.Bounds().Size();
rgba := image.NewRGBA(image.Rect(0, 0, b.X, b.Y));
draw.Draw(rgba, rgba.Bounds(), src, image.Pt(0, 0), draw.Src);
return rgba;
}
func gaussianBlur(img image.Image, radius int) image.Image {
size := img.Bounds().Size();
rgbaImg := image.NewRGBA(image.Rect(0, 0, size.X, size.Y));
kernel := generateKernel(radius);
for y := radius; y < size.Y - radius; y++ {
for x := radius; x < size.X - radius; x++ {
var nr, ng, nb, na float64 = 0, 0, 0, 0;
for i := -radius; i < radius + 1; i++ {
for j := -radius; j < radius + 1; j++ {
// NEW: Get pixels from original Image
pr, pg, pb, pa := img.At(x - j, y - i).RGBA();
nr += float64(pr) * kernel[i + radius][j + radius];
ng += float64(pg) * kernel[i + radius][j + radius];
nb += float64(pb) * kernel[i + radius][j + radius];
na += float64(pa) * kernel[i + radius][j + radius];
}
}
// Handle overflow by using 64-bit alphapremultiplied values
rgbaImg.Set(x, y, color.RGBA64{uint16(nr), uint16(ng), uint16(nb), uint16(na)});
}
}
return rgbaImg;
}
EDITS
I modified the code so that pixels are read from the original image, not from rgbaImg.
I've also commented out eFactor in the applyGaussianFunction function, since I'm already normalizing the kernel with the sum variable.
I modified the .Set call to use the 64-bit color.RGBA64 struct.
The newly generated image (omitted) still has black borders, but those are easy to solve and I'm already working them out; they are not part of the problem anymore.
You're reading from the same image that you're writing to. You should read from the original image instead:
pr, pg, pb, pa := img.At(x+j, y+i).RGBA()
EDIT:
Additionally, Image.At returns a color.Color whose RGBA method reports channel values in the 0 to 0xFFFF range, whereas the color.RGBA struct expects them to be in the 0 to 255 range. You may want to use color.RGBA64 when writing the result:
rgbaImg.Set(x, y, color.RGBA64{uint16(nr), uint16(ng), uint16(nb), uint16(na)});
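For completeness, the unprocessed borders that the question set aside could be removed by clamping the sampled coordinates to the image bounds and looping over every pixel. This is only a sketch of one possible edge policy ("extend"), not the author's eventual fix:

func clamp(v, lo, hi int) int {
    if v < lo {
        return lo
    }
    if v > hi {
        return hi
    }
    return v
}

// In gaussianBlur, iterate y from 0 to size.Y-1 and x from 0 to size.X-1,
// and sample the source with clamped coordinates:
// pr, pg, pb, pa := img.At(clamp(x+j, 0, size.X-1), clamp(y+i, 0, size.Y-1)).RGBA()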

How to completely implement the Newton-Raphson Method in golang?

package main
import (
"pars"
"fmt"
)
func Newton(x_first, ddd float64, L []pars.Querry) float64 {
var x_1, x_2, result float64
x_2 = x_first + ddd
f_2 = pars.Function(x_2, L)
f_1 = (f_2 - pars.Function(gox_first)) / ddd
x_2 = x_first - f_2/f_1
return (x_2)
}
func main() {
var x_1, epsilon, Delta, result, check float64
var Max_iteration int
var i, j, k int
epsilon = 0.001
Delta = 0.001
Max_iteration = 100
Equation := pars.ReadFuction()
LIST := pars.Make_list(Equation)
LIST := pars.Insert(LIST)
x_1 = 0.0
for i := 0; i < Max_iteration; i++ {
result = New(funcs, x_1, Delta)
check = result - x_1
if check < 0.0 {
check = -check
}
if check < epsilon {
for k = 0; k <= j; k++ {
if x[k-1] < (result+epsilon) && x[k-1] > (result-epsilon) {
printf(" --> No more ROOT!!\n")
exit(0)
}
prinft("The %3d th ROOT is %10.3f\n", j+1, result)
x[j] = result
j++
x_1 = result + pow(-1, j)*10.0
}
} else
{x_1 = result}
}
}
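For reference, here is a self-contained Newton-Raphson sketch in Go that does not depend on the custom pars package: the function to solve is passed as a plain func(float64) float64, and the derivative is approximated with a forward difference of width h, mirroring the ddd step in the question.

package main

import (
    "fmt"
    "math"
)

// newton returns an approximate root of f, starting from x0.
// The derivative is approximated with a forward difference of width h.
func newton(f func(float64) float64, x0, h, epsilon float64, maxIter int) (float64, error) {
    x := x0
    for i := 0; i < maxIter; i++ {
        fx := f(x)
        dfx := (f(x+h) - fx) / h // numerical derivative
        if dfx == 0 {
            return x, fmt.Errorf("zero derivative at x = %g", x)
        }
        next := x - fx/dfx
        if math.Abs(next-x) < epsilon {
            return next, nil // converged
        }
        x = next
    }
    return x, fmt.Errorf("no convergence after %d iterations", maxIter)
}

func main() {
    f := func(x float64) float64 { return x*x - 2 } // roots at ±sqrt(2)
    root, err := newton(f, 1.0, 0.001, 0.001, 100)
    fmt.Println(root, err) // ~1.4142135, <nil>
}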

Integer Bilinear interpolation optimization

My code was very much bottlenecked by bilinear interpolation, so I wrote a version (ScaleBlerpI) that does not use floating point math. This is already about 1.85 times faster (it was 1.5 times before the change in Edit 1 below), but I am wondering how I could make it even faster.
Any hints are appreciated.
func ScaleBlerpI(src, dst *ValueFieldI) {
mx := uint64((src.Width - 1) * math.MaxUint32 / dst.Width)
my := uint64((src.Height - 1) * math.MaxUint32 / dst.Height)
for y := uint64(0); y < uint64(dst.Height); y++ {
for x := uint64(0); x < uint64(dst.Width); x++ {
gx := (x * mx) >> 32 // eq. / math.MaxUint32
tx := (x * mx) & math.MaxUint32 // eq. % (math.MaxUint32 + 1) or % 2^32
gy := (y * my) >> 32
ty := (y * my) & math.MaxUint32
srcX, srcY := int(gx), int(gy)
rgba00 := src.GetComponent(srcX, srcY)
rgba10 := src.GetComponent(srcX+1, srcY)
rgba01 := src.GetComponent(srcX, srcY+1)
rgba11 := src.GetComponent(srcX+1, srcY+1)
result := []uint32{
blerpI(rgba00[0], rgba10[0], rgba01[0], rgba11[0], tx, ty),
blerpI(rgba00[1], rgba10[1], rgba01[1], rgba11[1], tx, ty),
blerpI(rgba00[2], rgba10[2], rgba01[2], rgba11[2], tx, ty),
}
dst.SetComponent(int(x), int(y), result)
}
}
}
func lerpI(s, e uint32, f uint64) uint32 {
// basically s * (1 - f) + e * f
return uint32(
(uint64(s)*(math.MaxUint32-f) + uint64(e)*f) /
math.MaxUint32)
}
func blerpI(c00, c10, c01, c11 uint32, tx, ty uint64) uint32 {
return lerpI(
lerpI(c00, c10, tx),
lerpI(c01, c11, tx),
ty,
)
}
type ValueFieldI struct {
Width, Height int
ComponentSize int
Values []uint32
}
func (vf *ValueFieldI) GetComponent(x, y int) []uint32 {
componentIdx := x + y*vf.Width
return vf.Values[componentIdx*vf.ComponentSize : componentIdx*vf.ComponentSize+vf.ComponentSize]
}
func (vf *ValueFieldI) SetComponent(x, y int, c []uint32) {
copy(vf.GetComponent(x, y), c)
}
Profiling has shown me that the most time is lost on blerpI, src.GetComponent and dst.SetComponent
Edit 1
Replaced
// basically s * (1 - f) + e * f
return uint32(
(uint64(s)*(math.MaxUint32-f) + uint64(e)*f) /
math.MaxUint32)
With
// basically s + f*(e-s)
return s + uint32((f*(uint64(e)-uint64(s)))>>32)
Integer version is now 1.85 times faster.
Edit 2
Benchmark:
func BenchmarkBlerpIRand(b *testing.B) {
src := &ValueFieldI{
Width: 37,
Height: 37,
ComponentSize: 3,
Values: make([]uint32, 37*37*3),
}
for i := range src.Values {
src.Values[i] = rand.Uint32()
}
dst := &ValueFieldI{
Width: 37 * 8,
Height: 37 * 8,
ComponentSize: 3,
Values: make([]uint32, 37*8*37*8*3),
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
ScaleBlerpI(src, dst)
}
}
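Since the profile points at blerpI, GetComponent and SetComponent, one further idea (an untested sketch, not a measured result) is to drop the per-pixel result allocation and write straight into the destination's backing slice, which GetComponent already exposes:

// Inside the inner loop of ScaleBlerpI, instead of building a temporary slice:
d := dst.GetComponent(int(x), int(y))
s00 := src.GetComponent(srcX, srcY)
s10 := src.GetComponent(srcX+1, srcY)
s01 := src.GetComponent(srcX, srcY+1)
s11 := src.GetComponent(srcX+1, srcY+1)
for c := range d {
    d[c] = blerpI(s00[c], s10[c], s01[c], s11[c], tx, ty)
}

Because GetComponent returns a sub-slice of Values, writing into d writes directly into dst and the SetComponent copy disappears; hoisting the y-dependent part of the index computation out of the inner x loop may help as well.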

Writing Pascal's Triangle using big.Int

I have some code for Pascal's Triangle using big.Int. How do I add the values? I get an error:
invalid operation:
PascalTriangle[r - 1][c - 1] + PascalTriangle[r - 1][c]
(operator + not defined on struct)
I am using a big.Int array so I cannot use Add from the big package.
func generatePascalTriangle(n int) [][]big.Int {
PascalTriangle := make([][]big.Int, n)
for i := range PascalTriangle {
PascalTriangle[i] = make([]big.Int, n)
}
var one big.Int
one.SetInt64(1)
for r := 0; r < n; r++ {
PascalTriangle[r][0] = one
PascalTriangle[r][r] = one
}
for r := 2; r < n; r++ {
for c := 1; c < r; c++ {
PascalTriangle[r][c] = PascalTriangle[r-1][c-1] + PascalTriangle[r-1][c]
}
}
return PascalTriangle
}
"I am using a big.Int array so cannot use Add from the big package." That claim is false. You can, and you should.
For example,
package main
import (
"fmt"
"math/big"
)
func generatePascalTriangle(n int) [][]big.Int {
PascalTriangle := make([][]big.Int, n)
for i := range PascalTriangle {
PascalTriangle[i] = make([]big.Int, n)
}
var one big.Int
one.SetInt64(1)
for r := 0; r < n; r++ {
PascalTriangle[r][0] = one
PascalTriangle[r][r] = one
}
for r := 2; r < n; r++ {
for c := 1; c < r; c++ {
// PascalTriangle[r][c] = PascalTriangle[r-1][c-1] + PascalTriangle[r-1][c]
PascalTriangle[r][c] = *PascalTriangle[r][c].Add(&PascalTriangle[r-1][c-1], &PascalTriangle[r-1][c])
}
}
return PascalTriangle
}
func main() {
t := generatePascalTriangle(7)
for i, r := range t {
for _, n := range r[:i+1] {
fmt.Print(n.String() + " ")
}
fmt.Println()
}
}
Playground: https://play.golang.org/p/KUGsjr8Mon5
Output:
1
1 1
1 2 1
1 3 3 1
1 4 6 4 1
1 5 10 10 5 1
1 6 15 20 15 6 1
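An alternative sketch (my own suggestion, not part of the answer above) stores *big.Int pointers instead of values, so Add's pointer receiver is used directly and no dereferencing is needed:

func generatePascalTrianglePtr(n int) [][]*big.Int {
    t := make([][]*big.Int, n)
    for r := range t {
        t[r] = make([]*big.Int, r+1)
        t[r][0] = big.NewInt(1) // left edge
        t[r][r] = big.NewInt(1) // right edge
        for c := 1; c < r; c++ {
            t[r][c] = new(big.Int).Add(t[r-1][c-1], t[r-1][c])
        }
    }
    return t
}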

How to solve Spiral Matrix in go

https://leetcode.com/problems/spiral-matrix/
A golang implementation. The result is as follows:
Run Code Status: Runtime Error
Your input: []
Your answer: (none)
Expected answer: []
Why is [] a test case? It's just an empty one-dimensional slice, isn't it?
My code is:
func sprial(begin_r, begin_c, row, col int, matrix [][]int) []int {
s := make([]int, col*row, col*row+10)
k := 0
if row == 1 && col == 1 {
s[k] = matrix[begin_r][begin_c]
return s
} else if row == 1 {
return matrix[begin_r][begin_c : col-1]
} else if col == 1 {
return matrix[begin_r : row-1][begin_c]
} else {
for i := begin_c; i < col; i++ {
s[k] = matrix[begin_r][i]
k++
}
for i := begin_r + 1; i < row; i++ {
s[k] = matrix[i][col-1]
k++
}
for i := col - 2; i >= begin_c; i-- {
s[k] = matrix[row-1][i]
k++
}
for i := row - 2; i >= begin_r+1; i-- {
s[k] = matrix[i][begin_c]
k++
}
return s[:k-1]
}
}
func spiralOrder(matrix [][]int) []int {
m := len(matrix)
n := len(matrix[0])
i := 0
j := 0
// var rS []int
k := 0
//s1 := make([]int, m*n, m*n)
var s1 = []int{}
for {
if m <= 0 || n <= 0 {
break
}
s := sprial(i, j, m, n, matrix)
if k == 0 {
s1 = s
} else {
s1 = append(s1, s...)
}
i++
j++
m -= 2
n -= 2
k++
}
return s1
}
func spiralOrder(matrix [][]int) []int {
if len(matrix) == 0 || len(matrix[0]) == 0 {
return nil
}
m, n := len(matrix), len(matrix[0])
next := nextFunc(m, n)
res := make([]int, m*n)
for i := range res {
x, y := next()
res[i] = matrix[x][y]
}
return res
}
func nextFunc(m, n int) func() (int, int) {
top, down := 0, m-1
left, right := 0, n-1
x, y := 0, -1
dx, dy := 0, 1
return func() (int, int) {
x += dx
y += dy
switch {
case y+dy > right:
top++
dx, dy = 1, 0
case x+dx > down:
right--
dx, dy = 0, -1
case y+dy < left:
down--
dx, dy = -1, 0
case x+dx < top:
left++
dx, dy = 0, 1
}
return x, y
}
}
Source: https://github.com/aQuaYi/LeetCode-in-Go/blob/master/Algorithms/0054.spiral-matrix/spiral-matrix.go
This repository has solutions to most LeetCode problems, implemented efficiently. Please do take a look. Hope it helps.
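A quick sanity check for the spiralOrder/nextFunc version above (a hypothetical example, assuming an fmt import alongside the two functions):

func main() {
    matrix := [][]int{
        {1, 2, 3},
        {4, 5, 6},
        {7, 8, 9},
    }
    fmt.Println(spiralOrder(matrix)) // [1 2 3 6 9 8 7 4 5]
}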
