Related
I'm trying to analyze sparse matrices. Faced with the task of sorting rows in ascending order of the elements in them in the original matrix.
But I don't understand how to do this without damaging the empty elements.
I tried to bind the elements of the sum array to the rows and somehow move them. But some elements have been removed from the CSC structure.
It may be necessary to change the li/lj arrays themselves, but I don't have enough mathematical knowledge for this. More precisely, I don't understand how to track when elements should be rearranged unless additional elements (zeros) are explicitly specified in the structure.
package main
import (
"fmt"
)
type CSC struct {
a, lj, li []int
}
func getEl(i, j int, el *CSC) int {
for k := el.lj[j]; k < el.lj[j+1]; k++ {
if el.li[k] == i {
return el.a[k]
}
}
return 0
}
func maxSliceEl(lj []int) int {
max := 0
for _, v := range lj {
if v > max {
max = v
}
}
return max
}
func main() {
ma := CSC{
a: []int{8, 2, 5, 7, 1, 9, 2},
li: []int{0, 0, 1, 4, 4, 6, 4},
lj: []int{0, 1, 1, 4, 6, 7},
}
n := len(ma.lj) + 1
m := maxSliceEl(ma.li) - 1
fmt.Printf("Col: %v, Row: %v\n", n, m)
maxStr := []int{}
fmt.Println("Initial matrix:")
for i := 0; i < n; i++ {
sumStrEl := 0
for j := 0; j < m; j++ {
fmt.Print(getEl(i, j, &ma), " ")
sumStrEl += getEl(i, j, &ma)
}
maxStr = append(maxStr, sumStrEl)
fmt.Println("|sumStrEl: ", sumStrEl)
}
}
I found a solution to the problem by taking the structure as a solution: the sum of the elements + their index. The solution turned out to be simpler than expected, only the practice of solving sparse matrices was lacking. The position [i] of the sum must be passed to the getEl function as the first parameter.
package main
import (
"fmt"
"sort"
)
// Creating a CSC (CCS) matrix structure
type CSC struct {
// Array of values, column indexes, row indexing
a, lj, li []int
}
// Getting access to the element
func getEl(i, j int, el *CSC) int {
for k := el.lj[j]; k < el.lj[j+1]; k++ {
// If the element string is equal to the string of the searched element, then the element is found
if el.li[k] == i {
return el.a[k]
}
}
// Otherwise, we will return 0. It will be entered into the matrix
return 0
}
func maxSliceEl(lj []int) int {
max := 0
for _, v := range lj {
if v > max {
max = v
}
}
return max
}
type strInfo struct {
summa int
pos int
}
func main() {
// Set the CSC matrix
ma := CSC{
a: []int{8, 2, 5, 7, 1, 9, 2},
li: []int{0, 0, 1, 4, 4, 6, 4},
lj: []int{0, 1, 1, 4, 6, 7},
}
// Define the number of columns
n := len(ma.lj) + 1
// Define the number of rows
m := maxSliceEl(ma.li) - 1
fmt.Printf("Cols: %v, Rows: %v\n", m, n)
// Set a variable with a structure type for calculating
// the amount in a row and indexing each element in it
var stringsInfo []strInfo
fmt.Println("Initial matrix:")
for i := 0; i < n; i++ {
sumStrEl := 0
for j := 0; j < m; j++ {
sumStrEl += getEl(i, j, &ma)
fmt.Print(getEl(i, j, &ma), " ")
}
fmt.Println("|", sumStrEl)
// Adding a cell with the sum and index to the slice
var strI strInfo
strI.summa = sumStrEl
strI.pos = i
stringsInfo = append(stringsInfo, strI)
}
fmt.Println("stringsInfo: ", stringsInfo)
// Sorting the stringsInfo slice in ascending order of the sum elements
sort.Slice(stringsInfo, func(i, j int) (less bool) {
return stringsInfo[i].summa < stringsInfo[j].summa
})
fmt.Println("stringsInfo: ", stringsInfo)
fmt.Println("Sorted matrix:")
for i := range stringsInfo {
for j := 0; j < m; j++ {
// Output the matrix by idnex stringsInfo[i].pos
fmt.Print(getEl(stringsInfo[i].pos, j, &ma), " ")
}
fmt.Println("|", stringsInfo[i].summa)
}
}
enter image description here
This code snippet is for give two slice of binary number a1 and a2 to return sum slice r1, and I want to figure out how long spend with this code snippet figure out the result.
and I figure out the factorial result.
Is my analysis right?
my analysis for time complexity is:
cn + (n*n!) + c
the Code is:
func BinaryPlus(a1 []int, a2 []int) []int {
var r1 = make([]int, len(a1), 2*(len(a1)))
for i := 0; i < len(a1); i++ {
r1[i] = a1[i] + a2[i]
}
// 二分反转
ReverseSlice(r1)
r1 = append(r1, 0)
final := 0
for i := 0; final != 1; i++ {
isOver := 1
for j := 0; j < len(r1); j++ {
if r1[j] > 1 {
r1[j] = r1[j] % 2
r1[j+1] += 1
if r1[j+1] > 1 {
isOver = 0
}
}
}
if isOver == 1 {
final = 1
}
}
// 二分反转
ReverseSlice(r1)
return r1
}
func ReverseSlice(s interface{}) {
n := reflect.ValueOf(s).Len()
swap := reflect.Swapper(s)
for i, j := 0, n-1; i < j; i, j = i+1, j-1 {
swap(i, j)
}
}
It is not entirely clear that your code, as written, is correct. The size cap for your result array could be too small. Consider the case that len(a2) > 2*len(a1): the r1 := make(...) will not reserve enough in this case. Further, the initial for loop will miss adding in the more significant bits of a2.
Binary addition should have no more than O(n) complexity. You can do it with a single for loop. n = 1+max(len(a1),len(a2)):
package main
import (
"fmt"
"reflect"
)
func BinaryPlus(a1 []int, a2 []int) []int {
reserve := len(a1) + 1
if x := len(a2) + 1; x > reserve {
reserve = x
}
hold := 0
maxBit := 1
ans := make([]int, reserve)
for i := 1; i <= reserve; i++ {
hold = hold / 2
if i <= len(a1) {
hold += a1[len(a1)-i]
}
if i <= len(a2) {
hold += a2[len(a2)-i]
}
ans[reserve-i] = hold & 1
if hold != 0 && i > maxBit {
maxBit = i
}
}
return ans[reserve-maxBit:]
}
func main() {
tests := []struct {
a, b, want []int
}{
{
a: []int{1},
b: []int{0},
want: []int{1},
},
{
a: []int{1, 0},
b: []int{0, 0, 1},
want: []int{1, 1},
},
{
a: []int{1, 0, 0, 1},
b: []int{1, 1, 1, 1, 0},
want: []int{1, 0, 0, 1, 1, 1},
},
{
a: []int{0, 0},
b: []int{0, 0, 0, 0, 0},
want: []int{0},
},
}
bad := false
for i := 0; i < len(tests); i++ {
t := tests[i]
c := BinaryPlus(t.a, t.b)
if !reflect.DeepEqual(c, t.want) {
fmt.Println(t.a, "+", t.b, "=", c, "; wanted:", t.want)
bad = true
}
}
if bad {
fmt.Println("FAILED")
} else {
fmt.Println("PASSED")
}
}
I'm trying to get a code to work that pretty much involves channels in goroutine (in a yield-like behavior in C#)
The code involves getting an iterable matrix from a slice like so:
elements := []float64{1, 2, 3, 4}
expected := [][]float64{
{1},
{2},
{3},
{4},
{1, 2},
{1, 3},
{2, 3},
{1, 4},
{2, 4},
{3, 4},
{1, 2, 3},
{1, 2, 4},
{1, 3, 4},
{2, 3, 4},
{1, 2, 3, 4},
}
I've tried applying Knuth's by doing:
func Combinadic(values []float64) <-chan []float64 {
ch := make(chan []float64)
go func() {
for i := 0; i < len(values); i++ {
for value := range CombinadicK(values, i+1) {
ch <- value
}
}
close(ch)
}()
return ch
}
func CombinadicK(values []float64, k int) <-chan []float64 {
chnl := make(chan []float64)
go func() {
n := len(values)
t := k
c := make([]int, t+3)
current := make([]float64, t)
x := 0
j := 0
for j = 1; j <= t; j++ {
c[j] = j - 1
}
c[t+1] = n
c[t+2] = 0
j = t
for {
for i := 0; i < len(current); i++ {
current[i] = values[c[i+1]]
}
chnl <- current
if j > 0 {
x = j
} else {
if c[1]+1 < c[2] {
c[1]++
continue
} else {
j = 2
}
}
for {
c[j-1] = j - 2
x = c[j] + 1
if x == c[j+1] {
j++
} else {
break
}
}
c[j] = x
j--
if j >= t {
break
}
}
close(chnl)
}()
return chnl
}
It appears to give out random number per row, but the structure of the expected (the count of items per row) appears to be ok.
The code in Go Playground
You have a data race. Your results are undefined.
$ go run -race racer.go
==================
WARNING: DATA RACE
Read at 0x00c00009c010 by main goroutine:
reflect.typedmemmove()
/home/peter/go/src/runtime/mbarrier.go:177 +0x0
reflect.packEface()
/home/peter/go/src/reflect/value.go:119 +0x103
reflect.valueInterface()
/home/peter/go/src/reflect/value.go:1027 +0x16f
fmt.(*pp).printValue()
/home/peter/go/src/reflect/value.go:997 +0x38f7
fmt.(*pp).printValue()
/home/peter/go/src/fmt/print.go:868 +0xec7
fmt.(*pp).printArg()
/home/peter/go/src/fmt/print.go:715 +0x2ee
fmt.(*pp).doPrintln()
/home/peter/go/src/fmt/print.go:1172 +0xad
fmt.Fprintln()
/home/peter/go/src/fmt/print.go:263 +0x65
main.main()
/home/peter/go/src/fmt/print.go:273 +0x14b
Previous write at 0x00c00009c010 by goroutine 8:
main.CombinadicK.func1()
/home/peter/racer.go:48 +0x1e6
Goroutine 8 (running) created at:
main.CombinadicK()
/home/peter/racer.go:26 +0x96
main.Combinadic.func1()
/home/peter/racer.go:12 +0xda
==================
[3]
[3]
[4]
[4]
[2 3]
[2 4]
[1 4]
[3 4]
[3 4]
[3 4]
[1 3 4]
[1 3 4]
[1 3 4]
[2 3 4]
[1 2 3 4]
Found 1 data race(s)
exit status 66
$
racer.go:
package main
import (
"fmt"
)
func Combinadic(values []float64) <-chan []float64 {
ch := make(chan []float64)
go func() {
for i := 0; i < len(values); i++ {
for value := range CombinadicK(values, i+1) {
ch <- value
}
}
close(ch)
}()
return ch
}
func CombinadicK(values []float64, k int) <-chan []float64 {
chnl := make(chan []float64)
go func() {
n := len(values)
t := k
c := make([]int, t+3)
current := make([]float64, t)
x := 0
j := 0
for j = 1; j <= t; j++ {
c[j] = j - 1
}
c[t+1] = n
c[t+2] = 0
j = t
for {
for i := 0; i < len(current); i++ {
current[i] = values[c[i+1]]
}
chnl <- current
if j > 0 {
x = j
} else {
if c[1]+1 < c[2] {
c[1]++
continue
} else {
j = 2
}
}
for {
c[j-1] = j - 2
x = c[j] + 1
if x == c[j+1] {
j++
} else {
break
}
}
c[j] = x
j--
if j >= t {
break
}
}
close(chnl)
}()
return chnl
}
func main() {
elements := []float64{1, 2, 3, 4}
for v := range Combinadic(elements) {
fmt.Println(v)
}
}
Playground: https://play.golang.org/p/hhQgVdqe6l1
Go: Data Race Detector
I am optimizing matrix multiplication via goroutines in Go.
My benchmark shows, introducing concurrency per row or per element largely drops performance:
goos: darwin
goarch: amd64
BenchmarkMatrixDotNaive/A.MultNaive-8 2000000 869 ns/op 0 B/op 0 allocs/op
BenchmarkMatrixDotNaive/A.ParalMultNaivePerRow-8 100000 14467 ns/op 80 B/op 9 allocs/op
BenchmarkMatrixDotNaive/A.ParalMultNaivePerElem-8 20000 77299 ns/op 528 B/op 65 allocs/op
I know some basic prior knowledge of cache locality, it make sense that per element concurrency drops performance. However, why per row still drops the performance even in naive version?
In fact, I also wrote a block/tiling optimization, its vanilla version (without goroutine concurrency) even worse than naive version (not present here, let's focus on naive first).
What did I do wrong here? Why? How to optimize here?
Multiplication:
package naive
import (
"errors"
"sync"
)
// Errors
var (
ErrNumElements = errors.New("Error number of elements")
ErrMatrixSize = errors.New("Error size of matrix")
)
// Matrix is a 2d array
type Matrix struct {
N int
data [][]float64
}
// New a size by size matrix
func New(size int) func(...float64) (*Matrix, error) {
wg := sync.WaitGroup{}
d := make([][]float64, size)
for i := range d {
wg.Add(1)
go func(i int) {
defer wg.Done()
d[i] = make([]float64, size)
}(i)
}
wg.Wait()
m := &Matrix{N: size, data: d}
return func(es ...float64) (*Matrix, error) {
if len(es) != size*size {
return nil, ErrNumElements
}
for i := range es {
wg.Add(1)
go func(i int) {
defer wg.Done()
m.data[i/size][i%size] = es[i]
}(i)
}
wg.Wait()
return m, nil
}
}
// At access element (i, j)
func (A *Matrix) At(i, j int) float64 {
return A.data[i][j]
}
// Set set element (i, j) with val
func (A *Matrix) Set(i, j int, val float64) {
A.data[i][j] = val
}
// MultNaive matrix multiplication O(n^3)
func (A *Matrix) MultNaive(B, C *Matrix) (err error) {
var (
i, j, k int
sum float64
N = A.N
)
if N != B.N || N != C.N {
return ErrMatrixSize
}
for i = 0; i < N; i++ {
for j = 0; j < N; j++ {
sum = 0.0
for k = 0; k < N; k++ {
sum += A.At(i, k) * B.At(k, j)
}
C.Set(i, j, sum)
}
}
return
}
// ParalMultNaivePerRow matrix multiplication O(n^3) in concurrency per row
func (A *Matrix) ParalMultNaivePerRow(B, C *Matrix) (err error) {
var N = A.N
if N != B.N || N != C.N {
return ErrMatrixSize
}
wg := sync.WaitGroup{}
for i := 0; i < N; i++ {
wg.Add(1)
go func(i int) {
defer wg.Done()
for j := 0; j < N; j++ {
sum := 0.0
for k := 0; k < N; k++ {
sum += A.At(i, k) * B.At(k, j)
}
C.Set(i, j, sum)
}
}(i)
}
wg.Wait()
return
}
// ParalMultNaivePerElem matrix multiplication O(n^3) in concurrency per element
func (A *Matrix) ParalMultNaivePerElem(B, C *Matrix) (err error) {
var N = A.N
if N != B.N || N != C.N {
return ErrMatrixSize
}
wg := sync.WaitGroup{}
for i := 0; i < N; i++ {
for j := 0; j < N; j++ {
wg.Add(1)
go func(i, j int) {
defer wg.Done()
sum := 0.0
for k := 0; k < N; k++ {
sum += A.At(i, k) * B.At(k, j)
}
C.Set(i, j, sum)
}(i, j)
}
}
wg.Wait()
return
}
Benchmark:
package naive
import (
"os"
"runtime/trace"
"testing"
)
type Dot func(B, C *Matrix) error
var (
A = &Matrix{
N: 8,
data: [][]float64{
[]float64{1, 2, 3, 4, 5, 6, 7, 8},
[]float64{9, 1, 2, 3, 4, 5, 6, 7},
[]float64{8, 9, 1, 2, 3, 4, 5, 6},
[]float64{7, 8, 9, 1, 2, 3, 4, 5},
[]float64{6, 7, 8, 9, 1, 2, 3, 4},
[]float64{5, 6, 7, 8, 9, 1, 2, 3},
[]float64{4, 5, 6, 7, 8, 9, 1, 2},
[]float64{3, 4, 5, 6, 7, 8, 9, 0},
},
}
B = &Matrix{
N: 8,
data: [][]float64{
[]float64{9, 8, 7, 6, 5, 4, 3, 2},
[]float64{1, 9, 8, 7, 6, 5, 4, 3},
[]float64{2, 1, 9, 8, 7, 6, 5, 4},
[]float64{3, 2, 1, 9, 8, 7, 6, 5},
[]float64{4, 3, 2, 1, 9, 8, 7, 6},
[]float64{5, 4, 3, 2, 1, 9, 8, 7},
[]float64{6, 5, 4, 3, 2, 1, 9, 8},
[]float64{7, 6, 5, 4, 3, 2, 1, 0},
},
}
C = &Matrix{
N: 8,
data: [][]float64{
[]float64{0, 0, 0, 0, 0, 0, 0, 0},
[]float64{0, 0, 0, 0, 0, 0, 0, 0},
[]float64{0, 0, 0, 0, 0, 0, 0, 0},
[]float64{0, 0, 0, 0, 0, 0, 0, 0},
[]float64{0, 0, 0, 0, 0, 0, 0, 0},
[]float64{0, 0, 0, 0, 0, 0, 0, 0},
[]float64{0, 0, 0, 0, 0, 0, 0, 0},
[]float64{0, 0, 0, 0, 0, 0, 0, 0},
},
}
)
func BenchmarkMatrixDotNaive(b *testing.B) {
f, _ := os.Create("bench.trace")
defer f.Close()
trace.Start(f)
defer trace.Stop()
tests := []struct {
name string
f Dot
}{
{
name: "A.MultNaive",
f: A.MultNaive,
},
{
name: "A.ParalMultNaivePerRow",
f: A.ParalMultNaivePerRow,
},
{
name: "A.ParalMultNaivePerElem",
f: A.ParalMultNaivePerElem,
},
}
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
tt.f(B, C)
}
})
}
}
Performing 8x8 matrix multipliciation is relatively small work.
Goroutines (although may be lightweight) do have overhead. If the work they do is "small", the overhead of launching, synchronizing and throwing them away may outweight the performance gain of utilizing multiple cores / threads, and overall you might not gain performance by executing such small tasks concurrently (hell, you may even do worse than without using goroutines). Measure.
If we increase the matrix size to 80x80, running the benchmark we already see some performance gain in case of ParalMultNaivePerRow:
BenchmarkMatrixDotNaive/A.MultNaive-4 2000 1054775 ns/op
BenchmarkMatrixDotNaive/A.ParalMultNaivePerRow-4 2000 709367 ns/op
BenchmarkMatrixDotNaive/A.ParalMultNaivePerElem-4 100 10224927 ns/op
(As you see in the results, I have 4 CPU cores, running it on your 8-core machine might show more performance gain.)
When rows are small, you are using goroutines to do minimal work, you may improve performance by not "throwing" away goroutines once they're done with their "tiny" work, but you may "reuse" them. See related question: Is this an idiomatic worker thread pool in Go?
Also see related / possible duplicate: Vectorise a function taking advantage of concurrency
I am trying to learn Go, so here is my very simple function for removing adjacent duplicates from slice for exercise from the book by Donovan & Kernighan.
Here is the code: https://play.golang.org/p/avHc1ixfck
package main
import "fmt"
func main() {
a := []int{0, 1, 1, 3, 3, 3}
removeDup(a)
fmt.Println(a)
}
func removeDup(s []int) {
n := len(s)
tmp := make([]int, 0, n)
tmp = append(tmp, s[0])
j := 1
for i := 1; i < n; i++ {
if s[i] != s[i-1] {
tmp = append(tmp, s[i])
j++
}
}
s = s[:len(tmp)]
copy(s, tmp)
}
It should print out [0 1 3] - and I checked, actually tmp at the end of the function it has desired form. However, the result is [0 1 3 3 3 3]. I guess there is something with copy function.
Can I somehow replace input slice s with the temp or trim it to desired length?
Option 1
Return a new slice as suggested by #zerkms.
https://play.golang.org/p/uGJiD3WApS
package main
import "fmt"
func main() {
a := []int{0, 1, 1, 3, 3, 3}
a = removeDup(a)
fmt.Println(a)
}
func removeDup(s []int) []int {
n := len(s)
tmp := make([]int, 0, n)
tmp = append(tmp, s[0])
for i := 1; i < n; i++ {
if s[i] != s[i-1] {
tmp = append(tmp, s[i])
}
}
return tmp
}
Option 2
Use pointers for pass-by-reference.
The same thing in effect as that of option1.
https://play.golang.org/p/80bE5Qkuuj
package main
import "fmt"
func main() {
a := []int{0, 1, 1, 3, 3, 3}
removeDup(&a)
fmt.Println(a)
}
func removeDup(sp *[]int) {
s := *sp
n := len(s)
tmp := make([]int, 0, n)
tmp = append(tmp, s[0])
for i := 1; i < n; i++ {
if s[i] != s[i-1] {
tmp = append(tmp, s[i])
}
}
*sp = tmp
}
Also, refer to following SO thread:
Does Go have no real way to shrink a slice? Is that an issue?
Here's two more slightly different ways to achieve what you want using sets and named types. The cool thing about named types is that you can create interfaces around them and can help with the readability of lots of code.
package main
import "fmt"
func main() {
// returning a list
a := []int{0, 1, 1, 3, 3, 3}
clean := removeDup(a)
fmt.Println(clean)
// creating and using a named type
nA := &newArrType{0, 1, 1, 3, 3, 3}
nA.removeDup2()
fmt.Println(nA)
// or... casting your orginal array to the named type
nB := newArrType(a)
nB.removeDup2()
fmt.Println(nB)
}
// using a set
// order is not kept, but a set is returned
func removeDup(s []int) (newArr []int) {
set := make(map[int]struct{})
for _, n := range s {
set[n] = struct{}{}
}
newArr = make([]int, 0, len(set))
for k := range set {
newArr = append(newArr, k)
}
return
}
// using named a typed
type newArrType []int
func (a *newArrType) removeDup2() {
x := *a
for i := range x {
f := i + 1
if f < len(x) {
if x[i] == x[f] {
x = x[:f+copy(x[f:], x[f+1:])]
}
}
}
// check the last 2 indexes
if x[len(x)-2] == x[len(x)-1] {
x = x[:len(x)-1+copy(x[len(x)-1:], x[len(x)-1+1:])]
}
*a = x
}