computing complexity of an anagram finder implementation - algorithm

Given below code
package main
import (
"fmt"
"sort"
"strings"
)
// main groups a fixed sample word list into anagram sets and prints them.
func main() {
	words := []string{"eat", "tea", "tan", "ate", "nat", "bat"}
	fmt.Println(groupAnagrams(words))
}
// groupAnagrams partitions s into groups of words that are anagrams of one
// another. Every group is sorted alphabetically and the groups are returned in
// ascending order of their sorted-character key. When s is empty nothing is
// appended to out, so nil is returned.
func groupAnagrams(s []string) (out [][]string) {
// tmp maps a word's sorted characters to every input word sharing them.
tmp := map[string][]string{}
for _, v := range s {
// Sorting the characters of v yields a key that is identical for all of its anagrams.
x := strings.Split(v, "")
sort.Strings(x)
anagram := strings.Join(x, "")
items, ok := tmp[anagram]
if ok {
items = append(items, v)
tmp[anagram] = items
continue
}
tmp[anagram] = []string{v}
}
// Map iteration order is unspecified, so the keys are collected and sorted
// to make the order of the returned groups deterministic.
var keys []string
for key := range tmp {
keys = append(keys, key)
}
sort.Strings(keys)
for _, key := range keys {
sort.Strings(tmp[key])
out = append(out, tmp[key])
}
return
}
And its tests here https://play.golang.org/p/k8F1-FAC_au
Can you help me figure out its complexity?
This is my understanding so far, without having checked the documentation thoroughly:
for _, v := range s { // o(n)
sort.Strings(keys) //o(log n)
x := strings.Split(v, "") / anagram := strings.Join(x, "") //o(n)
Are those correct? Am I missing some? How do I compute the total?
Do you account for the total number of allocations when computing the complexity of a piece of code?

(not an answer, more like a formatted comment)
You get to choose what counts as "1 operation".
For example : in your for _, v := range s { ... } loop, I wouldn't count the processing of one single v value :
x := strings.Split(v, "")
sort.Strings(x)
anagram := strings.Join(x, "")
items, ok := tmp[anagram]
if ok {
items = append(items, v)
tmp[anagram] = items
continue
}
tmp[anagram] = []string{v}
as "1 operation". More like something that depends on len(v).
So the length of the items in your starting set will probably appear in your end formula.

This is not an answer, just a little insight for anyone else who has to deal with this kind of problem. I hope it helps.
I slightly revised the code here and there, then tried a Gödel-inspired (prime-product) hashing scheme as described at https://stackoverflow.com/a/396077/11892070
-- main.go --
package main
import (
"fmt"
"sort"
"strings"
)
// main prints, for a few ASCII and non-ASCII sample words, the value and the
// error returned by hashWord. One frequency map is shared across all calls.
func main() {
	scratch := map[rune]int{}
	for _, w := range []string{"tan", "nat", "⌘", "日本語", "語日本"} {
		sum, err := hashWord(w, scratch)
		fmt.Println(w, "=>", sum, "err=", err)
	}
}
// groupAnagramsUsingSort groups the words of s by their sorted characters.
//
// tmp and out are caller-supplied scratch storage: tmp is emptied and out is
// truncated before use, so both can be reused across calls. The groups are
// appended to out in map iteration order (unspecified) and the resulting slice
// is returned.
func groupAnagramsUsingSort(s []string, tmp map[string][]string, out [][]string) [][]string {
	for key := range tmp {
		delete(tmp, key)
	}
	for i := range out {
		out[i] = out[i][:0]
	}
	out = out[:0]

	for _, word := range s {
		letters := strings.Split(word, "")
		sort.Strings(letters)
		key := strings.Join(letters, "")
		// Appending to the missing (nil) entry creates the group on first sight.
		tmp[key] = append(tmp[key], word)
	}

	for _, group := range tmp {
		out = append(out, group)
	}
	return out
}
// groupAnagramsUsingHash groups the words of s by the integer returned from
// hashWord.
//
// tmp and out are caller-supplied scratch storage: tmp is emptied and out is
// truncated before use. The error returned by hashWord is discarded, so
// whatever value accompanies an error is still used as the grouping key.
// Groups are appended to out in map iteration order (unspecified).
func groupAnagramsUsingHash(s []string, tmp map[int][]string, out [][]string) [][]string {
	for key := range tmp {
		delete(tmp, key)
	}
	for i := range out {
		out[i] = out[i][:0]
	}
	out = out[:0]

	letters := map[rune]int{}
	for _, word := range s {
		key, _ := hashWord(word, letters)
		// Appending to the missing (nil) entry creates the group on first sight.
		tmp[key] = append(tmp[key], word)
	}

	for _, group := range tmp {
		out = append(out, group)
	}
	return out
}
// primes holds one distinct prime per lowercase letter; the prime for letter
// c is primes[c-'a'].
var primes = []int{2, 41, 37, 47, 3, 67, 71, 23, 5, 101, 61, 17, 19, 13, 31, 43, 97, 29, 11, 7, 73, 83, 79, 89, 59, 53}

// ErrNonASCII is returned when a word contains a rune outside 'a'..'z'.
var ErrNonASCII = fmt.Errorf("non ascii letter detected")

// getFrequencyMap empties freq, then counts how often each rune occurs in
// word. It returns ErrNonASCII (and a nil map) as soon as a rune has no entry
// in primes; freq may be partially filled in that case.
func getFrequencyMap(word string, freq map[rune]int) (map[rune]int, error) {
	for k := range freq {
		delete(freq, k)
	}
	for _, r := range word {
		// ">=" rather than ">": index len(primes) is already out of range.
		if r < 'a' || int(r-'a') >= len(primes) {
			return nil, ErrNonASCII
		}
		freq[r]++
	}
	return freq, nil
}

// hashWord returns the product of primes[c-'a'] over every letter c of word,
// counting repetitions, so two words get the same value exactly when they are
// anagrams (as long as the product does not overflow int). The empty word
// hashes to 1. freq is scratch storage that is cleared and reused.
//
// It returns -1 and ErrNonASCII when word contains a rune outside 'a'..'z'.
func hashWord(word string, freq map[rune]int) (int, error) {
	freq, err := getFrequencyMap(word, freq)
	if err != nil {
		return -1, err
	}
	product := 1
	for letter, count := range freq {
		p := primes[letter-'a']
		// Multiply by p once per occurrence. Multiplication is commutative,
		// so the result does not depend on map iteration order.
		for e := 0; e < count; e++ {
			product *= p
		}
	}
	return product, nil
}
-- main_test.go --
package main
import (
"reflect"
"sort"
"testing"
)
// expectation pairs an input word list with the anagram groups it must yield.
type expectation struct {
	input []string
	want  [][]string
}

// expectations is the fixture table shared by the tests and benchmarks.
var expectations = []expectation{
	{
		input: []string{"eat", "tea", "tan", "ate", "nat", "bat"},
		want: [][]string{
			{"ate", "eat", "tea"},
			{"bat"},
			{"nat", "tan"},
		},
	},
	{
		input: []string{"eaft", "tea", "taen", "ate", "nate", "batf"},
		want: [][]string{
			{"batf"},
			{"eaft"},
			{"tea", "ate"},
			{"taen", "nate"},
		},
	},
	{
		input: []string{""},
		want:  [][]string{{""}},
	},
	{
		input: []string{"a"},
		want:  [][]string{{"a"}},
	},
}
// TestUsingSort checks groupAnagramsUsingSort against every fixture. Before
// the pairwise comparison, the members of each group are sorted and then the
// groups themselves are ordered (by size, then by first member) on both the
// result and the expected value. The expected groups are sorted in place.
func TestUsingSort(t *testing.T) {
	byLen := func(groups [][]string) func(a, b int) bool {
		return func(a, b int) bool { return len(groups[a]) < len(groups[b]) }
	}
	byFirst := func(groups [][]string) func(a, b int) bool {
		return func(a, b int) bool {
			if len(groups[a]) == 0 || len(groups[b]) == 0 {
				return false
			}
			return groups[a][0] < groups[b][0]
		}
	}

	scratch := map[string][]string{}
	got := [][]string{}
	for _, tc := range expectations {
		got = groupAnagramsUsingSort(tc.input, scratch, got)
		if len(got) != len(tc.want) {
			t.Fatalf("unexpected output,\nwanted=%#v\ngot =%#v\n", tc.want, got)
		}
		for i := range got {
			sort.Strings(got[i])
			sort.Strings(tc.want[i])
		}
		sort.Slice(got, byLen(got))
		sort.Slice(tc.want, byLen(tc.want))
		sort.Slice(got, byFirst(got))
		sort.Slice(tc.want, byFirst(tc.want))
		for i, group := range got {
			if !reflect.DeepEqual(group, tc.want[i]) {
				t.Fatalf("unexpected output,\nwanted=%#v\ngot =%#v\n", tc.want, got)
			}
		}
	}
}
// TestUsingHash checks groupAnagramsUsingHash against every fixture. Before
// the pairwise comparison, the members of each group are sorted and then the
// groups themselves are ordered (by size, then by first member) on both the
// result and the expected value. The expected groups are sorted in place.
func TestUsingHash(t *testing.T) {
	byLen := func(groups [][]string) func(a, b int) bool {
		return func(a, b int) bool { return len(groups[a]) < len(groups[b]) }
	}
	byFirst := func(groups [][]string) func(a, b int) bool {
		return func(a, b int) bool {
			if len(groups[a]) == 0 || len(groups[b]) == 0 {
				return false
			}
			return groups[a][0] < groups[b][0]
		}
	}

	scratch := map[int][]string{}
	got := [][]string{}
	for _, tc := range expectations {
		got = groupAnagramsUsingHash(tc.input, scratch, got)
		if len(got) != len(tc.want) {
			t.Fatalf("unexpected output,\nwanted=%#v\ngot =%#v\n", tc.want, got)
		}
		for i := range got {
			sort.Strings(got[i])
			sort.Strings(tc.want[i])
		}
		sort.Slice(got, byLen(got))
		sort.Slice(tc.want, byLen(tc.want))
		sort.Slice(got, byFirst(got))
		sort.Slice(tc.want, byFirst(tc.want))
		for i, group := range got {
			if !reflect.DeepEqual(group, tc.want[i]) {
				t.Fatalf("unexpected output,\nwanted=%#v\ngot =%#v\n", tc.want, got)
			}
		}
	}
}
// BenchmarkUsingSort measures groupAnagramsUsingSort over all fixtures,
// reusing one scratch map and one output slice for every call.
func BenchmarkUsingSort(b *testing.B) {
	scratch := map[string][]string{}
	groups := [][]string{}
	b.ResetTimer()
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		for _, tc := range expectations {
			groups = groupAnagramsUsingSort(tc.input, scratch, groups)
		}
	}
}
// BenchmarkUsingHash measures groupAnagramsUsingHash over all fixtures,
// reusing one scratch map and one output slice for every call.
func BenchmarkUsingHash(b *testing.B) {
	scratch := map[int][]string{}
	groups := [][]string{}
	b.ResetTimer()
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		for _, tc := range expectations {
			groups = groupAnagramsUsingHash(tc.input, scratch, groups)
		}
	}
}
Benchmark result
$ go test -bench=. -v .
=== RUN TestUsingSort
--- PASS: TestUsingSort (0.00s)
=== RUN TestUsingHash
--- PASS: TestUsingHash (0.00s)
goos: linux
goarch: amd64
BenchmarkUsingSort
BenchmarkUsingSort-4 344438 3315 ns/op 787 B/op 29 allocs/op
BenchmarkUsingHash
BenchmarkUsingHash-4 410810 2911 ns/op 496 B/op 17 allocs/op
PASS
ok _/home/clementauger/tmp 2.408s

Related

pythons enumerate in go

Let's say I have a generator of Fibonacci numbers and I would like to write enumerate(get_next_fibs(10)), i.e. get a generator of (index, number_from_generator) pairs. I am struggling to find a solution using "named return values".
I know this is not how it should normally be done; the purpose is to learn specific things about generators.
package main
import "fmt"
// get_next_fibs returns a generator closure over the Fibonacci sequence.
// Every call advances the sequence and returns the next number (1, 1, 2, ...);
// from the ii-th call onwards it returns -1 instead, as an end marker.
func get_next_fibs(ii int) func() int {
	calls := 0
	cur, next := 0, 1
	return func() int {
		calls++
		cur, next = next, cur+next
		if calls >= ii {
			return -1
		}
		return cur
	}
}
// enumerate is the asker's attempt at wrapping iter so that each call yields
// an (index, value) pair.
// NOTE(review): this version does not work as written; see the inline notes.
func enumerate(iter func() int) func() (index, v int) {
// This outer index is shadowed inside the closure by the named result index.
index := 0
fc := func() (index, v int) {
// NOTE(review): v is already declared as a named result of this closure,
// so ":=" has no new variable to declare here.
v := iter()
return
// Unreachable: this statement comes after the return, and it would
// increment the closure's own result, not the outer counter.
index++
}
return fc
}
// main drives the enumerated Fibonacci generator until the -1 end marker.
// NOTE(review): iter is declared to return two values (index, v), so it
// cannot be assigned to the single variable tuple as done below.
func main() {
iter := enumerate(get_next_fibs(10))
// iter := get_next_fibs(10)
fmt.Printf("iter = %T\n", iter)
for tuple := iter(); tuple != -1; tuple = iter() {
fmt.Println("tuple:", tuple)
}
}
You have a few issues in this code sample:
You can't have index++ after the return statement. Use defer if you need to do something after returning.
You're overlooking how variable shadowing works in Go, so you're trying to modify the wrong index variable.
Go doesn't have tuples.
...
// enumerate wraps iter so that every call yields the zero-based number of the
// call together with the next value produced by iter.
func enumerate(iter func() int) func() (index, v int) {
	next := 0
	return func() (index, v int) {
		index = next
		next++
		v = iter()
		return index, v
	}
}
...
// main prints the type of the enumerated generator, then every (index, value)
// pair it yields until the value is the -1 end marker.
func main() {
	iter := enumerate(get_next_fibs(10))
	fmt.Printf("iter = %T\n", iter)
	for {
		i, v := iter()
		if v == -1 {
			break
		}
		fmt.Printf("i: %d, v: %d\n", i, v)
	}
}
Playground link

Why having a default clause in a goroutine's select makes it slower?

Referring to the following benchmarking test codes:
// BenchmarkRuneCountNoDefault times sending 10 random strings per iteration
// to runtime.NumCPU() RuneCountNoDefault workers over an unbuffered channel.
// A goroutine started in each iteration receives one result per string.
// Building the inputs and starting the workers happens with the timer stopped.
func BenchmarkRuneCountNoDefault(b *testing.B) {
	b.StopTimer()
	const numStrings = 10
	var inputs []string
	for len(inputs) < numStrings {
		inputs = append(inputs, RandStringBytesMaskImprSrc(10))
	}
	jobs := make(chan string)
	results := make(chan int)
	for w := runtime.NumCPU(); w > 0; w-- {
		go RuneCountNoDefault(jobs, results)
	}
	b.StartTimer()
	for n := 0; n < b.N; n++ {
		// Receive exactly one result per job submitted in this iteration.
		go func() {
			for k := 0; k < numStrings; k++ {
				<-results
			}
		}()
		for _, s := range inputs {
			jobs <- s
		}
	}
	// Closing jobs makes the workers return.
	close(jobs)
}
// RuneCountNoDefault is a worker loop: it receives strings from jobs and sends
// each string's rune count to results. The select has no default case, so it
// blocks until a job can be received. The worker returns once jobs is closed.
func RuneCountNoDefault(jobs chan string, results chan int) {
	for {
		select {
		case s, open := <-jobs:
			if !open {
				return
			}
			results <- utf8.RuneCountInString(s)
		}
	}
}
// BenchmarkRuneCountWithDefault times sending 10 random strings per iteration
// to runtime.NumCPU() RuneCountWithDefault workers over an unbuffered channel.
// A goroutine started in each iteration receives one result per string.
// Building the inputs and starting the workers happens with the timer stopped.
func BenchmarkRuneCountWithDefault(b *testing.B) {
	b.StopTimer()
	const numStrings = 10
	var inputs []string
	for len(inputs) < numStrings {
		inputs = append(inputs, RandStringBytesMaskImprSrc(10))
	}
	jobs := make(chan string)
	results := make(chan int)
	for w := runtime.NumCPU(); w > 0; w-- {
		go RuneCountWithDefault(jobs, results)
	}
	b.StartTimer()
	for n := 0; n < b.N; n++ {
		// Receive exactly one result per job submitted in this iteration.
		go func() {
			for k := 0; k < numStrings; k++ {
				<-results
			}
		}()
		for _, s := range inputs {
			jobs <- s
		}
	}
	// Closing jobs makes the workers return.
	close(jobs)
}
// RuneCountWithDefault is a worker loop: it receives strings from jobs and
// sends each string's rune count to results, returning once jobs is closed.
// Because of the empty default case the select never blocks: whenever no job
// is ready the loop immediately goes round again.
func RuneCountWithDefault(jobs chan string, results chan int) {
	for {
		select {
		case s, open := <-jobs:
			if !open {
				return
			}
			results <- utf8.RuneCountInString(s)
		default: //DIFFERENCE
		}
	}
}
//https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-golang
const (
	// letterBytes is the alphabet the random strings are drawn from.
	letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

	letterIdxBits = 6                    // 6 bits to represent a letter index
	letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
	letterIdxMax  = 63 / letterIdxBits   // # of letter indices fitting in 63 bits
)

// src is the random source, seeded with the current time at start-up.
var src = rand.NewSource(time.Now().UnixNano())

// RandStringBytesMaskImprSrc returns a random string of n bytes drawn from
// letterBytes. Each 63-bit value from src is consumed letterIdxBits bits at a
// time; a chunk whose value is not a valid index into letterBytes is skipped.
func RandStringBytesMaskImprSrc(n int) string {
	out := make([]byte, n)
	pos := n - 1
	bits, left := src.Int63(), letterIdxMax
	for pos >= 0 {
		if left == 0 {
			// All chunks of the current value are used up: draw a new one.
			bits, left = src.Int63(), letterIdxMax
		}
		idx := int(bits & letterIdxMask)
		if idx < len(letterBytes) {
			out[pos] = letterBytes[idx]
			pos--
		}
		bits >>= letterIdxBits
		left--
	}
	return string(out)
}
When I benchmarked both the functions where one function, RuneCountNoDefault has no default clause in the select and the other, RuneCountWithDefault has a default clause, I'm getting the following benchmark:
BenchmarkRuneCountNoDefault-4 200000 8910 ns/op
BenchmarkRuneCountWithDefault-4 5 277798660 ns/op
Checking the cpuprofile generated by the tests, I noticed that the function with the default clause spends a lot of time in the following channel operations:
Why having a default clause in the goroutine's select makes it slower?
I'm using Go version 1.10 for windows/amd64
The Go Programming Language
Specification
Select statements
If one or more of the communications can proceed, a single one that
can proceed is chosen via a uniform pseudo-random selection.
Otherwise, if there is a default case, that case is chosen. If there
is no default case, the "select" statement blocks until at least one
of the communications can proceed.
Modifying your benchmark to count the number of proceed and default cases taken:
$ go test default_test.go -bench=.
goos: linux
goarch: amd64
BenchmarkRuneCountNoDefault-4 300000 4108 ns/op
BenchmarkRuneCountWithDefault-4 10 209890782 ns/op
--- BENCH: BenchmarkRuneCountWithDefault-4
default_test.go:90: proceeds 114
default_test.go:91: defaults 128343308
$
While other cases were unable to proceed, the default case was taken 128343308 times in 209422470, (209890782 - 114*4108), nanoseconds or 1.63 nanoseconds per default case. If you do something small a large number of times, it adds up.
default_test.go:
package main
import (
"math/rand"
"runtime"
"sync/atomic"
"testing"
"time"
"unicode/utf8"
)
// BenchmarkRuneCountNoDefault times sending 10 random strings per iteration
// to runtime.NumCPU() RuneCountNoDefault workers over an unbuffered channel.
// A goroutine started in each iteration receives one result per string.
// Building the inputs and starting the workers happens with the timer stopped.
func BenchmarkRuneCountNoDefault(b *testing.B) {
	b.StopTimer()
	const numStrings = 10
	var inputs []string
	for len(inputs) < numStrings {
		inputs = append(inputs, RandStringBytesMaskImprSrc(10))
	}
	jobs := make(chan string)
	results := make(chan int)
	for w := runtime.NumCPU(); w > 0; w-- {
		go RuneCountNoDefault(jobs, results)
	}
	b.StartTimer()
	for n := 0; n < b.N; n++ {
		// Receive exactly one result per job submitted in this iteration.
		go func() {
			for k := 0; k < numStrings; k++ {
				<-results
			}
		}()
		for _, s := range inputs {
			jobs <- s
		}
	}
	// Closing jobs makes the workers return.
	close(jobs)
}
// RuneCountNoDefault is a worker loop: it receives strings from jobs and sends
// each string's rune count to results. The select has no default case, so it
// blocks until a job can be received. The worker returns once jobs is closed.
func RuneCountNoDefault(jobs chan string, results chan int) {
	for {
		select {
		case s, open := <-jobs:
			if !open {
				return
			}
			results <- utf8.RuneCountInString(s)
		}
	}
}
// proceeds and defaults count how many times the workers took the receive
// case and the default case respectively. They are updated atomically and are
// never reset, so they accumulate across benchmark invocations.
var proceeds, defaults uint64

// BenchmarkRuneCountWithDefault times sending 10 random strings per iteration
// to runtime.NumCPU() RuneCountWithDefault workers over an unbuffered channel,
// then logs how often each select case was taken.
func BenchmarkRuneCountWithDefault(b *testing.B) {
	b.StopTimer()
	const numStrings = 10
	var inputs []string
	for len(inputs) < numStrings {
		inputs = append(inputs, RandStringBytesMaskImprSrc(10))
	}
	jobs := make(chan string)
	results := make(chan int)
	for w := runtime.NumCPU(); w > 0; w-- {
		go RuneCountWithDefault(jobs, results)
	}
	b.StartTimer()
	for n := 0; n < b.N; n++ {
		// Receive exactly one result per job submitted in this iteration.
		go func() {
			for k := 0; k < numStrings; k++ {
				<-results
			}
		}()
		for _, s := range inputs {
			jobs <- s
		}
	}
	// Closing jobs makes the workers return.
	close(jobs)
	b.Log("proceeds", atomic.LoadUint64(&proceeds))
	b.Log("defaults", atomic.LoadUint64(&defaults))
}
// RuneCountWithDefault is the instrumented worker loop: it receives strings
// from jobs and sends each string's rune count to results, returning once
// jobs is closed. Every pass through the receive case (including the final
// one that observes the close) increments proceeds; every pass through the
// default case increments defaults.
func RuneCountWithDefault(jobs chan string, results chan int) {
	for {
		select {
		case s, open := <-jobs:
			atomic.AddUint64(&proceeds, 1)
			if !open {
				return
			}
			results <- utf8.RuneCountInString(s)
		default: //DIFFERENCE
			atomic.AddUint64(&defaults, 1)
		}
	}
}
//https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-golang
const (
	// letterBytes is the alphabet the random strings are drawn from.
	letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

	letterIdxBits = 6                    // 6 bits to represent a letter index
	letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
	letterIdxMax  = 63 / letterIdxBits   // # of letter indices fitting in 63 bits
)

// src is the random source, seeded with the current time at start-up.
var src = rand.NewSource(time.Now().UnixNano())

// RandStringBytesMaskImprSrc returns a random string of n bytes drawn from
// letterBytes. Each 63-bit value from src is consumed letterIdxBits bits at a
// time; a chunk whose value is not a valid index into letterBytes is skipped.
func RandStringBytesMaskImprSrc(n int) string {
	out := make([]byte, n)
	pos := n - 1
	bits, left := src.Int63(), letterIdxMax
	for pos >= 0 {
		if left == 0 {
			// All chunks of the current value are used up: draw a new one.
			bits, left = src.Int63(), letterIdxMax
		}
		idx := int(bits & letterIdxMask)
		if idx < len(letterBytes) {
			out[pos] = letterBytes[idx]
			pos--
		}
		bits >>= letterIdxBits
		left--
	}
	return string(out)
}
Playground: https://play.golang.org/p/DLnAY0hovQG

Challenge of finding 3 pairs in array

You are given, on standard input, a target length L followed by the number of bars N (1 ≦ N ≦ 5000) and then the lengths of the N bars. Write a program that finds the total number of combinations of three bars whose lengths add up to exactly L when joined together. The lengths of the individual bars and the joined length L are positive integers that fit in a 32-bit integer. In addition, all bars have different lengths.
for example)
input:
15
5
8
4
10
3
2
output:
2 //{{2, 3, 10}, {3, 4, 8}}
example 2)
input :
35
10
13
12
17
10
4
18
3
11
5
7
output:
6 //{{4, 13, 18}, {5, 12, 18}, {5, 13, 17}, {7, 10, 18}, {7, 11, 17}, {10, 12, 13}}
and my answer is here
package main
import (
"fmt"
"sort"
)
// main reads whitespace-separated integers from standard input until a scan
// reads nothing: the first is the target length, the second the number of
// bars, and the rest are the bar lengths. It sorts the bars and prints the
// result of Calculate.
func main() {
	var (
		target, count, value int
		bars                 []int
	)
	for seen := 0; ; seen++ {
		if n, _ := fmt.Scan(&value); n == 0 {
			break
		}
		switch seen {
		case 0:
			target = value
		case 1:
			count = value
			bars = make([]int, count)
		default:
			bars[seen-2] = value
		}
	}
	sort.Ints(bars)
	fmt.Println(Calculate(target, count, bars))
}
// Except returns a slice of length count-pair[2] whose leading entries are the
// elements of array that follow the first occurrence of the value pair[1].
// Entries that are not filled stay zero.
func Except(pair []int, count int, array []int) []int {
	rest := make([]int, count-pair[2])
	// Locate the position just past the first element equal to pair[1].
	start := len(array)
	for i, v := range array {
		if v == pair[1] {
			start = i + 1
			break
		}
	}
	for k, v := range array[start:] {
		rest[k] = v
	}
	return rest
}
// ListUp enumerates candidate pairs (h, v) with h before v in array. A pair is
// kept when h+v is below target but adding the last element of array could
// still reach target. Each kept entry is {h, v, index of v in array}.
//
// The returned slice has Fact(count-1) entries; entries that were not filled
// are left nil.
func ListUp(target int, count int, array []int) [][]int {
	largest := array[count-1]
	list := make([][]int, Fact(count-1))
	filled := 0
	for i, first := range array {
		if count <= i+1 || target <= first+array[i+1] {
			continue
		}
		for off, second := range array[i+1:] {
			idx := i + off + 1
			if count <= idx {
				continue
			}
			if target > largest+first+second || target <= first+second {
				continue
			}
			list[filled] = []int{first, second, idx}
			filled++
		}
	}
	return list
}
//func Calculate(target int, count int, array []int) [][]int {
// Calculate counts the triples whose sum equals target: for every candidate
// pair produced by ListUp it looks through the elements returned by Except
// for a third value completing the sum. Unfilled (nil) entries of the
// candidate list are skipped.
func Calculate(target int, count int, array []int) int {
	matches := 0
	for _, pair := range ListUp(target, count, array) {
		if len(pair) != 3 {
			continue
		}
		// Skip pairs whose remainder is smaller than the first element of array.
		if target-(pair[0]+pair[1]) < array[0] {
			continue
		}
		for _, third := range Except(pair, count, array) {
			if pair[0]+pair[1]+third == target {
				matches++
			}
		}
	}
	return matches
}
// Fact returns the sum n + (n-1) + ... + 1, with Fact(0) == 0. Despite its
// name it adds rather than multiplies.
func Fact(n int) int {
	switch n {
	case 0:
		return 0
	default:
		return Fact(n-1) + n
	}
}
Can anyone refactor this code?
The refactored version should meet the following requirement:
if input
https://github.com/freddiefujiwara/horiemon-challenge-codeiq/blob/master/sample4.txt
then output
1571200
in 10 seconds
current status is here
time ./horiemon-challenge-codeiq < sample4.txt
1571200
real 6m56.584s
user 6m56.132s
sys 0m1.578s
very very slow.
Your time of almost seven minutes is very, very slow. Ten seconds is slow. One second is more reasonable, a tenth of a second is good. For example, using an O(N*N) algorithm,
package main
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"sort"
"strconv"
"strings"
)
// triples sorts b in place and returns the number of index triples i < j < k
// found by a two-pointer scan with b[i]+b[j]+b[k] == l. After a match both
// pointers move inwards.
func triples(l int, b []int) int {
	sort.Ints(b)
	count := 0
	for i := 0; i < len(b)-2; i++ {
		if b[i] > l {
			// b is sorted, so every later first element is too large as well.
			break
		}
		want := l - b[i]
		for lo, hi := i+1, len(b)-1; lo < hi; {
			switch sum := b[lo] + b[hi]; {
			case sum < want:
				lo++
			case sum > want:
				hi--
			default:
				// l == b[i]+b[lo]+b[hi]
				count++
				lo++
				hi--
			}
		}
	}
	return count
}
// readInput reads one integer per line from standard input until end of input
// or the first blank line. The first number is the target length l, the
// second is the number of bars and the remaining numbers are the bar lengths b.
//
// A final line that is not terminated by a newline is still read. Every
// number must be strictly positive. An error is returned for read failures,
// unparsable or nonpositive numbers, and when the announced number of bars
// does not match the number of lengths that follow.
func readInput() (l int, b []int, err error) {
	r := bufio.NewReader(os.Stdin)
	for {
		line, rerr := r.ReadString('\n')
		if rerr != nil && rerr != io.EOF {
			return 0, nil, rerr
		}
		line = strings.TrimSpace(line)
		if len(line) == 0 {
			// A blank line or exhausted input ends the list.
			break
		}
		i, perr := strconv.Atoi(line)
		if perr != nil {
			return 0, nil, perr
		}
		if i <= 0 {
			return 0, nil, errors.New("Nonpositive number: " + line)
		}
		b = append(b, i)
		if rerr == io.EOF {
			// The data just handled was the unterminated last line.
			break
		}
	}
	if len(b) > 0 {
		l = b[0]
		b = b[1:]
		if len(b) > 1 {
			n := b[0]
			b = b[1:]
			if n != len(b) {
				return 0, nil, errors.New("Invalid number of bars: " + strconv.Itoa(len(b)))
			}
		}
	}
	return l, b, nil
}
// main reads the target and the bar lengths from standard input and prints
// the number of matching triples. A read error is reported on standard error.
func main() {
	target, bars, err := readInput()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	fmt.Println(triples(target, bars))
}
Output:
1571200
real 0m0.164s
user 0m0.161s
sys 0m0.004s
For comparison, your program,
Output:
1571200
real 9m24.384s
user 16m14.592s
sys 0m19.129s
I've tuned it further:
package main
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"sort"
"strconv"
"strings"
)
// triple holds the three lengths of one combination.
type triple struct {
	x, y, z int
}

// triples counts the index triples i < j < k of n with n[i]+n[j]+n[k] == l.
// For every pair (i, j) the third value is looked up by binary search in
// n[j:], which presumes n is sorted in ascending order; a hit at n[j] itself
// is not counted.
//
// The list argument is not used and the returned slice t is always nil.
func triples(l int, n []int, list bool) (nt int, t []triple) {
	size := len(n)
	for i := 0; i < size-2; i++ {
		if n[i] > l {
			break
		}
		for j := i + 1; j < size-1; j++ {
			pairSum := n[i] + n[j]
			if pairSum > l {
				break
			}
			rest := l - pairSum
			tail := n[j:]
			pos := sort.SearchInts(tail, rest)
			// pos == 0 would be n[j] itself, which is already part of the pair.
			if pos > 0 && pos < len(tail) && tail[pos] == rest {
				nt++
			}
		}
	}
	return nt, t
}
// readInput reads one integer per line from standard input until end of input
// or the first blank line. The first number is the target length l, the
// second is the number of bars (consumed but not validated) and the remaining
// numbers are the bar lengths. The lengths are returned sorted in ascending
// order with duplicates removed.
//
// A final line that is not terminated by a newline is still read. Every
// number must be strictly positive; read failures and unparsable or
// nonpositive numbers are returned as errors.
func readInput() (l int, n []int, err error) {
	r := bufio.NewReader(os.Stdin)
	for {
		line, rerr := r.ReadString('\n')
		if rerr != nil && rerr != io.EOF {
			return 0, nil, rerr
		}
		line = strings.TrimSpace(line)
		if len(line) == 0 {
			// A blank line or exhausted input ends the list.
			break
		}
		i, perr := strconv.Atoi(line)
		if perr != nil {
			return 0, nil, perr
		}
		if i <= 0 {
			return 0, nil, errors.New("Nonpositive number: " + line)
		}
		n = append(n, i)
		if rerr == io.EOF {
			// The data just handled was the unterminated last line.
			break
		}
	}
	if len(n) > 0 {
		l = n[0]
		n = n[1:]
	}
	if len(n) > 0 {
		// The second value is the bar count N, not a bar length; keeping it
		// would let it take part in triples.
		n = n[1:]
	}
	sort.Ints(n)
	if len(n) > 1 {
		// In-place dedup of the sorted slice: keep a value only when it
		// differs from the last value kept, so runs of any length collapse.
		uniq := n[:1]
		for _, v := range n[1:] {
			if v != uniq[len(uniq)-1] {
				uniq = append(uniq, v)
			}
		}
		n = uniq
	}
	return l, n, nil
}
// main reads the input, prints the number of matching triples and, when list
// is enabled, the triples themselves. A read error goes to standard error.
func main() {
	target, bars, err := readInput()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	const list = false
	count, found := triples(target, bars, list)
	fmt.Println(count)
	if list {
		fmt.Println(found)
	}
}

How to find the difference between two slices of strings

Here is my desired outcome
slice1 := []string{"foo", "bar","hello"}
slice2 := []string{"foo", "bar"}
difference(slice1, slice2)
=> ["hello"]
I am looking for the difference between the two string slices!
Assuming Go maps are ~O(1), here is an ~O(n) difference function that works on unsorted slices.
// difference returns the elements in `a` that aren't in `b`.
// difference returns, in their original order, the elements of a that do not
// occur in b. The result is nil when there are none.
func difference(a, b []string) []string {
	inB := make(map[string]struct{}, len(b))
	for i := range b {
		inB[b[i]] = struct{}{}
	}
	var diff []string
	for _, s := range a {
		if _, ok := inB[s]; ok {
			continue
		}
		diff = append(diff, s)
	}
	return diff
}
Depending on the size of the slices, different solutions might be best.
My answer assumes order doesn't matter.
Using simple loops, only to be used with smaller slices:
package main
import "fmt"
// difference returns the strings of slice1 that are missing from slice2,
// followed by the strings of slice2 that are missing from slice1. Lookups are
// linear scans, so it is meant for small slices only.
func difference(slice1 []string, slice2 []string) []string {
	contains := func(haystack []string, needle string) bool {
		for _, s := range haystack {
			if s == needle {
				return true
			}
		}
		return false
	}

	var diff []string
	left, right := slice1, slice2
	// Two passes: the second one runs with the roles of the slices swapped.
	for pass := 0; pass < 2; pass++ {
		for _, s := range left {
			if !contains(right, s) {
				diff = append(diff, s)
			}
		}
		left, right = right, left
	}
	return diff
}
// main prints the two-way difference of two sample slices.
func main() {
	left := []string{"foo", "bar", "hello"}
	right := []string{"foo", "world", "bar", "foo"}
	fmt.Printf("%+v\n", difference(left, right))
}
Output:
[hello world]
Playground: http://play.golang.org/p/KHTmJcR4rg
I use the map to solve this problem
package main
import "fmt"
// main prints every element of the difference of two sample slices on its
// own line.
func main() {
	slice1 := []string{"foo", "bar", "hello"}
	slice2 := []string{"foo", "bar", "world"}
	for _, diffVal := range difference(slice1, slice2) {
		fmt.Println(diffVal)
	}
}
// difference returns the distinct strings that occur in exactly one of the
// two slices: first those found only in slice1 (in slice1 order), then those
// found only in slice2 (in slice2 order). Repeats inside one slice do not
// matter; each value is reported at most once. The result is never nil.
func difference(slice1 []string, slice2 []string) []string {
	const (
		inFirst uint8 = 1 << iota
		inSecond
	)
	// origin records, per value, which of the two slices contain it.
	origin := make(map[string]uint8, len(slice1)+len(slice2))
	for _, s := range slice1 {
		origin[s] |= inFirst
	}
	for _, s := range slice2 {
		origin[s] |= inSecond
	}

	diffStr := []string{}
	// Walk the inputs instead of the map so the output order is deterministic.
	for _, src := range [][]string{slice1, slice2} {
		for _, s := range src {
			if f := origin[s]; f == inFirst || f == inSecond {
				diffStr = append(diffStr, s)
				// Mark as handled so a repeated value is emitted only once.
				origin[s] = inFirst | inSecond
			}
		}
	}
	return diffStr
}
output:
hello
world
// diff returns every string whose number of occurrences in a differs from its
// number of occurrences in b. The order of the result is unspecified (map
// iteration order).
func diff(a, b []string) []string {
	balance := make(map[string]int)
	for i := range a {
		balance[a[i]] += 1
	}
	for i := range b {
		balance[b[i]] -= 1
	}
	var result []string
	for s, n := range balance {
		if n == 0 {
			continue
		}
		result = append(result, s)
	}
	return result
}
If you want to handle duplicated strings, the counter v in the map lets you do that: keep only the entries with v > 0 to get a.Remove(b), or only those with v < 0 to get b.Remove(a).
// unique returns, in their original order, the strings that occur exactly
// once in slice. The result is never nil.
func unique(slice []string) []string {
	occurrences := make(map[string]int)
	for i := range slice {
		occurrences[slice[i]]++
	}
	diff := []string{}
	for _, s := range slice {
		if occurrences[s] != 1 {
			continue
		}
		diff = append(diff, s)
	}
	return diff
}
// main demonstrates unique on concatenated slices. The trailing comments show
// the printed output.
func main() {
	slice1 := []string{"hello", "michael", "dorner"}
	slice2 := []string{"hello", "michael"}
	slice3 := []string{}
	fmt.Println(unique(append(slice1, slice2...))) // [dorner]
	// Nothing is repeated in slice2+slice3, so both strings are reported.
	fmt.Println(unique(append(slice2, slice3...))) // [hello michael]
}
As mentioned by ANisus, different approaches will suit different sizes of input slices. This solution will work in linear time O(n) independent of input size, but assumes that the "equality" includes index position.
Thus, in the OP's examples of:
slice1 := []string{"foo", "bar","hello"}
slice2 := []string{"foo", "bar"}
The entries foo and bar are equal not just due to value, but also due to their index in the slice.
Given these conditions, you can do something like:
package main
import "fmt"
// difference compares s1 and s2 position by position and returns a textual
// report: one "=>\t<s1[i]>\t<s2[i]>" line for every shared index whose values
// differ, followed by one "=>\t<value>" line for every surplus element of the
// longer slice.
func difference(s1, s2 []string) string {
	// shared is the length of the shorter slice, rest the longer slice.
	shared, rest := len(s2), s1
	if len(s1) < len(s2) {
		shared, rest = len(s1), s2
	}

	var out string
	for i := range s1[:shared] {
		if s1[i] == s2[i] {
			continue
		}
		out += fmt.Sprintf("=>\t%s\t%s\n", s1[i], s2[i])
	}
	for _, extra := range rest[shared:] {
		out += fmt.Sprintf("=>\t%s\n", extra)
	}
	return out
}
// main prints the positional difference report for two sample slices.
func main() {
	left := []string{"foo", "bar", "hello"}
	right := []string{"foo", "bar"}
	fmt.Print(difference(left, right))
}
Produces:
=> hello
Playground
If you change the slices to be:
// main prints the positional difference report for two sample slices that
// differ at index 1 and in length.
func main() {
	left := []string{"foo", "baz", "hello"}
	right := []string{"foo", "bar"}
	fmt.Print(difference(left, right))
}
It will produce:
=> baz bar
=> hello
Most of the other solutions here will fail to return the correct answer in case the slices contain duplicated elements.
This solution is O(n) time and O(n) space if the slices are already sorted, and O(n*log(n)) time O(n) space if they are not, but has the nice property of actually being correct. 🤣
// diff returns the strings that cannot be paired off between a and b, taking
// repeats into account. Both inputs are passed through sortIfNeeded and then
// merged in lockstep: equal heads cancel each other, otherwise the smaller
// head is reported. Whatever remains of either input is appended at the end.
func diff(a, b []string) []string {
	a, b = sortIfNeeded(a), sortIfNeeded(b)
	var d []string
	i, j := 0, 0
	for i < len(a) && j < len(b) {
		switch c := strings.Compare(a[i], b[j]); {
		case c < 0:
			d = append(d, a[i])
			i++
		case c > 0:
			d = append(d, b[j])
			j++
		default:
			i++
			j++
		}
	}
	d = append(d, a[i:]...)
	return append(d, b[j:]...)
}
// sortIfNeeded returns a itself when it is already sorted; otherwise it
// returns a sorted copy and leaves a untouched.
func sortIfNeeded(a []string) []string {
	if !sort.StringsAreSorted(a) {
		// Sorting is done in place, so work on a copy of the caller's slice.
		sorted := make([]string, len(a))
		copy(sorted, a)
		sort.Strings(sorted)
		return sorted
	}
	return a
}
If you know for sure that the slices are already sorted, you can remove the calls to sortIfNeeded (the reason for the defensive slice copy in sortIfNeeded is because sorting is done in-place, so we would be modifying the slices that are passed to diff).
See https://play.golang.org/p/lH-5L0aL1qr for tests showing correctness in face of duplicated entries.
Here is an example using generics. Note that it only returns the elements of the first slice that are not present in the second slice (a one-way difference):
// HandleDiff reports whether item1 and item2 are to be treated as the same
// element.
type HandleDiff[T comparable] func(item1 T, item2 T) bool

// HandleDiffDefault is a HandleDiff that compares with ==.
func HandleDiffDefault[T comparable](val1 T, val2 T) bool {
	return val1 == val2
}

// Diff returns, in order, the elements of items1 for which callback finds no
// match among items2. The result is never nil.
func Diff[T comparable](items1 []T, items2 []T, callback HandleDiff[T]) []T {
	matched := func(candidate T) bool {
		for _, other := range items2 {
			if callback(candidate, other) {
				return true
			}
		}
		return false
	}

	acc := []T{}
	for _, candidate := range items1 {
		if matched(candidate) {
			continue
		}
		acc = append(acc, candidate)
	}
	return acc
}
usage
diff := Diff(items1, items2, HandleDiffDefault[string])
Why not keep it simple and use labels?
// returns items unique to slice1
// difference returns, in order, the items of slice1 that do not appear in
// slice2. The result is nil when there are none.
func difference(slice1, slice2 []string) []string {
	var diff []string
	for _, candidate := range slice1 {
		present := false
		for _, other := range slice2 {
			if candidate == other {
				present = true
				break
			}
		}
		if !present {
			diff = append(diff, candidate)
		}
	}
	return diff
}
https://go.dev/play/p/H46zSpfocHp
I would make a small change to the solution by @peterwilliams97, so that the order in which the two slices are passed does not matter.
// difference returns, in order, the elements of the longer of the two slices
// that do not occur in the shorter one. When both have the same length, a is
// treated as the longer slice.
func difference(a, b []string) []string {
	longer, shorter := a, b
	if len(a) < len(b) {
		longer, shorter = b, a
	}

	known := make(map[string]struct{}, len(shorter))
	for i := range shorter {
		known[shorter[i]] = struct{}{}
	}

	var diff []string
	for _, s := range longer {
		if _, ok := known[s]; ok {
			continue
		}
		diff = append(diff, s)
	}
	return diff
}
The code below gives the absolute difference between strings regardless of the order. Space complexity O(n) and Time complexity O(n).
// difference returns the elements in a that aren't in b
// difference returns a string made of the distinct runes that occur in the
// longer of the two strings (as chosen by longestString) but not in the
// shorter one. The order of the runes in the result is unspecified (map
// iteration order).
func difference(a, b string) string {
	longest, shortest := longestString(&a, &b)
	remaining := make(map[rune]bool)
	for _, r := range longest {
		remaining[r] = true
	}
	for _, r := range shortest {
		// Removing a rune that is not present is a no-op.
		delete(remaining, r)
	}
	var builder strings.Builder
	for r := range remaining {
		builder.WriteRune(r)
	}
	return builder.String()
}
// longestString returns the runes of both strings, with the string of greater
// byte length first. When the lengths are equal, *b comes first.
func longestString(a *string, b *string) ([]rune, []rune) {
	first, second := *b, *a
	if len(*a) > len(*b) {
		first, second = *a, *b
	}
	return []rune(first), []rune(second)
}

Iterate over two strings at the same time

I am just wondering if there is any beautiful way to iterate over two strings at the same time:
// Convert both strings to rune slices so they can be indexed per character.
var ascii_runes = []rune(string_1)
var shifted_runes = []rune(string_2)
// NOTE(review): len(string_1) counts bytes while the slices are indexed by
// rune, and the bound does not take the length of shifted_runes into account.
for i := 0; i < len(string_1); i++ {
fmt.Println(string(ascii_runes[i]) + string(shifted_runes[i]))
}
Not sure IIUC, but for example:
package main
import (
"fmt"
)
// ascii and shifted are the two sample strings, stored as runes so they can
// be indexed per character.
var (
	ascii   = []rune("string1")
	shifted = []rune("STRING!")
)

// main prints the runes of ascii and shifted pairwise, one pair per line.
func main() {
	for i := range ascii {
		fmt.Printf("%c%c\n", ascii[i], shifted[i])
	}
}
Also here: http://play.golang.org/p/2ruvLFg_qe
Output:
sS
tT
rR
iI
nN
gG
1!
For example,
package main
import "fmt"
// main prints the runes of two sample strings pairwise, stopping at the end
// of the shorter one.
func main() {
	first, second := []rune("string_1"), []rune("string_2")
	limit := len(first)
	if len(second) < limit {
		limit = len(second)
	}
	for i := 0; i < limit; i++ {
		fmt.Println(string(first[i]) + string(second[i]))
	}
}
Output:
ss
tt
rr
ii
nn
gg
__
12
Not particularly beautiful, but an efficient way is to use strings.NewReader and its ReadRune method.
// Less reports whether s1 sorts before s2 when the two strings are compared
// rune by rune. A string that is a proper prefix of the other sorts first;
// equal strings are not less than each other.
func Less(s1, s2 string) bool {
	left, right := strings.NewReader(s1), strings.NewReader(s2)
	for {
		r1, _, e1 := left.ReadRune()
		r2, _, e2 := right.ReadRune()
		switch {
		case e2 == io.EOF:
			// s2 is exhausted (possibly together with s1): s1 is not smaller.
			return false
		case e1 == io.EOF:
			return true
		case r1 != r2:
			return r1 < r2
		}
	}
}

Resources