I have just seen an implementation of a priority queue written in a generic way, in which any
type satisfying an interface can be put into the queue. Is this the way to go in Go, or does it introduce any issues?
// Copyright 2012 Stefan Nilsson
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package prio provides a priority queue.
// The queue can hold elements that implement the two methods of prio.Interface.
package prio
/*
Interface is implemented by any type that can be inserted into a priority queue.

The simplest use case looks like this:

	type myInt int
	func (x myInt) Less(y prio.Interface) bool { return x < y.(myInt) }
	func (x myInt) Index(i int) {}

To use the Remove method you need to keep track of the index of elements
in the heap, e.g. like this:

	type myType struct {
		value int
		index int // index in heap
	}
	func (x *myType) Less(y prio.Interface) bool { return x.value < y.(*myType).value }
	func (x *myType) Index(i int) { x.index = i }
*/
type Interface interface {
// Less returns whether this element should sort before element x.
Less(x Interface) bool
// Index is called by the priority queue when this element is moved to index i.
// Pop and Remove call Index(-1) on the element they take out of the queue.
Index(i int)
}
// Queue represents a priority queue.
// The zero value for Queue is an empty queue ready to use.
type Queue struct {
// h stores the elements in heap order; h[0] is a minimum element according to Less.
h []Interface
}
// New builds a priority queue out of the given elements in O(n) time,
// where n = len(x).
// When called as New(x...), the queue is laid out directly in the backing
// array of x, so the order of the elements in x may change.
func New(x ...Interface) Queue {
	heapify(x)
	return Queue{h: x}
}
// Push adds the element x to the queue.
// The complexity is O(log(n)) where n = q.Len().
func (q *Queue) Push(x Interface) {
	q.h = append(q.h, x)
	// Sift the new last element up; up also reports the final index to x.
	up(q.h, len(q.h)-1)
}
// Pop takes a minimum element (according to Less) out of the queue and
// returns it. The removed element is told its index is -1.
// The complexity is O(log(n)), where n = q.Len().
func (q *Queue) Pop() Interface {
	elems := q.h
	last := len(elems) - 1
	top := elems[0]

	// Move the last element to the root and clear the vacated slot.
	elems[0] = elems[last]
	elems[last] = nil
	elems = elems[:last]

	if last > 0 {
		down(elems, 0) // down reports the final index of the moved element.
	}
	q.h = elems

	top.Index(-1) // for safety
	return top
}
// Peek returns, but does not remove, a minimum element (according to Less) of the queue.
// It reads q.h[0] without a length check, so calling it on an empty queue panics.
func (q *Queue) Peek() Interface {
return q.h[0]
}
// Remove takes the element at index i out of the queue and returns it.
// The removed element is told its index is -1.
// The complexity is O(log(n)), where n = q.Len().
func (q *Queue) Remove(i int) Interface {
	elems := q.h
	last := len(elems) - 1
	removed := elems[i]

	// Fill the hole with the last element and clear the vacated slot.
	elems[i] = elems[last]
	elems[last] = nil
	elems = elems[:last]

	// The replacement may violate the invariant in either direction.
	if i < last {
		down(elems, i)
		up(elems, i)
	}
	q.h = elems

	removed.Index(-1) // for safety
	return removed
}
// Len returns the number of elements currently stored in the queue.
func (q *Queue) Len() int {
return len(q.h)
}
// heapify establishes the heap invariant on h in O(n) time and reports
// every element's index through Index.
func heapify(h []Interface) {
	firstLeaf := len(h) / 2
	for i := len(h) - 1; i >= 0; i-- {
		if i >= firstLeaf {
			// Leaves are already valid one-element heaps; just record the index.
			h[i].Index(i)
			continue
		}
		down(h, i) // down records the index of the element left at i.
	}
}
// up moves the element at position i towards the top of the heap until its
// parent sorts before it, calling Index on every element it relocates.
func up(h []Interface, i int) {
	for i > 0 {
		p := (i - 1) / 2
		if h[p].Less(h[i]) {
			break
		}
		h[i], h[p] = h[p], h[i]
		h[i].Index(i) // the former parent now lives at i
		i = p
	}
	h[i].Index(i)
}
// down moves the element at position i towards the bottom of the heap until
// it sorts before its smaller child, calling Index on every element it
// relocates.
func down(h []Interface, i int) {
	size := len(h)
	for child := 2*i + 1; child < size; child = 2*i + 1 {
		// Pick the smaller of the two children.
		if r := child + 1; r < size && h[r].Less(h[child]) {
			child = r
		}
		if h[i].Less(h[child]) {
			break
		}
		h[i], h[child] = h[child], h[i]
		h[i].Index(i) // the promoted child now lives at i
		i = child
	}
	h[i].Index(i)
}
This package is not the way to go in general, but if you like it and it meets your needs, use it. I don't see any major issues.
The concept of this package compared to container/heap is to put the interface on the node rather than the container. Container/heap allows more flexibility by using your container. (You might have nodes in a container already, and that container might not even be a slice. It just has to be indexable.) On the other hand, it's probably a common case that you don't care about the container and would be happy to let the package manage it for you. The index management of this package is a nice feature over container/heap, although it adds the overhead of a method call even when index management is not needed.
There are always tradeoffs. Container/heap is very general. This package gets by with a smaller method set (2 instead of 5) and adds index management on top, but only by sacrificing a bit of generality and perhaps a bit of performance in some cases. (You'd want to benchmark if you really cared. There are other differences that may dwarf the overhead of the Index call.)
Related
s := []string{"Zeno", "John", "Al", "Jenny"}
sort.Sort(sort.Reverse(sort.StringSlice(s)))
I could not understand the logic of Reverse
The source code for Reverse seems as follows:
// Reverse wraps data in a reverse value and returns a pointer to it as an Interface.
func Reverse(data Interface) Interface {
return &reverse{data}
}
// reverse wraps an Interface; through embedding it exposes the wrapped
// value's methods as its own.
type reverse struct {
// This embedded Interface permits Reverse to use the methods of
// another Interface implementation.
Interface
}
// Interface is the set of methods (Len, Less and Swap) a collection
// provides so that it can be sorted by index.
type Interface interface {
// Len is the number of elements in the collection.
Len() int
// Less reports whether the element with index i
// must sort before the element with index j.
//
// If both Less(i, j) and Less(j, i) are false,
// then the elements at index i and j are considered equal.
// Sort may place equal elements in any order in the final result,
// while Stable preserves the original input order of equal elements.
//
// Less must describe a transitive ordering:
// - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well.
// - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well.
//
// Note that floating-point comparison (the < operator on float32 or float64 values)
// is not a transitive ordering when not-a-number (NaN) values are involved.
// See Float64Slice.Less for a correct implementation for floating-point values.
Less(i, j int) bool
// Swap swaps the elements with indexes i and j.
Swap(i, j int)
}
How do those given operations above induce the given array to be reversed?
If you look at the sort.StringSlice type, you can see it implements the Less method, notice the comparison x[i] < x[j], which means smaller element goes first.
// Less reports whether x[i] is smaller than x[j], i.e. ascending order.
func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] }
And then notice the sort.reverse type (not the sort.Reverse function): it also implements the Less method, but look at how it passes the arguments along. It receives i and j but calls the embedded Less with j and i, which for a StringSlice is equivalent to x[i] > x[j].
// Less returns the opposite of the embedded implementation's Less method.
// It does so by swapping the arguments: Less(i, j) asks the embedded
// Interface for Less(j, i).
func (r reverse) Less(i, j int) bool {
return r.Interface.Less(j, i)
}
A Go method on a struct takes a pointer receiver, makes some modifications, and returns the same pointer. The struct holds a nested reference to the same struct type. When the append method is called with new values, it loses the previous values for some reason.
package main
import (
"fmt"
)
// Node is one element of a singly linked list of ints.
type Node struct{
// next points to the following node; nil marks the end of the list.
next *Node
// val is the value stored in this node.
val int
}
// newNode allocates a fresh list node holding val with no successor.
func newNode(val int) *Node {
	return &Node{val: val}
}
// append adds a new node holding val to the end of the list that starts at n
// and returns the head of that list, so `n = n.append(v)` keeps n pointing at
// the first node. Called on a nil receiver it returns a new one-element list.
func (n *Node) append(val int) *Node {
	if n == nil {
		return newNode(val)
	}
	// Walk with a separate cursor; advancing n itself would lose the head.
	tail := n
	for tail.next != nil {
		tail = tail.next
	}
	tail.next = newNode(val)
	return n
}
// printList writes every value from n to the end of the list, each followed
// by a comma, and finishes with a newline.
func (n *Node) printList() {
	for cur := n; cur != nil; cur = cur.next {
		fmt.Printf("%d,", cur.val)
	}
	fmt.Println()
}
// main builds a list starting with 3, then appends 4, 5 and 6, printing the
// list reachable from n after every step.
func main() {
	n := newNode(3)
	n.printList()
	for _, v := range []int{4, 5, 6} {
		n = n.append(v)
		n.printList()
	}
}
output:
3,
3,4,
4,5,
5,6,
I was expecting 3,4,5,6, - I am probably missing something fundamental here. I would appreciate any input.
https://play.golang.org/p/-zDH98UNFLa
I get the expected results when I modify the append method so that it does not return anything.
append() does not return the head of the list; it returns a pointer to the node it attached the new node to (the previous tail). Therefore printList() only prints the nodes starting from that node. If you'd like to print all the nodes in the list, you should add a variable that stores a pointer to the first node of the list.
// main keeps a separate head pointer so the whole list can be printed even
// though n is replaced by whatever append returns.
func main() {
	n := newNode(3)
	head := n
	head.printList()
	for _, v := range []int{4, 5, 6} {
		n = n.append(v)
		head.printList() // the last call prints 3,4,5,6
	}
}
This function:
// append walks to the last node of the list, attaches a new node holding val
// to it, and returns n. Because the loop reassigns n while walking, the value
// returned is the node that was last before the call, not necessarily the
// node the method was called on.
func (n *Node) append(val int) (*Node){
for n.next != nil {
n = n.next
}
n.next = newNode(val)
return n
}
does not return its original argument in general. It returns the node that is next-to-last in the (assumed to be non-empty) list. Hence:
n = n.append(4)
adds a node holding 4 to the original node holding 3, then returns the original node holding 3, but:
n = n.append(5)
adds a node holding 5 to the original list, but then returns a pointer to the node holding 4. That's why you see 4,5, at this point. Subsequent calls keep repeating the last two elements for the same reason.
You could modify your append function to save the original return value and return that:
// append attaches a new node holding val after the current last node of the
// list starting at n and returns n itself, i.e. the node it was called on.
func (n *Node) append(val int) *Node {
	tail := n
	for ; tail.next != nil; tail = tail.next {
		// keep walking until tail is the last node
	}
	tail.next = newNode(val)
	return n
}
but overall this is still not a great strategy: this append does not work when given a nil-valued n, for instance. Consider constructing a list type that does handle such cases. Alternatively, as in Hsaio's answer, you can have the caller hang on to the head node directly. If you do that you could have the append function return the tail pointer:
// append links a new node holding val directly after n, replacing whatever
// n.next was, and returns that new node so the caller can use it as the tail.
func (n *Node) append(val int) *Node {
	added := newNode(val)
	n.next = added
	return added
}
and then use it like this:
// Keep head for printing; t always holds the tail returned by the last append.
// append is a method on *Node, so it must be called as t.append(v); the bare
// append(head, 4) form would invoke the builtin slice append and not compile.
head := newNode(3)
t := head.append(4)
t = t.append(5)
t = t.append(6)
head.printList()
(There's already a List implementation in the standard Go packages, container/list, that does this stuff nicely for you. Instead of pointing directly to each element in your list, you create an overall list-container instance, which allows you to insert-at-front, insert-at-back, remove-from-anywhere, and so on. It's a little awkward in that it uses interface{} for the data, so it requires a type-assertion to get each node's value.)
The variable n in the main function is overwritten with the value returned by the append function.
I have a map[string]int
I want to get the x top values from it and store them in another data structure, another map or a slice.
From https://blog.golang.org/go-maps-in-action#TOC_7. I understood that:
When iterating over a map with a range loop, the iteration order is
not specified and is not guaranteed to be the same from one iteration
to the next.
so the result structure will be a slice then.
I had a look at several related topics but none fits my problem:
related topic 1
related topic 2
related topic 3
What would be the most efficient way to do this please?
Thanks,
Edit:
My solution would be to turn my map into a slice and sort it, then extract the first x values.
But is there a better way ?
package main
import (
"fmt"
"sort"
)
// main prints the x entries of a map that have the largest values, highest
// first. Entries with equal values are ordered by key so that the output does
// not depend on map iteration order.
func main() {
	// I want the x top values
	x := 3

	// Here is the map
	m := map[string]int{
		"k1":  7,
		"k2":  31,
		"k3":  24,
		"k4":  13,
		"k5":  31,
		"k6":  12,
		"k7":  25,
		"k8":  -8,
		"k9":  -76,
		"k10": 22,
		"k11": 76,
	}

	// Turning the map into this structure
	type kv struct {
		Key   string
		Value int
	}
	ss := make([]kv, 0, len(m)) // final length is known, so allocate once
	for k, v := range m {
		ss = append(ss, kv{k, v})
	}

	// Then sorting the slice by value, higher first; ties are broken by key.
	sort.Slice(ss, func(i, j int) bool {
		if ss[i].Value != ss[j].Value {
			return ss[i].Value > ss[j].Value
		}
		return ss[i].Key < ss[j].Key
	})

	// Never slice past the end when the map has fewer than x entries.
	if x > len(ss) {
		x = len(ss)
	}

	// Print the x top values
	for _, e := range ss[:x] {
		fmt.Printf("%s, %d\n", e.Key, e.Value)
	}
}
Link to golang playground example
If I want to have a map at the end with the x top values, then with my solution I would have to turn the slice into a map again. Would this still be the most efficient way to do it?
Creating a slice and sorting is a fine solution; however, you could also use a heap. The Big O performance should be equal for both implementations (n log n) so this is a viable alternative with the advantage that if you want to add new entries you can still efficiently access the top N items without repeatedly sorting the entire set.
To use a heap, you would implement the heap.Interface for the kv type with a Less function that compares Values as greater than (h[i].Value > h[j].Value), add all of the entries from the map, and then pop the number of items you want to use.
For example (Go Playground):
func main() {
m := getMap()
// Create a heap from the map and print the top N values.
h := getHeap(m)
for i := 1; i <= 3; i++ {
fmt.Printf("%d) %#v\n", i, heap.Pop(h))
}
// 1) main.kv{Key:"k11", Value:76}
// 2) main.kv{Key:"k2", Value:31}
// 3) main.kv{Key:"k5", Value:31}
}
// getHeap returns a heap containing one kv entry per key/value pair of m.
// All entries are collected first and the heap invariant is then established
// with a single heap.Init, which is O(n) instead of the O(n log n) cost of n
// individual pushes.
func getHeap(m map[string]int) *KVHeap {
	h := make(KVHeap, 0, len(m))
	for k, v := range m {
		h = append(h, kv{Key: k, Value: v})
	}
	heap.Init(&h)
	return &h
}
// KVHeap is a slice of kv entries ordered as a heap by the container/heap
// functions; the largest Value is popped first.
// See https://golang.org/pkg/container/heap/
type KVHeap []kv
// Less reports whether entry i has a larger Value than entry j.
// Note that "Less" is greater-than here so we can pop *larger* items.
func (h KVHeap) Less(i, j int) bool { return h[i].Value > h[j].Value }
// Swap exchanges the entries at indexes i and j.
func (h KVHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
// Len returns the number of entries in the heap.
func (h KVHeap) Len() int { return len(h) }
// Push appends x, which must be a kv, to the end of the underlying slice.
// It panics if x holds any other type.
func (h *KVHeap) Push(x interface{}) {
	entry := x.(kv)
	*h = append(*h, entry)
}
// Pop removes the last entry of the underlying slice and returns it.
func (h *KVHeap) Pop() interface{} {
	last := len(*h) - 1
	entry := (*h)[last]
	*h = (*h)[:last]
	return entry
}
I was just playing around with sorting in golang and I found a qsort function on stackoverflow. It seems to run about twice as fast as the native sort function in golang. I've tried it with different input sizes and tested that it works.
Could anyone explain why this happens?
Here is the code you can test it on your pc:
package main
import (
"fmt"
"math/rand"
"sort"
"time"
)
// qsort sorts a in place in ascending order using quicksort with a random
// pivot and returns a.
//
// The partition is three-way (less than / equal to / greater than the pivot),
// so runs of equal keys are excluded from both recursive calls. With a
// two-way partition an input of many equal values degenerates to quadratic
// time and linear recursion depth.
func qsort(a []int) []int {
	if len(a) < 2 {
		return a
	}

	pivot := a[rand.Intn(len(a))]

	// Invariant: a[:lt] < pivot, a[lt:i] == pivot, a[gt+1:] > pivot.
	lt, i, gt := 0, 0, len(a)-1
	for i <= gt {
		switch {
		case a[i] < pivot:
			a[lt], a[i] = a[i], a[lt]
			lt++
			i++
		case a[i] > pivot:
			// The element swapped in from the right is unexamined, so i stays.
			a[i], a[gt] = a[gt], a[i]
			gt--
		default:
			i++
		}
	}

	qsort(a[:lt])
	qsort(a[gt+1:])
	return a
}
// main fills a slice with pseudo-random ints from a fixed seed, sorts one copy
// with sort.Ints and another with qsort, and prints how long each step took.
// Each duration is captured before anything is printed so that the cost of
// writing to stdout is not counted as part of the measurement.
func main() {
	// Create an array with random integers
	rand.Seed(30)
	size := 1000000
	array1 := make([]int, size)
	start := time.Now()
	for i := range array1 {
		array1[i] = rand.Int()
	}
	elapsed := time.Since(start)
	fmt.Println("Creating array with ", size, " elements...")
	fmt.Println("--- ", elapsed, " ---")

	// Create a copy of the unsorted array
	array2 := make([]int, size)
	copy(array2, array1)

	// Sort using native function
	start = time.Now()
	sort.Ints(array1)
	elapsed = time.Since(start)
	fmt.Println("Sorting with the native sort...")
	fmt.Println("--- ", elapsed, " ---")

	// Sort using custom qsort
	start = time.Now()
	qsort(array2)
	elapsed = time.Since(start)
	fmt.Println("Sorting with custom qsort...")
	fmt.Println("--- ", elapsed, " ---")
}
The difference seems to largely be due to the fact that your Quicksort uses builtins. It slices and uses len. Keep in mind that sort.Sort takes in a sort.Interface. So every time you call len it calls slice.Len and every time you do array[i],array[j] = array[j],array[i] it has to call Swap(i,j).
I wrote a comparable version that works on an arbitrary qsort.Interface:
// Qsort sorts a in place with a randomized quicksort that touches the data
// only through the Interface methods, and returns a. prng supplies the
// pivot choice.
func Qsort(a Interface, prng *rand.Rand) Interface {
	if a.Len() < 2 {
		return a
	}

	store, last := 0, a.Len()-1

	// Pick a random pivot and park it at the right end.
	a.Swap(prng.Int()%a.Len(), last)

	// Gather every element smaller than the pivot at the front.
	for i := 0; i < a.Len(); i++ {
		if !a.Less(i, last) {
			continue
		}
		a.Swap(i, store)
		store++
	}

	// Drop the pivot right after the last smaller element.
	a.Swap(store, last)

	// Recurse into the two sides around the pivot.
	lo, hi := a.Partition(store)
	Qsort(lo, prng)
	Qsort(hi, prng)
	return a
}
Then I used Go's benchmark functionality (which you should always use for Benchmarks where possible).
For reference and transparency, qsort.Interface is defined as:
// Interface extends sort.Interface with the ability to split the collection
// around an index, which Qsort uses for its recursive calls.
type Interface interface {
sort.Interface
// Partition returns slice[:i] and slice[i+1:].
// Both results should reference the original memory,
// since Qsort sorts in place.
Partition(i int) (left Interface, right Interface)
}
The actual IntSlice implementation for qsort is:
// IntSlice is a slice of ints that implements Interface.
type IntSlice []int
// Less reports whether the element at index i is smaller than the one at index j.
func (is IntSlice) Less(i, j int) bool {
return is[i] < is[j]
}
// Swap exchanges the elements at indexes i and j.
func (is IntSlice) Swap(i, j int) {
is[i], is[j] = is[j], is[i]
}
// Len returns the number of elements in the slice.
func (is IntSlice) Len() int {
return len(is)
}
// Partition returns is[:i] and is[i+1:] as IntSlices. Both are sub-slices of
// is, so they share its backing array, and element i belongs to neither.
func (is IntSlice) Partition(i int) (left Interface, right Interface) {
return IntSlice(is[:i]), IntSlice(is[i+1:])
}
Finally, here's the qsort_test.go file:
package qsort_test
import (
"math/rand"
"qsort"
"sort"
"testing"
"time"
)
const size int = 1000000
var list = make([]int, size)
var prng = rand.New(rand.NewSource(int64(time.Now().Nanosecond())))
// BenchmarkQsort measures qsort.Qsort on list viewed as a qsort.IntSlice.
// The timer is stopped while list is refilled with fresh random values, so
// only the sort itself is timed.
func BenchmarkQsort(b *testing.B) {
for n := 0; n < b.N; n++ {
b.StopTimer()
for i := range list {
list[i] = prng.Int()
}
b.StartTimer()
qsort.Qsort(qsort.IntSlice(list), prng)
}
}
// BenchmarkNativeQsort measures qsort.NativeQsort working directly on the
// []int list. The timer is stopped while list is refilled with fresh random
// values, so only the sort itself is timed.
func BenchmarkNativeQsort(b *testing.B) {
for n := 0; n < b.N; n++ {
b.StopTimer()
for i := range list {
list[i] = prng.Int()
}
b.StartTimer()
qsort.NativeQsort(list, prng)
}
}
// BenchmarkSort measures the standard library's sort.Sort on list viewed as a
// sort.IntSlice. The timer is stopped while list is refilled with fresh random
// values, so only the sort itself is timed.
func BenchmarkSort(b *testing.B) {
for n := 0; n < b.N; n++ {
b.StopTimer()
for i := range list {
list[i] = prng.Int()
}
b.StartTimer()
sort.Sort(sort.IntSlice(list))
}
}
The results (formatting mine):
PASS
BenchmarkQsort 5 513629360 ns/op
BenchmarkNativeQsort 10 160609180 ns/op
BenchmarkSort 5 292416760 ns/op
As you can see, the standard library's sort massively outperforms your qsort on average with random data. NativeQsort refers to the qsort functions you posted in your actual question, and it outperforms both. The only thing that's changed between that and Qsort is that I swapped the builtin functions for qsort.Interface. It follows, then, that genericity is likely the reason one is slower than the other.
Edit: There aren't many samples because of how expensive sorting is, so here are the results with -benchtime 10s just for slightly more representative results.
BenchmarkQsort 50 524389994 ns/op
BenchmarkNativeQsort 100 161199217 ns/op
BenchmarkSort 50 302037284 ns/op
It seems to run about twice as fast as the native sort function in golang
Note that the native sort for slice will evolve with Go 1.19 (Q4 2022).
See:
issue 50154,
CL 399315,
commit 72e77a7 by ZhangYunHao.
sort: use pdqsort
Across all benchmarks, pdqsort is never significantly slower than the previous algorithm.
In common patterns, pdqsort is often faster (i.e. 10x faster in sorted slices).
The pdqsort is described at Pattern-defeating Quicksort (pdf) by Orson R. L. Peters.
(extract)
Pattern-defeating
quicksort is often the best choice of algorithm overall for small to medium input sizes or data type sizes.
It and other quicksort variants suffer from datasets that
are too large to fit in cache, where is4o shines.
The latter algorithm however suffers from bad performance on smaller sizes, future research could perhaps combine the best of these two algorithms
This CL is inspired by both C++ implementation and Rust implementation.
C++ implementation
Rust implementation
I have an algorithm that I'm trying to implement but currently I have absolutely no clue how to do so, from a technical perspective.
We have a slice of 5 floats:
mySlice := [float1, float2, float3, float4, float5]
And a switch statement:
// Sketch from the question: take one element of mySlice and dispatch on its
// value. Values 1 through 5 each have their own case; anything else falls
// into default, where the next smallest element should be tried.
aFloat := mySlice[index]
switch aFloat {
case 1:
{
//do something
}
case 2:
{
//do something
}
case 3:
{
//do something
}
case 4:
{
//do something
}
case 5:
{
//do something
}
default:
{
//somehow go back to slice, take the next smallest and run
//through the switch statement again
}
}
What I want to do is as follows:
identify the smallest element of mySlice ex: smallestFloat
run smallestFloat through the switch statement
if smallestFloat gets to default case take the next smallest float from mySlice
do step 2 again.
I've managed to do the first step with a for loop and step 2, but I'm stuck on steps 3 and 4. I don't have an idea at the moment on how I might go about re-feeding the next smallest float from mySlice to the switch statement again...
I would appreciate any light shed on my problem.
EDIT: I figured that it would be good to put my solution to the algorithm presented above.
create another slice which will be a sorted version of mySlice
create a map[int]value where the index will correspond to the position of the value in the non-sorted slice, but the items of the map will be inserted in the same order as the sorted slice.
Result: a value sorted map with the respective indexes corresponding to the position of the original non-sorted slice
Here is an implementation using a Minimum Priority Queue. The original input slice of floats is not changed. It can be run on the Go playground
Note: When dealing with recursive functions, you need to be wary of stack overflows.
Go does tail recursion optimizations only in limited cases. For more information on that,
refer to this answer.
This particular example does even better than amortized O(log N) time, because it does not have to resize the priority queue halfway through. This makes it guaranteed O(log N).
package main
import (
"fmt"
)
// main feeds a sample slice through a minimum priority queue until the queue
// is drained, printing the slice before and after to show that it is left in
// its original order.
func main() {
	input := []float64{2, 1, 13, 4, 22, 0, 5, 7, 3}
	fmt.Printf("Order before: %v\n", input)

	pq := NewMinPQ(input)
	for {
		if pq.Empty() {
			break
		}
		doSmallest(pq)
	}

	fmt.Printf("Order after: %v\n", input)
}
// doSmallest dequeues values in ascending order until one of them is 1, 2, 3,
// 4 or 5, prints "Do" followed by that value, and returns. If the queue runs
// empty first it returns without printing anything.
//
// The search is a loop rather than a recursive call, so a long run of
// non-matching values cannot grow the call stack.
func doSmallest(queue *MinPQ) {
	for !queue.Empty() {
		v := queue.Dequeue()
		switch v {
		case 1, 2, 3, 4, 5:
			fmt.Println("Do", v)
			return
		}
		// No hit, try again with the next smallest value.
	}
}
// MinPQ represents a Minimum priority queue.
// It is implemented as a binary heap.
//
// Values which are enqueued can be dequeued, but will be done
// in the order where the smallest item is returned first.
//
// The heap is 1-based: the root is indices[1] and the children of position k
// are at 2k and 2k+1; indices[0] is not used as a heap position.
type MinPQ struct {
values []float64 // Original input list -- Order is never changed.
indices []int // List of indices into values slice.
index int // Current size of indices list.
}
// NewMinPQ creates a new MinPQ heap for the given input set.
// The set itself is kept as-is; the heap orders indices into it. An empty or
// nil set yields an empty queue.
func NewMinPQ(set []float64) *MinPQ {
	m := &MinPQ{
		values: set,
		// Slot 0 is unused because the heap is 1-based, so room is needed for
		// len(set)+1 entries. A capacity of len(set) would be smaller than the
		// length for an empty set (which panics) and force a regrowth otherwise.
		indices: make([]int, 1, len(set)+1),
	}

	// Initialize the priority queue.
	// Use the set's indices as values, instead of the floats
	// themselves. As these may not be re-ordered.
	for i := range set {
		m.indices = append(m.indices, i)
		m.index++
		m.swim(m.index)
	}
	return m
}
// Empty returns true if the heap is empty, i.e. its element count is zero.
func (m *MinPQ) Empty() bool { return m.index == 0 }
// Dequeue removes the smallest item from the heap and returns its value.
// It returns 0 if the heap is empty.
func (m *MinPQ) Dequeue() float64 {
	if m.Empty() {
		return 0
	}

	const root = 1
	last := m.index
	smallest := m.indices[root]

	// Move the last entry to the root, shrink the heap, then restore order.
	m.indices[root], m.indices[last] = m.indices[last], m.indices[root]
	m.index = last - 1
	m.sink(root)
	m.indices = m.indices[:last]

	return m.values[smallest]
}
// greater reports whether the value referenced by heap position x is
// strictly larger than the value referenced by heap position y.
func (m *MinPQ) greater(x, y int) bool {
	vx := m.values[m.indices[x]]
	vy := m.values[m.indices[y]]
	return vx > vy
}
// sink moves the entry at heap position k down the tree, swapping it with
// its smaller child until that child is greater than it or no child is left.
func (m *MinPQ) sink(k int) {
	for child := 2 * k; child <= m.index; child = 2 * k {
		// Prefer the right child when it is the smaller of the two.
		if child < m.index && m.greater(child, child+1) {
			child++
		}
		if m.greater(child, k) {
			return
		}
		m.indices[k], m.indices[child] = m.indices[child], m.indices[k]
		k = child
	}
}
// swim moves the entry at heap position k up the tree, swapping it with its
// parent for as long as the parent is greater.
func (m *MinPQ) swim(k int) {
	for k > 1 {
		parent := k / 2
		if !m.greater(parent, k) {
			return
		}
		m.indices[k], m.indices[parent] = m.indices[parent], m.indices[k]
		k = parent
	}
}