Generate all possible n-character passwords - go

As part of a learning-Go exercise, I'm writing a simplistic brute-force password cracker.
To generate all possible 2-character passwords that use the characters A-E in Python, I would use itertools.product():
from itertools import product
for permutation in product('ABCDE', repeat=2):
    print permutation
However, I'm struggling to do this in Go.
Other questions seem to be about permutations, which isn't quite what I want. And while the Python docs include a sample implementation of the function, I don't know how to translate yield into Go.
I suppose I should mention two restrictions:
I'd like the length of the password to be variable. That is, I may want to do 8-character passwords, or 6-character, or something else. This means we can't just nest n loops.
I don't want to have all of them in memory at once.

What you want is essentially the n-ary Cartesian product of a set with itself. So for all 3-character passwords you want Prod(set, set, set). This can be constructed iteratively: first build the (n-1)-ary product, then append each element of the initial set to each word in it. For instance, going from all 2-character passwords to all 3-character passwords where the only valid characters are 'a' and 'b':
"ab" = {a,b} -> {(a,a),(a,b),(b,a),(b,b)} -> {(a,a,a),(a,a,b),(a,b,a),(a,b,b),(b,a,a),(b,a,b),(b,b,a),(b,b,b)}
func NAryProduct(input string, n int) []string {
if n <= 0 {
return nil
}
// Copy input into initial product set -- a set of
// one character sets
prod := make([]string, len(input))
for i, char := range input {
prod[i] = string(char)
}
for i := 1; i < n; i++ {
// The bigger product should be the size of the input times the size of
// the n-1 size product
next := make([]string, 0, len(input)*len(prod))
// Add each char to each word and add it to the new set
for _, word := range prod {
for _, char := range input {
next = append(next, word + string(char))
}
}
prod = next
}
return prod
}
Playground version: http://play.golang.org/p/6LhApeJ1bv
It should be noted that there's a lot of room for improvement on this solution. If you want to construct all passwords of length, say, 6-18, calling this method independently for each one will recalculate previously computed sets. I'll leave writing the better version up to you. Given what I've shown you, it shouldn't be too difficult to modify the code to take an arbitrary (n-m)-ary product and compute the n-ary product from it. (Hint: think about how you'd do this recursively.)
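To make that idea concrete, here is a minimal sketch (ExtendProduct is a hypothetical helper, not part of the answer above): it takes an already-computed product set and extends it by extra characters, so the 6-character set can be computed once and then grown into the 7- and 8-character sets.
func ExtendProduct(prev []string, input string, extra int) []string {
	// prev holds all passwords of some fixed length over the alphabet input;
	// each pass through the outer loop makes every word one character longer.
	prod := prev
	for i := 0; i < extra; i++ {
		next := make([]string, 0, len(input)*len(prod))
		for _, word := range prod {
			for _, char := range input {
				next = append(next, word+string(char))
			}
		}
		prod = next
	}
	return prod
}
With it, NAryProduct(input, n) is equivalent to ExtendProduct([]string{""}, input, n), and the 7-character set is just ExtendProduct(sixCharSet, input, 1).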

For example, satisfying your restrictions,
package main
import "fmt"
func nextPassword(n int, c string) func() string {
r := []rune(c)
p := make([]rune, n)
x := make([]int, len(p))
return func() string {
p := p[:len(x)]
for i, xi := range x {
p[i] = r[xi]
}
for i := len(x) - 1; i >= 0; i-- {
x[i]++
if x[i] < len(r) {
break
}
x[i] = 0
if i <= 0 {
x = x[0:0]
break
}
}
return string(p)
}
}
func main() {
np := nextPassword(2, "ABCDE")
for {
pwd := np()
if len(pwd) == 0 {
break
}
fmt.Println(pwd)
}
}
Output:
AA
AB
AC
AD
AE
BA
BB
BC
BD
BE
CA
CB
CC
CD
CE
DA
DB
DC
DD
DE
EA
EB
EC
ED
EE

package main
import (
"fmt"
"strings"
"strconv"
// permutation and combination of charactersList
"github.com/ernestosuarez/itertools"
)
func main() {
passwordLength := "1,2,4"
characters := "abcdefghijklmnopqrstuvwxyz0123456789!##$%^&*()+-./"
passwordLengthList := strings.Split(passwordLength, ",")
charactersList := strings.Split(characters, "")
for _, passLen := range passwordLengthList {
passLenInt, err := strconv.Atoi(passLen)
if err != nil {
panic(err)
}
for v := range itertools.PermutationsStr(charactersList, passLenInt) {
fmt.Println(strings.Join(v, ""))
}
}
}

This approach uses select on a buffered channel to generate the characters of a random password:
// Assumes "math/rand" is imported. randombitsGen fills a buffered channel
// with l randomly chosen characters and returns it; the repeated cases
// weight the selection toward letters. Once the buffer is full, the default
// case fires and the function returns, and the deferred close lets the
// caller range over the channel.
func randombitsGen(l int) (out chan string) {
	Capschar := "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	smallchar := "abcdefghijklmnopqrstuvwxyz"
	nums := "0123456789"
	specials := "!##$%^&*()?><"
	out = make(chan string, l)
	defer close(out)
	for {
		select {
		case out <- string(Capschar[rand.Intn(len(Capschar))]):
		case out <- string(Capschar[rand.Intn(len(Capschar))]):
		case out <- string(Capschar[rand.Intn(len(Capschar))]):
		case out <- string(smallchar[rand.Intn(len(smallchar))]):
		case out <- string(smallchar[rand.Intn(len(smallchar))]):
		case out <- string(smallchar[rand.Intn(len(smallchar))]):
		case out <- string(nums[rand.Intn(len(nums))]):
		case out <- string(specials[rand.Intn(len(specials))]):
		default:
			return
		}
	}
}

Related

Generate combinations from a given range

I'm trying to create a program capable of generating combinations from a given range.
I started editing this code below that generates combinations:
package main
import "fmt"
func nextPassword(n int, c string) func() string {
r := []rune(c)
p := make([]rune, n)
x := make([]int, len(p))
return func() string {
p := p[:len(x)]
for i, xi := range x {
p[i] = r[xi]
}
for i := len(x) - 1; i >= 0; i-- {
x[i]++
if x[i] < len(r) {
break
}
x[i] = 0
if i <= 0 {
x = x[0:0]
break
}
}
return string(p)
}
}
func main() {
np := nextPassword(2, "ABCDE")
for {
pwd := np()
if len(pwd) == 0 {
break
}
fmt.Println(pwd)
}
}
This is the Output of the code:
AA
AB
AC
AD
AE
BA
BB
BC
BD
BE
CA
CB
CC
CD
CE
DA
DB
DC
DD
DE
EA
EB
EC
ED
EE
And this is the code I edited:
package main
import "fmt"
const (
Min = 5
Max = 10
)
func nextPassword(n int, c string) func() string {
r := []rune(c)
p := make([]rune, n)
x := make([]int, len(p))
return func() string {
p := p[:len(x)]
for i, xi := range x {
p[i] = r[xi]
}
for i := len(x) - 1; i >= 0; i-- {
x[i]++
if x[i] < len(r) {
break
}
x[i] = 0
if i <= 0 {
x = x[0:0]
break
}
}
return string(p)
}
}
func main() {
cont := 0
np := nextPassword(2, "ABCDE")
for {
pwd := np()
if len(pwd) == 0 {
break
}
if cont >= Min && cont <= Max{
fmt.Println(pwd)
} else if cont > Max{
break
}
cont += 1
}
}
Output:
BA
BB
BC
BD
BE
CA
My code works, but if I increase the length of the combination and my range starts in the middle, the program still generates all the combinations before the range, which I don't want (and of course that takes a lot of time).
How can I solve this problem?
I really didn't like how nextPassword was written, so I made a variation. Rather than starting at 0 and repeatedly returning the next value, this one takes an integer and converts it to the corresponding "password." E.g. toPassword(0, 2, []rune("ABCDE")) is AA, and toPassword(5, ...) is BA.
From there, it's easy to loop over whatever range you want. But I also wrote a nextPassword wrapper around it that behaves similarly to the one in the original code. This one uses toPassword under the covers and takes a starting n.
Runnable version here: https://play.golang.org/p/fBo6mx4Mji
Code below:
package main
import (
"fmt"
)
func toPassword(n, length int, alphabet []rune) string {
base := len(alphabet)
// This will be our output
result := make([]rune, length)
// Start filling from the right
i := length - 1
// This is essentially a conversion to base-b, where b is
// the number of possible letters (5 in the case of "ABCDE")
for n > 0 {
// Filling from the right, put the right digit mod b
result[i] = alphabet[n%base]
// Divide the number by the base so we're ready for
// the next digit
n /= base
// Move to the left
i -= 1
}
// Fill anything that's left with "zeros" (first letter of
// the alphabet)
for i >= 0 {
result[i] = alphabet[0]
i -= 1
}
return string(result)
}
// Convenience function that just returns successive values from
// toPassword starting at start
func nextPassword(start, length int, alphabet []rune) func() string {
n := start
return func() string {
result := toPassword(n, length, alphabet)
n += 1
return result
}
}
func main() {
for i := 5; i < 11; i++ {
fmt.Println(toPassword(i, 2, []rune("ABCDE")))
} // BA, BB, BC, BD, BE, CA
// Now do the same thing using nextPassword
np := nextPassword(5, 2, []rune("ABCDE"))
for i := 0; i < 6; i++ {
fmt.Println(np())
} // BA, BB, BC, BD, BE, CA
}

Captured Closure (for Loop Variable) in Go

Shouldn't the Go compiler capture for...range loop variables as locally assigned closure variables?
Long version:
This caused me some confusion in C# too, and I was trying to understand why it was fixed for foreach in C# 5.0 (reason: the loop variable cannot change inside the body of the loop) but not for C# for loops (reason: the loop variable can change inside the body of the loop).
Now, for...range loops in Go seem to me much like foreach loops in C#, but despite the fact that we cannot alter those variables (like k and v in for k, v := range m { ... }), we still have to copy them to local variables first for the closures to behave as expected.
What is the reasoning behind this? (I suspect it's because Go treats any for loop the same way, but I'm not sure.)
Here is some code to examine described behavior:
func main() {
lab1() // captured closure is not what is expected
fmt.Println(" ")
lab2() // captured closure is not what is expected
fmt.Println(" ")
lab3() // captured closure behaves ok
fmt.Println(" ")
}
func lab3() {
m := make(map[int32]int32)
var i int32
for i = 1; i <= 10; i++ {
m[i] = i
}
l := [](func() (int32, int32)){}
for k, v := range m {
kLocal, vLocal := k, v // (C) captures just the right values assigned to k and v
l = append(l, func() (int32, int32) {
return kLocal, vLocal
})
}
for _, x := range l {
k, v := x()
fmt.Println(k, v)
}
}
func lab2() {
m := make(map[int32]int32)
var i int32
for i = 1; i <= 10; i++ {
m[i] = i
}
l := [](func() (int32, int32)){}
for k, v := range m {
l = append(l, func() (int32, int32) {
kLocal, vLocal := k, v // (B) captures just the last values assigned to k and v from the range
return kLocal, vLocal
})
}
for _, x := range l {
k, v := x()
fmt.Println(k, v)
}
}
func lab1() {
m := make(map[int32]int32)
var i int32
for i = 1; i <= 10; i++ {
m[i] = i
}
l := [](func() (int32, int32)){}
for k, v := range m {
l = append(l, func() (int32, int32) { return k, v }) // (A) captures just the last values assigned to k and v from the range
}
for _, x := range l {
k, v := x()
fmt.Println(k, v)
}
}
As shown in lab1, at comment // (A) we get just the last values from the range; the output is like printing 9,9 ten times instead of the expected 1,1, 2,2, ... (and of course maps are not necessarily ordered in Go, so we may see, say, 3,3 printed ten times as the last pair instead of 10,10). The same goes for the code at comment // (B) in lab2, which was expected because there we are capturing the outer variables inside the inner scope (I included that one just to try it). In lab3, at comment // (C), everything works fine and you will see ten pairs of numbers like 1,1, 2,2, ....
I was trying to use closure+function as a replacement for tuples in Go.
Do you want the closure over the variable or the value? For example,
package main
import "fmt"
func VariableLoop() {
f := make([]func(), 3)
for i := 0; i < 3; i++ {
// closure over variable i
f[i] = func() {
fmt.Println(i)
}
}
fmt.Println("VariableLoop")
for _, f := range f {
f()
}
}
func ValueLoop() {
f := make([]func(), 3)
for i := 0; i < 3; i++ {
i := i
// closure over value of i
f[i] = func() {
fmt.Println(i)
}
}
fmt.Println("ValueLoop")
for _, f := range f {
f()
}
}
func VariableRange() {
f := make([]func(), 3)
for i := range f {
// closure over variable i
f[i] = func() {
fmt.Println(i)
}
}
fmt.Println("VariableRange")
for _, f := range f {
f()
}
}
func ValueRange() {
f := make([]func(), 3)
for i := range f {
i := i
// closure over value of i
f[i] = func() {
fmt.Println(i)
}
}
fmt.Println("ValueRange")
for _, f := range f {
f()
}
}
func main() {
VariableLoop()
ValueLoop()
VariableRange()
ValueRange()
}
Output:
VariableLoop
3
3
3
ValueLoop
0
1
2
VariableRange
2
2
2
ValueRange
0
1
2
References:
The Go Programming Language Specification
Function literals
Function literals are closures: they may refer to variables defined in
a surrounding function. Those variables are then shared between the
surrounding function and the function literal, and they survive as
long as they are accessible.
Go FAQ: What happens with closures running as goroutines?
To bind the current value of v to each closure as it is launched, one
must modify the inner loop to create a new variable each iteration.
One way is to pass the variable as an argument to the closure.
Even easier is just to create a new variable, using a declaration
style that may seem odd but works fine in Go.

Why does the following golang program throw a runtime out of memory error?

This program is supposed to read a file consisting of pairs of ints (one pair per line) and remove duplicate pairs. While it works on small files, it throws a runtime error on huge files (say a file of 1.5 GB). Initially, I thought that it is the map data structure which is causing this, but even after commenting it out, it still runs out of memory. Any ideas why this is happening? How to rectify it? Here's a data file on which it runs out of memory: http://snap.stanford.edu/data/com-Orkut.html
package main
import (
"fmt"
"bufio"
"os"
"strings"
"strconv"
)
func main() {
file, err := os.Open(os.Args[1])
if err != nil {
panic(err.Error())
}
defer file.Close()
type Edge struct {
u, v int
}
//seen := make(map[Edge]bool)
edges := []Edge{}
scanner := bufio.NewScanner(file)
for i, _ := strconv.Atoi(os.Args[2]); i > 0; i-- {
scanner.Scan()
}
for scanner.Scan() {
str := scanner.Text()
edge := strings.Split(str, ",")
u, _ := strconv.Atoi(edge[0])
v, _ := strconv.Atoi(edge[1])
var key Edge
if u < v {
key = Edge{u,v}
} else {
key = Edge{v,u}
}
//if seen[key] {
// continue
//}
//seen[key] = true
edges = append(edges, key)
}
for _, e := range edges {
s := strconv.Itoa(e.u) + "," + strconv.Itoa(e.v)
fmt.Println(s)
}
}
A sample input is given below. The program can be run as follows (where the last input says how many lines to skip).
go run undup.go a.txt 1
# 3072441,117185083
1,2
1,3
1,4
1,5
1,6
1,7
1,8
I looked at this file: com-orkut.ungraph.txt and it contains 117,185,082 lines. The way your data is structured, that's at least 16 bytes per line (Edge is two 64-bit ints), which alone is 1.7 GB. I have had this problem in the past, and it can be a tricky one. Are you trying to solve this for a specific use case (the file in question) or the general case?
In the specific case there are a few things about the data you could leverage: (1) the keys are sorted, (2) it looks like every connection is stored twice, and (3) the numbers don't seem huge. Here are a couple of ideas:
If you use a smaller type for the key you will use less memory. Try a uint32.
You could stream (without using a map) the keys to another file by simply seeing if the 2nd column is greater than the first:
if u < v {
// write the key to another file
} else {
// skip it because v will eventually show v -> u
}
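A minimal sketch of that streaming idea, assuming tab-separated input as in com-orkut.ungraph.txt; the output file name here is made up and error handling is omitted for brevity:
package main

import (
	"bufio"
	"os"
	"strconv"
	"strings"
)

func main() {
	in, _ := os.Open(os.Args[1]) // errors ignored for brevity
	defer in.Close()
	out, _ := os.Create("unique-edges.txt") // hypothetical output file
	defer out.Close()
	w := bufio.NewWriter(out)
	defer w.Flush()
	scanner := bufio.NewScanner(in)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "#") {
			continue // skip comment lines
		}
		fields := strings.Fields(line)
		u, _ := strconv.Atoi(fields[0])
		v, _ := strconv.Atoi(fields[1])
		if u < v {
			// keep u -> v; the mirrored v -> u line is skipped
			w.WriteString(line + "\n")
		}
	}
}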
For the general case there are a couple strategies you could use:
If the order of the resulting list doesn't matter: use an on-disk hash table to store the map. There are a bunch of these: LevelDB, SQLite, Tokyo Tyrant, ... A really nice one for Go is Bolt.
In your for loop you would just check to see if a bucket contains the given key. (You can convert the ints into byte slices using encoding/binary) If it does, just skip it and continue. You will need to move the second for loop processing step into the first for loop so that you don't have to store all the keys.
If the order of the resulting list does matter (and you can't guarantee the input is in order): You can also use an on-disk hash table, but it needs to be sorted. Bolt is sorted so that will work. Add all the keys to it, then traverse it in the second loop.
Here is an example: (this program will take a while to run with 100 million records)
package main
import (
"bufio"
"encoding/binary"
"fmt"
"github.com/boltdb/bolt"
"os"
"strconv"
"strings"
)
type Edge struct {
u, v int
}
func FromKey(bs []byte) Edge {
return Edge{int(binary.BigEndian.Uint64(bs[:8])), int(binary.BigEndian.Uint64(bs[8:]))}
}
func (e Edge) Key() [16]byte {
var k [16]byte
binary.BigEndian.PutUint64(k[:8], uint64(e.u))
binary.BigEndian.PutUint64(k[8:], uint64(e.v))
return k
}
func main() {
file, err := os.Open(os.Args[1])
if err != nil {
panic(err.Error())
}
defer file.Close()
scanner := bufio.NewScanner(file)
for i, _ := strconv.Atoi(os.Args[2]); i > 0; i-- {
scanner.Scan()
}
db, _ := bolt.Open("ex.db", 0777, nil)
defer db.Close()
bucketName := []byte("edges")
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucketIfNotExists(bucketName)
return nil
})
batchSize := 10000
total := 0
batch := make([]Edge, 0, batchSize)
writeBatch := func() {
total += len(batch)
fmt.Println("write batch. total:", total)
db.Update(func(tx *bolt.Tx) error {
bucket := tx.Bucket(bucketName)
for _, edge := range batch {
key := edge.Key()
bucket.Put(key[:], nil)
}
return nil
})
}
for scanner.Scan() {
str := scanner.Text()
edge := strings.Split(str, "\t")
u, _ := strconv.Atoi(edge[0])
v, _ := strconv.Atoi(edge[1])
var key Edge
if u < v {
key = Edge{u, v}
} else {
key = Edge{v, u}
}
batch = append(batch, key)
if len(batch) == batchSize {
writeBatch()
// reset the batch length to 0
batch = batch[:0]
}
}
// write anything leftover
writeBatch()
db.View(func(tx *bolt.Tx) error {
tx.Bucket(bucketName).ForEach(func(k, v []byte) error {
edge := FromKey(k)
fmt.Println(edge)
return nil
})
return nil
})
}
You are squandering memory. Here's how to rectify it.
You give the sample input a.txt, 48 bytes.
# 3072441,117185083
1,2
1,3
1,4
1,5
On http://snap.stanford.edu/data/com-Orkut.html, I found http://snap.stanford.edu/data/bigdata/communities/com-orkut.ungraph.txt.gz, 1.8 GB uncompressed, 117,185,083 edges.
# Undirected graph: ../../data/output/orkut.txt
# Orkut
# Nodes: 3072441 Edges: 117185083
# FromNodeId ToNodeId
1 2
1 3
1 4
1 5
On http://socialnetworks.mpi-sws.org/data-imc2007.html, I found http://socialnetworks.mpi-sws.mpg.de/data/orkut-links.txt.gz, 3.4 GB uncompressed, 223,534,301 edges.
1 2
1 3
1 4
1 5
Since they are similar, one program can handle all formats.
Your Edge type is
type Edge struct {
u, v int
}
which is 16 bytes on a 64-bit architecture.
Use
type Edge struct {
U, V uint32
}
which is 8 bytes and is adequate.
If the capacity of a slice is not large enough to fit the additional values, append allocates a new, sufficiently large underlying array that fits both the existing slice elements and the additional values. Otherwise, append re-uses the underlying array. For a large slice, the new array is 1.25 times the size of the old array. While the old array is being copied to the new array, 1 + 1.25 = 2.25 times the memory for the old array is required. Therefore, allocate the underlying array so that all values fit.
make(T, n) initializes a map of type T with initial space for n elements. Provide a value for n to limit the cost of reorganization and fragmentation as elements are added. Hash functions are often imperfect, which leads to wasted space. Here, eliminate the map entirely, as it's unnecessary: to eliminate duplicates, sort the slice in place and move the unique elements down.
A string is immutable, so a new string is allocated each time scanner.Text() converts from the byte slice buffer. To parse numbers we use strconv. To minimize temporary allocations, use scanner.Bytes() and adapt strconv.ParseUint to accept a byte slice argument (bytconv).
For example,
orkut.go
package main
import (
"bufio"
"bytes"
"errors"
"fmt"
"os"
"runtime"
"sort"
"strconv"
)
type Edge struct {
U, V uint32
}
func (e Edge) String() string {
return fmt.Sprintf("%d,%d", e.U, e.V)
}
type ByKey []Edge
func (a ByKey) Len() int { return len(a) }
func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByKey) Less(i, j int) bool {
if a[i].U < a[j].U {
return true
}
if a[i].U == a[j].U && a[i].V < a[j].V {
return true
}
return false
}
func countEdges(scanner *bufio.Scanner) int {
var nNodes, nEdges int
for scanner.Scan() {
line := scanner.Bytes()
if !(len(line) > 0 && line[0] == '#') {
nEdges++
continue
}
n, err := fmt.Sscanf(string(line), "# Nodes: %d Edges: %d", &nNodes, &nEdges)
if err != nil || n != 2 {
n, err = fmt.Sscanf(string(line), "# %d,%d", &nNodes, &nEdges)
if err != nil || n != 2 {
continue
}
}
fmt.Println(string(line))
break
}
if err := scanner.Err(); err != nil {
panic(err.Error())
}
fmt.Println(nEdges)
return nEdges
}
func loadEdges(filename string) []Edge {
file, err := os.Open(filename)
if err != nil {
panic(err.Error())
}
defer file.Close()
scanner := bufio.NewScanner(file)
nEdges := countEdges(scanner)
edges := make([]Edge, 0, nEdges)
offset, err := file.Seek(0, os.SEEK_SET)
if err != nil || offset != 0 {
panic(err.Error())
}
var sep byte = '\t'
scanner = bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Bytes()
if len(line) > 0 && line[0] == '#' {
continue
}
i := bytes.IndexByte(line, sep)
if i < 0 || i+1 >= len(line) {
sep = ','
i = bytes.IndexByte(line, sep)
if i < 0 || i+1 >= len(line) {
err := errors.New("Invalid line format: " + string(line))
panic(err.Error())
}
}
u, err := ParseUint(line[:i], 10, 32)
if err != nil {
panic(err.Error())
}
v, err := ParseUint(line[i+1:], 10, 32)
if err != nil {
panic(err.Error())
}
if u > v {
u, v = v, u
}
edges = append(edges, Edge{uint32(u), uint32(v)})
}
if err := scanner.Err(); err != nil {
panic(err.Error())
}
if len(edges) <= 1 {
return edges
}
sort.Sort(ByKey(edges))
j := 0
i := j + 1
for ; i < len(edges); i, j = i+1, j+1 {
if edges[i] == edges[j] {
break
}
}
for ; i < len(edges); i++ {
if edges[i] != edges[j] {
j++
edges[j] = edges[i]
}
}
edges = edges[:j+1]
return edges
}
func main() {
if len(os.Args) <= 1 {
err := errors.New("Missing file name")
panic(err.Error())
}
filename := os.Args[1]
fmt.Println(filename)
edges := loadEdges(filename)
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
fmt.Println(ms.Alloc, ms.TotalAlloc, ms.Sys, ms.Mallocs, ms.Frees)
fmt.Println(len(edges), cap(edges))
for i, e := range edges {
fmt.Println(e)
if i >= 10 {
break
}
}
}
// bytconv from strconv
// Return the first number n such that n*base >= 1<<64.
func cutoff64(base int) uint64 {
if base < 2 {
return 0
}
return (1<<64-1)/uint64(base) + 1
}
// ParseUint is like ParseInt but for unsigned numbers.
func ParseUint(s []byte, base int, bitSize int) (n uint64, err error) {
var cutoff, maxVal uint64
if bitSize == 0 {
bitSize = int(strconv.IntSize)
}
s0 := s
switch {
case len(s) < 1:
err = strconv.ErrSyntax
goto Error
case 2 <= base && base <= 36:
// valid base; nothing to do
case base == 0:
// Look for octal, hex prefix.
switch {
case s[0] == '0' && len(s) > 1 && (s[1] == 'x' || s[1] == 'X'):
base = 16
s = s[2:]
if len(s) < 1 {
err = strconv.ErrSyntax
goto Error
}
case s[0] == '0':
base = 8
default:
base = 10
}
default:
err = errors.New("invalid base " + strconv.Itoa(base))
goto Error
}
n = 0
cutoff = cutoff64(base)
maxVal = 1<<uint(bitSize) - 1
for i := 0; i < len(s); i++ {
var v byte
d := s[i]
switch {
case '0' <= d && d <= '9':
v = d - '0'
case 'a' <= d && d <= 'z':
v = d - 'a' + 10
case 'A' <= d && d <= 'Z':
v = d - 'A' + 10
default:
n = 0
err = strconv.ErrSyntax
goto Error
}
if int(v) >= base {
n = 0
err = strconv.ErrSyntax
goto Error
}
if n >= cutoff {
// n*base overflows
n = 1<<64 - 1
err = strconv.ErrRange
goto Error
}
n *= uint64(base)
n1 := n + uint64(v)
if n1 < n || n1 > maxVal {
// n+v overflows
n = 1<<64 - 1
err = strconv.ErrRange
goto Error
}
n = n1
}
return n, nil
Error:
return n, &strconv.NumError{"ParseUint", string(s0), err}
}
Output:
$ go build orkut.go
$ time ./orkut ~/release-orkut-links.txt
/home/peter/release-orkut-links.txt
223534301
1788305680 1788327856 1904683256 135 50
117185083 223534301
1,2
1,3
1,4
1,5
1,6
1,7
1,8
1,9
1,10
1,11
1,12
real 2m53.203s
user 2m51.584s
sys 0m1.628s
$
The orkut.go program with the release-orkut-links.txt file (3,372,855,860 (3.4 GB) bytes with 223,534,301 edges) uses about 1.8 GiB of memory. After eliminating duplicates, 117,185,083 unique edges remain. This matches the 117,185,083 unique edge com-orkut.ungraph.txt file.
With 8 GB of memory on your machine, you can load much larger files.

Golang: find first character in a String that doesn't repeat

I'm trying to write a function that finds the first character in a string that doesn't repeat; so far I have this:
package main
import (
"fmt"
"strings"
)
func check(s string) string {
ss := strings.Split(s, "")
smap := map[string]int{}
for i := 0; i < len(ss); i++ {
(smap[ss[i]])++
}
for k, v := range smap {
if v == 1 {
return k
}
}
return ""
}
func main() {
fmt.Println(check("nebuchadnezzer"))
}
Unfortunately in Go when you iterate a map there's no guarantee of the order so every time I run the code I get a different value, any pointers?
Using a map and 2 loops:
play
func check(s string) string {
m := make(map[rune]uint, len(s)) //preallocate the map size
for _, r := range s {
m[r]++
}
for _, r := range s {
if m[r] == 1 {
return string(r)
}
}
return ""
}
The benefit of this is using just 2 loops, versus multiple loops if you're using strings.ContainsRune and strings.IndexRune (each of those functions has inner loops of its own).
An efficient (in time and memory) algorithm for grabbing all of the unique bytes, or just the first one (http://play.golang.org/p/ZGFepvEXFT):
func FirstUniqueByte(s string) (b byte, ok bool) {
occur := [256]byte{}
order := make([]byte, 0, 256)
for i := 0; i < len(s); i++ {
b = s[i]
switch occur[b] {
case 0:
occur[b] = 1
order = append(order, b)
case 1:
occur[b] = 2
}
}
for _, b = range order {
if occur[b] == 1 {
return b, true
}
}
return 0, false
}
As a bonus, the above function should never generate any garbage. Note that I changed your function signature to be a more idiomatic way to express what you're describing. If you need a func(string) string signature anyway, then the point is moot.
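If you do need that signature, a hypothetical wrapper (not part of the answer above) is trivial:
func check(s string) string {
	// adapt FirstUniqueByte back to a func(string) string signature
	if b, ok := FirstUniqueByte(s); ok {
		return string([]byte{b})
	}
	return ""
}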
That can certainly be optimized, but one solution (which doesn't use a map) would be:
(playground example)
func check(s string) string {
unique := ""
for pos, c := range s {
if strings.ContainsRune(unique, c) {
unique = strings.Replace(unique, string(c), "", -1)
} else if strings.IndexRune(s, c) == pos {
unique = unique + string(c)
}
}
fmt.Println("All unique characters found: ", unique)
if len(unique) > 0 {
_, size := utf8.DecodeRuneInString(unique)
return unique[:size]
}
return ""
}
This is after the question "Find the first un-repeated character in a string".
krait suggested below that the function should return a string containing the first full rune, not just the first byte of the UTF-8 encoding of the first rune.

Go: What is the fastest/cleanest way to remove multiple entries from a slice?

How would you implement the deleteRecords function in the code below:
Example:
type Record struct {
id int
name string
}
type RecordList []*Record
func deleteRecords( l *RecordList, ids []int ) {
// Assume the RecordList can contain several 100 entries.
// and the number of the of the records to be removed is about 10.
// What is the fastest and cleanest ways to remove the records that match
// the id specified in the records list.
}
I did some micro-benchmarking on my machine, trying out most of the approaches given in the replies here, and this code comes out fastest when you've got up to about 40 elements in the ids list:
func deleteRecords(data []*Record, ids []int) []*Record {
w := 0 // write index
loop:
for _, x := range data {
for _, id := range ids {
if id == x.id {
continue loop
}
}
data[w] = x
w++
}
return data[:w]
}
You didn't say whether it's important to preserve the order of records in the list. If it isn't, then this function is faster than the one above and still fairly clean.
func reorder(data []*Record, ids []int) []*Record {
n := len(data)
i := 0
loop:
for i < n {
r := data[i]
for _, id := range ids {
if id == r.id {
data[i] = data[n-1]
n--
continue loop
}
}
i++
}
return data[0:n]
}
As the number of ids rises, so does the cost of the linear search. At around 50 elements, using a map or doing a binary search to look up the id becomes more efficient, as long as you can avoid rebuilding the map (or resorting the list) every time. At several hundred ids, it becomes more efficient to use a map or a binary search even if you have to rebuild it every time.
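As a rough sketch of the binary-search variant (not one of the benchmarked functions; it assumes ids has been sorted once up front with sort.Ints and that the sort package is imported):
func deleteRecordsSorted(data []*Record, sortedIDs []int) []*Record {
	w := 0 // write index
	for _, x := range data {
		// sort.SearchInts returns the insertion index for x.id; the id is
		// present only if that position holds an equal value.
		i := sort.SearchInts(sortedIDs, x.id)
		if i < len(sortedIDs) && sortedIDs[i] == x.id {
			continue // drop this record
		}
		data[w] = x
		w++
	}
	return data[:w]
}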
If you wish to preserve the original contents of the slice, something like this is more appropriate:
func deletePreserve(data []*Record, ids []int) []*Record {
wdata := make([]*Record, len(data))
w := 0
loop:
for _, x := range data {
for _, id := range ids {
if id == x.id {
continue loop
}
}
wdata[w] = x
w++
}
return wdata[0:w]
}
For a personal project, I did something like this:
func filter(sl []int, fn func(int) bool) []int {
result := make([]int, 0, len(sl))
last := 0
for i, v := range sl {
if fn(v) {
result = append(result, sl[last:i]...)
last = i + 1
}
}
return append(result, sl[last:]...)
}
It doesn't mutate the original, but should be relatively efficient.
It's probably better to just do:
func filter(sl []int, fn func(int) bool) (result []int) {
for _, v := range sl {
if !fn(v) {
result = append(result, v)
}
}
return
}
Simpler and cleaner.
If you want to do it in-place, you probably want something like:
func filter(sl []int, fn func(int) bool) []int {
outi := 0
res := sl
for _, v := range sl {
if !fn(v) {
res[outi] = v
outi++
}
}
return res[0:outi]
}
You can optimize this to use copy to copy ranges of elements, but that's twice
the code and probably not worth it.
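For reference, a hypothetical copy-based variant of the in-place filter above (kept elements are moved in runs rather than one at a time):
func filterCopy(sl []int, fn func(int) bool) []int {
	outi := 0  // next write position
	start := 0 // start of the current run of kept elements
	for i, v := range sl {
		if fn(v) {
			// flush the kept run sl[start:i] down to outi
			outi += copy(sl[outi:], sl[start:i])
			start = i + 1
		}
	}
	outi += copy(sl[outi:], sl[start:])
	return sl[:outi]
}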
So, in this specific case, I'd probably do something like:
func deleteRecords(l []*Record, ids []int) []*Record {
outi := 0
L:
for _, v := range l {
for _, id := range ids {
if v.id == id {
continue L
}
}
l[outi] = v
outi++
}
return l[0:outi]
}
(Note: untested.)
No allocations, nothing fancy, and assuming the rough size of the list of Records and the list of ids you presented, a simple linear search is likely to do as well as fancier things but without any overhead. I realize that my version mutates the slice and returns a new slice, but that's not un-idiomatic in Go, and it avoids forcing the slice at the callsite to be heap allocated.
For the case you described, where len(ids) is approximately 10 and len(*l) is in the several hundreds, this should be relatively fast, since it minimizes memory allocations by updating in place.
package main
import (
"fmt"
"strconv"
)
type Record struct {
id int
name string
}
type RecordList []*Record
func deleteRecords(l *RecordList, ids []int) {
rl := *l
for i := 0; i < len(rl); i++ {
rid := rl[i].id
for j := 0; j < len(ids); j++ {
if rid == ids[j] {
copy(rl[i:len(*l)-1], rl[i+1:])
rl[len(rl)-1] = nil
rl = rl[:len(rl)-1]
break
}
}
}
*l = rl
}
func main() {
l := make(RecordList, 777)
for i := range l {
l[i] = &Record{int(i), "name #" + strconv.Itoa(i)}
}
ids := []int{0, 1, 2, 4, 8, len(l) - 1, len(l)}
fmt.Println(ids, len(l), cap(l), *l[0], *l[1], *l[len(l)-1])
deleteRecords(&l, ids)
fmt.Println(ids, len(l), cap(l), *l[0], *l[1], *l[len(l)-1])
}
Output:
[0 1 2 4 8 776 777] 777 777 {0 name #0} {1 name #1} {776 name #776}
[0 1 2 4 8 776 777] 772 777 {1 name #1} {3 name #3} {775 name #775}
Instead of repeatedly searching ids, you could use a map. This code preallocates the full size of the map, and then just moves array elements in place. There are no other allocations.
func deleteRecords(l *RecordList, ids []int) {
m := make(map[int]bool, len(ids))
for _, id := range ids {
m[id] = true
}
s, x := *l, 0
for _, r := range s {
if !m[r.id] {
s[x] = r
x++
}
}
*l = s[0:x]
}
Use the vector package's Delete method as a guide, or just use a Vector instead of a slice. (That refers to the old container/vector package, which has since been removed from the standard library.)
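In the spirit of that suggestion, a minimal Delete-style helper for a single index might look like this (a sketch, not the container/vector code):
// deleteAt removes the element at index i from l, preserving order,
// much like a vector Delete: shift the tail left and shorten by one.
func deleteAt(l RecordList, i int) RecordList {
	copy(l[i:], l[i+1:])
	l[len(l)-1] = nil // let the removed *Record be garbage collected
	return l[:len(l)-1]
}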
Here is one option, but I would hope there are cleaner/faster, more functional-looking ones:
func deleteRecords(l *RecordList, ids []int) *RecordList {
	var newList RecordList
	for _, rec := range *l {
		toRemove := false
		for _, id := range ids {
			if rec.id == id {
				toRemove = true
				break
			}
		}
		// append only after scanning all ids for this record
		if !toRemove {
			newList = append(newList, rec)
		}
	}
	return &newList
}
With large enough l and ids, it will be more effective to Sort() both lists first and then do a single pass over them instead of two nested loops, as sketched below.
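A hypothetical sketch of that sorted, single-pass approach (it assumes reordering the record list is acceptable, since both inputs are sorted in place, and that the sort package is imported):
func deleteRecordsMerge(l RecordList, ids []int) RecordList {
	sort.Slice(l, func(i, j int) bool { return l[i].id < l[j].id })
	sort.Ints(ids)
	out, j := l[:0], 0 // out reuses l's backing array
	for _, rec := range l {
		// advance past ids smaller than the current record's id
		for j < len(ids) && ids[j] < rec.id {
			j++
		}
		if j < len(ids) && ids[j] == rec.id {
			continue // this record's id is in ids, so drop it
		}
		out = append(out, rec)
	}
	return out
}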
