How to find sum of integers using recursion(without loops) in Golang? - algorithm

Program should ask values "number of numbers" and "numbers" for each "number of inputs", answers are sum of squares of these numbers. My code works but it shows answers in wrong order, how to make it work properly? Outputs should be shown after all inputs.
I think its easier to understand this program by reading inputs and outputs:
Enter the number of inputs // output
2 // input
Enter the number of numbers // output
2 // input
Enter the numbers // output
1 2 // input (second ans)
Enter the number of numbers // output
2 // input
Enter the numbers
2 3 // input (first ans)
ans = 13 // ans = 2^2 + 3^2
ans = 5 () // ans = 1^2 + 2^2
MyCode:
package main
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
func main() {
reader := bufio.NewReader(os.Stdin)
fmt.Println(`Enter the number of inputs`)
n, _ := reader.ReadString('\n')
n = strings.TrimRight(n, "\r\n")
test_cases, err := strconv.Atoi(n)
if err != nil {
fmt.Println(err)
}
process_test_case(test_cases, reader)
}
func process_test_case(test_cases int, reader *bufio.Reader) {
fmt.Println(`Enter the number of numbers`)
_, _ = reader.ReadString('\n')
fmt.Println(`Enter the numbers`)
input, _ := reader.ReadString('\n')
input = strings.TrimRight(input, "\r\n")
arr := strings.Split(input, " ")
test_cases -= 1
if test_cases != 0 {
process_test_case(test_cases, reader)
}
fmt.Println("ans = ", process_array(arr, 0))
}
func process_array(arr []string, result int) int {
num, _ := strconv.Atoi(arr[0])
if len(arr) > 1 {
next := arr[1:]
if num < 0 {
num = 0
}
result = num*num + process_array(next, result)
return result
} else {
if num >= 0 {
return num * num
}
return 0
}
}

How to find sum of integers using recursion (without loops) in Go?
The program should ask "number of numbers" and "numbers" for each "number of inputs", answers are sum of squares of these numbers.
Here is an answer to the question, a recursive solution in Go:
$ go run sumsq.go
Enter the number of inputs:
2
Enter the number of numbers:
2
Enter the numbers:
1
2
Enter the number of numbers:
2
Enter the numbers:
2
3
Sum of Squares:
5
13
$
package main
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
func readInt(r *bufio.Reader) int {
line, err := r.ReadString('\n')
line = strings.TrimSpace(line)
if err != nil {
if len(line) == 0 {
return 0
}
}
i, err := strconv.Atoi(line)
if err != nil {
return 0
}
return i
}
func nSquares(n int, r *bufio.Reader) int {
if n == 0 {
return 0
}
i := readInt(r)
return i*i + nSquares(n-1, r)
}
func nNumbers(n int, r *bufio.Reader, sums *[]int) int {
if n == 0 {
return 0
}
fmt.Println("\nEnter the number of numbers: ")
i := readInt(r)
fmt.Println("Enter the numbers: ")
*sums = append(*sums, nSquares(i, r))
return nNumbers(n-1, r, sums)
}
func nInputs(r *bufio.Reader) []int {
fmt.Println("Enter the number of inputs: ")
i := readInt(r)
sums := make([]int, 0, i)
nNumbers(i, r, &sums)
return sums
}
func sumSqrs(sums []int) {
if len(sums) == 0 {
return
}
fmt.Println(sums[0])
sumSqrs(sums[1:])
}
func main() {
r := bufio.NewReader(os.Stdin)
fmt.Println()
sums := nInputs(r)
fmt.Println("\nSum of Squares:")
sumSqrs(sums)
fmt.Println()
}

I have created a sample code for your scenario. You can modify it by using bufio.NewReader(os.Stdin)
func process_array(arr []string) int {
res := 0
for _, v := range arr {
num, err := strconv.Atoi(v)
if err != nil {
panic(err)
}
fmt.Println("num :", num)
res += num * num
}
return res
}
func process_test_case() int {
fmt.Println(`Enter the number of numbers`)
num := 2
fmt.Println("number of numbers :", num)
fmt.Println(`Enter the numbers`)
input := "1 2"
fmt.Println("the numbers :", input)
arr := strings.Split(input, " ")
res := process_array(arr)
return res
}
func main() {
fmt.Println(`Enter the number of inputs`)
test_cases := 1
fmt.Println("number of inputs :", test_cases)
for test_cases >= 1 {
res := process_test_case()
fmt.Println(res)
test_cases -= 1
}
}
You can run it here : https://go.dev/play/p/zGkAln2ghZp
OR
As commented by #phonaputer you can change the sequence. Return the slice and print it from the end.

I think this code answer your title of the question:
package main
import "fmt"
func SumValues(x int, y ...int) (sum int) {
q := len(y) - 1
sum = y[q - x]
if x < q {
sum += SumValues(x + 1, y...)
}
return sum
}
func main() {
sum := SumValues(0,1,2,3,4,5)
fmt.Println("Sum is:", sum)
}

Related

How to read inputs recursively in golang

In the following code after one recursion the inputs are not read(from stdin). Output is incorrect if N is greater than 1.
X is read as 0 after one recursive call and hence the array is not read after that.
Program is supposed to print sum of squares of positive numbers in the array. P.S has to done only using recursion
package main
// Imports
import (
"fmt"
"bufio"
"os"
"strings"
"strconv"
)
// Global Variables
var N int = 0;
var X int = 0;
var err error;
var out int = 0;
var T string = "0"; // All set to 0 just in case there is no input, so we don't crash with nil values.
func main() {
// Let's grab our input.
fmt.Print("Enter N: ")
fmt.Scanln(&N)
// Make our own recursion.
loop()
}
func loop() {
if N == 0 {return}
// Grab our array length.
fmt.Scanln(&X)
tNum := make([]string, X)
// Grab our values and put them into an array.
in := bufio.NewReader(os.Stdin)
T, err = in.ReadString('\n')
tNum = strings.Fields(T)
// Parse the numbers, square, and add.
add(tNum)
// Output and reset.
fmt.Print(out)
out = 0;
N--
loop()
}
// Another loop, until X is 0.
func add(tNum []string) {
if X == 0 {return}
// Parse a string to an integer.
i, err := strconv.Atoi(tNum[X-1])
if err != nil {}
// If a number is negative, make it 0, so when we add its' square, it does nothing.
if (i < 0) {
i = 0;
}
// Add to our total!
out = out + i*i
X--
add(tNum)
}
Input:
2
4
2 4 6 8
3
1 3 9
Output:
1200
Expected output:
120
91
bufio.Reader, like the name suggests, use a buffer to store what is in the reader (os.Stdin here), which means, each time you create a bufio.Reader and read it once, there are more than what is read stored into the buffer, and thus the next time you read from the reader (os.Stdin), you do not read from where you left.
You should only have one bufio.Reader for os.Stdin. Make it global (if that is a requirement) or make it an argument. In fact, bufio package has a Scanner type that can splits spaces and new lines so you don't need to call strings.Fields.
I think you should practise doing this yourself, but here is a playground link: https://play.golang.org/p/7zBDYwqWEZ0
Here is an example that illustrates the general principles.
// Print the sum of the squares of positive numbers in the input.
package main
import (
"bufio"
"fmt"
"io"
"os"
"strconv"
"strings"
)
func sumOfSquares(sum int, s *bufio.Scanner, err error) (int, *bufio.Scanner, error) {
if err != nil {
return sum, s, err
}
if !s.Scan() {
err = s.Err()
if err == nil {
err = io.EOF
}
return sum, s, err
}
for _, f := range strings.Fields(s.Text()) {
i, err := strconv.Atoi(f)
if err != nil || i <= 0 {
continue
}
sum += i * i
}
return sumOfSquares(sum, s, nil)
}
func main() {
sum := 0
s := bufio.NewScanner(os.Stdin)
sum, s, err := sumOfSquares(sum, s, nil)
if err != nil && err != io.EOF {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
fmt.Println(sum)
}
Input:
2
4
2 4 6 8
3
1 3 9
Output:
240

Golang Calculator -- Can't divide by 0 error

I'm actually in a bit of a trouble...
I have a calculator, but when I want to divide nubers with them, I have a panic err saying that you can't divide by 0.
Well, I know that in maths we can't divide by 0, but I don't put 0 in my ints.
Any idea of the problem ?
Here is the code :
package main
import (
"fmt"
"os"
"strconv"
)
func mult(nums ...int) {
result := 0
total := 1
for _, num := range nums {
result = total * num
total = result
}
fmt.Println(result)
}
func add(nums ...int){
result := 0
total := 0
for _, num := range nums {
result = total + num
total = result
}
fmt.Println(result)
}
func sub(nums ...int){
result := 0
total := 0
for _, num := range nums {
result = num - total
total = result
}
fmt.Println(result)
}
func div(nums ...int){
result := 1
total := 1
for _, num := range nums {
result = num / total
total = result
}
fmt.Println(result)
}
func main() {
var d [] int
var args= os.Args[1:]
nums := make([]int, len(args))
for i := 0; i < len(args); i++ {
nums[i], _ = strconv.Atoi(args[i]);
strconv.Atoi(args[i])
d = append(d, nums[i])
}
num := d
if os.Args[1] == "*"{
mult(num...)
} else if os.Args[1] == "+"{
add(num...)
} else if os.Args[1] == "-"{
sub(num...)
} else if os.Args[1] == "/"{
div(num...)
} else {
fmt.Println("Well well well, you didn't entered a right operand ! Try with +, -, /, or * between double quotes")
}
}
The command I want to run this go code is :
go run calc.exe / 3 2 [Infinite args,...]
If your first parameter will always be a operator select, you can do something like that in your main func, you have a two problems in your main, you are ignoring the convertion error of a string to int and then this index of your array are setted with 0, and you are defining the array larger than you need because your first parameter it's not a number to your div func
nums := make([]int, len(args)-1)
for i := 0; i < len(args); i++ {
ret, errAtoi := strconv.Atoi(args[i])
if errAtoi != nil {
fmt.Println(errAtoi.Error())
} else {
nums[i-1] = ret
d = append(d, nums[i-1])
}
}

Parse number string digits

I am trying to calculate the multiplication result of a few digits which are part of a long digits string. Here is my code:
package main
import (
"fmt"
"strconv"
)
func main() {
inputNum := "73167176531330624919225119"
mult := getMult(3, inputNum)
fmt.Printf("Mult = %d", mult)
}
func getMult(startIndex int, inputNum string) int {
mult := 0
for i := 0; i < 10; i++ {
digit, err := strconv.Atoi(string(inputNum[startIndex+i]))
if err != nil {
mult *= int(digit)
} else {
fmt.Printf("Error converting %s to int : %s\n", string(inputNum[startIndex+i]), err.Error())
}
}
return mult
}
The result I want to get is 6*7*1*7*6*5*3*1*3*3 = 238140
But I an getting a runtime error:
panic: runtime error: invalid memory address or nil pointer dereference
[signal 0xc0000005 code=0x0 addr=0x20 pc=0x40130e]
goroutine 1 [running]:
main.getMult(0x3, 0x534d40, 0x1a, 0x4d2701)
test.go:25 +0x17e
main.main()
test.go:10 +0x55
exit status 2
There are a couple problems...
First, you need to start mult at 1, otherwise you will just continually multiply by 0 and always get 0.
Secondly you have the logic for your err check flipped. It should be if err == nil
This seems to do what you want:
func getMult(startIndex int, inputNum string) int {
mult := 1
for i := 0; i < 10; i++ {
digit, err := strconv.Atoi(string(inputNum[startIndex+i]))
if err == nil {
mult *= int(digit)
} else {
fmt.Println(err)
}
}
return mult
}
The error you were getting was because you were trying to print err.Error() when err itself was nil (due to the logical flip of != and ==)
your code will work with fixing these two typos:
change mult := 0 to mult := 1
and change err != nil to err == nil like this:
package main
import (
"fmt"
"strconv"
)
func main() {
inputNum := "73167176531330624919225119"
mult := getMult(3, inputNum)
fmt.Printf("Mult = %d", mult)
}
func getMult(startIndex int, inputNum string) int {
mult := 1
for i := 0; i < 10; i++ {
digit, err := strconv.Atoi(string(inputNum[startIndex+i]))
if err == nil {
mult *= int(digit)
} else {
fmt.Printf("Error converting %s to int : %s\n", string(inputNum[startIndex+i]), err.Error())
}
}
return mult
}
also you may use inputNum[3:13] to get new string from index 3 with length 10,
and you may use int(v - '0') to convert one character to integer number,
then use for range like this:
package main
import "fmt"
func main() {
inputNum := "73167176531330624919225119"
mult := getMult(inputNum[3:13])
fmt.Printf("Mult = %d \n", mult) // Mult = 238140
}
func getMult(str string) int {
m := 1
for _, v := range str {
if v >= '0' && v <= '9' {
m *= int(v - '0')
} else {
fmt.Printf("Error converting %q to int\n", v)
break
}
}
return m
}
output:
Mult = 238140

Challenge of finding 3 pairs in array

The length L at the time of joining, when the length of the bar of the N (1 ≦ N ≦ 5000) is supplied from standard input, is the L by connecting three lengths among the N number of bars please write a program to find the total number of combinations of. However, and the length of the individual bars, length was pieced together (L) is a positive integer, is sufficient handle range in 32bit integer. In addition, it has all the length of the bar different things.
for example)
input:
15
5
8
4
10
3
2
output:
2 //{{2, 3, 10}, {3, 4, 8}}
example 2)
input :
35
10
13
12
17
10
4
18
3
11
5
7
output:
6 //{{4, 13, 18}, {5, 12, 18}, {5, 13, 17}, {7, 10, 18}, {7, 11, 17}, {10, 12, 13}}
and my answer is here
package main
import (
"fmt"
"sort"
)
func main() {
input_count := 0
var target int
var count int
var v int
var array []int
for read_count, _ := fmt.Scan(&v); read_count != 0; read_count, _ = fmt.Scan(&v) {
if 0 == input_count {
target = v
} else if 1 == input_count {
count = v
array = make([]int, count)
} else {
array[input_count-2] = v
}
input_count++
}
sort.Ints(array)
fmt.Println(Calculate(target, count, array))
}
func Except(pair []int, count int, array []int) []int {
except := make([]int, count-pair[2])
except_index := 0
on := false
for _, v := range array {
if on {
except[except_index] = v
except_index++
}
if v == pair[1] {
on = true
}
}
return except
}
func ListUp(target int, count int, array []int) [][]int {
max := array[count-1]
list := make([][]int, Fact(count-1))
list_index := 0
for i, h := range array {
if count > i+1 && target > h+array[i+1] {
for j, v := range array[i+1:] {
if count > i+j+1 && target <= max+h+v && target > h+v {
list[list_index] = []int{h, v, i + j + 1}
list_index++
}
}
}
}
return list
}
//func Calculate(target int, count int, array []int) [][]int {
func Calculate(target int, count int, array []int) int {
// var answers [][]int
answer_count := 0
for _, pair := range ListUp(target, count, array) {
if 3 == len(pair) {
pair_sum := pair[0] + pair[1]
if target-pair_sum >= array[0] {
for _, v := range Except(pair, count, array) {
if target == pair[0]+pair[1]+v {
// answers = append(answers, []int{pair[0], pair[1], v})
answer_count++
}
}
}
}
}
return answer_count
}
func Fact(n int) int {
if n == 0 {
return 0
}
return n + Fact(n-1)
}
Does anyone who can refactor the code?
and you should refactor it
if input
https://github.com/freddiefujiwara/horiemon-challenge-codeiq/blob/master/sample4.txt
then output
1571200
in 10 seconds
current status is here
time ./horiemon-challenge-codeiq < sample4.txt
1571200
real 6m56.584s
user 6m56.132s
sys 0m1.578s
very very slow.
Your time of almost seven minutes is very, very slow. Ten seconds is slow. One second is more reasonable, a tenth of a second is good. For example, using an O(N*N) algorithm,
package main
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"sort"
"strconv"
"strings"
)
func triples(l int, b []int) int {
t := 0
sort.Ints(b)
// for i < j < k, b[i] <= b[j] <= b[k]
for i := 0; i < len(b)-2; i++ {
x := b[i]
if x > l {
break
}
lx := l - x
j, k := i+1, len(b)-1
y := b[j]
z := b[k]
for j < k {
yz := y + z
switch {
case lx > yz:
j++
y = b[j]
case lx < yz:
k--
z = b[k]
default:
// l == b[i]+b[j]+b[k]
t++
j++
k--
y = b[j]
z = b[k]
}
}
}
return t
}
func readInput() (l int, b []int, err error) {
r := bufio.NewReader(os.Stdin)
for {
line, err := r.ReadString('\n')
line = strings.TrimSpace(line)
if err == nil && len(line) == 0 {
err = io.EOF
}
if err != nil {
if err == io.EOF {
break
}
return 0, nil, err
}
i, err := strconv.Atoi(string(line))
if err == nil && i < 0 {
err = errors.New("Nonpositive number: " + line)
}
if err != nil {
return 0, nil, err
}
b = append(b, i)
}
if len(b) > 0 {
l = b[0]
b = b[1:]
if len(b) > 1 {
n := b[0]
b = b[1:]
if n != len(b) {
err := errors.New("Invalid number of bars: " + strconv.Itoa(len(b)))
return 0, nil, err
}
}
}
return l, b, nil
}
func main() {
l, b, err := readInput()
if err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
t := triples(l, b)
fmt.Println(t)
}
Output:
1571200
real 0m0.164s
user 0m0.161s
sys 0m0.004s
For comparison, your program,
Output:
1571200
real 9m24.384s
user 16m14.592s
sys 0m19.129s
ive tuned
package main
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"sort"
"strconv"
"strings"
)
type triple struct {
x, y, z int
}
func triples(l int, n []int, list bool) (nt int, t []triple) {
num_of_list := len(n)
for i := 0; i < num_of_list-2; i++ {
x := n[i]
if x > l {
break
}
for j := i + 1; j < num_of_list-1; j++ {
y := x + n[j]
if y > l {
break
}
pos := sort.SearchInts(n[j:], l-y)
if j < pos+j && pos+j < num_of_list && n[pos+j] == l-y {
nt++
}
}
}
return nt, t
}
func readInput() (l int, n []int, err error) {
r := bufio.NewReader(os.Stdin)
for {
line, err := r.ReadString('\n')
line = strings.TrimSpace(line)
if err == nil && len(line) == 0 {
err = io.EOF
}
if err != nil {
if err == io.EOF {
break
}
return 0, nil, err
}
i, err := strconv.Atoi(string(line))
if err == nil && i < 0 {
err = errors.New("Nonpositive number: " + line)
}
if err != nil {
return 0, nil, err
}
n = append(n, i)
}
if len(n) > 0 {
l = n[0]
n = n[1:]
}
sort.Ints(n)
for i := 1; i < len(n); i++ {
if n[i] == n[i-1] {
copy(n[i:], n[i+1:])
n = n[:len(n)-1]
}
}
return l, n, nil
}
func main() {
l, n, err := readInput()
if err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
list := false
nt, t := triples(l, n, list)
fmt.Println(nt)
if list {
fmt.Println(t)
}
}

Why does the following golang program throw a runtime out of memory error?

This program is supposed to read a file consisting of pairs of ints (one pair per line) and remove duplicate pairs. While it works on small files, it throws a runtime error on huge files (say a file of 1.5 GB). Initially, I thought that it is the map data structure which is causing this, but even after commenting it out, it still runs out of memory. Any ideas why this is happening? How to rectify it? Here's a data file on which it runs out of memory: http://snap.stanford.edu/data/com-Orkut.html
package main
import (
"fmt"
"bufio"
"os"
"strings"
"strconv"
)
func main() {
file, err := os.Open(os.Args[1])
if err != nil {
panic(err.Error())
}
defer file.Close()
type Edge struct {
u, v int
}
//seen := make(map[Edge]bool)
edges := []Edge{}
scanner := bufio.NewScanner(file)
for i, _ := strconv.Atoi(os.Args[2]); i > 0; i-- {
scanner.Scan()
}
for scanner.Scan() {
str := scanner.Text()
edge := strings.Split(str, ",")
u, _ := strconv.Atoi(edge[0])
v, _ := strconv.Atoi(edge[1])
var key Edge
if u < v {
key = Edge{u,v}
} else {
key = Edge{v,u}
}
//if seen[key] {
// continue
//}
//seen[key] = true
edges = append(edges, key)
}
for _, e := range edges {
s := strconv.Itoa(e.u) + "," + strconv.Itoa(e.v)
fmt.Println(s)
}
}
A sample input is given below. The program can be run as follows (where the last input says how many lines to skip).
go run undup.go a.txt 1
# 3072441,117185083
1,2
1,3
1,4
1,5
1,6
1,7
1,8
I looked at this file: com-orkut.ungraph.txt and it contains 117,185,082 lines. The way your data is structured, that's at least 16 bytes per line. (Edge is two 64bit ints) That alone is 1.7GB. I have had this problem in the past, and it can be a tricky one. Are you trying to solve this for a specific use case (the file in question) or the general case?
In the specific case there are a few things about the data you could leverage: (1) the keys are sorted and (2) it looks it stores every connection twice, (3) the numbers don't seem huge. Here are a couple ideas:
If you use a smaller type for the key you will use less memory. Try a uint32.
You could stream (without using a map) the keys to another file by simply seeing if the 2nd column is greater than the first:
if u < v {
// write the key to another file
} else {
// skip it because v will eventually show v -> u
}
For the general case there are a couple strategies you could use:
If the order of the resulting list doesn't matter: Use an on-disk hash table to store the map. There are a bunch of these: leveldb, sqlite, tokyo tyrant, ... A really nice one for go is bolt.
In your for loop you would just check to see if a bucket contains the given key. (You can convert the ints into byte slices using encoding/binary) If it does, just skip it and continue. You will need to move the second for loop processing step into the first for loop so that you don't have to store all the keys.
If the order of the resulting list does matter (and you can't guarantee the input is in order): You can also use an on-disk hash table, but it needs to be sorted. Bolt is sorted so that will work. Add all the keys to it, then traverse it in the second loop.
Here is an example: (this program will take a while to run with 100 million records)
package main
import (
"bufio"
"encoding/binary"
"fmt"
"github.com/boltdb/bolt"
"os"
"strconv"
"strings"
)
type Edge struct {
u, v int
}
func FromKey(bs []byte) Edge {
return Edge{int(binary.BigEndian.Uint64(bs[:8])), int(binary.BigEndian.Uint64(bs[8:]))}
}
func (e Edge) Key() [16]byte {
var k [16]byte
binary.BigEndian.PutUint64(k[:8], uint64(e.u))
binary.BigEndian.PutUint64(k[8:], uint64(e.v))
return k
}
func main() {
file, err := os.Open(os.Args[1])
if err != nil {
panic(err.Error())
}
defer file.Close()
scanner := bufio.NewScanner(file)
for i, _ := strconv.Atoi(os.Args[2]); i > 0; i-- {
scanner.Scan()
}
db, _ := bolt.Open("ex.db", 0777, nil)
defer db.Close()
bucketName := []byte("edges")
db.Update(func(tx *bolt.Tx) error {
tx.CreateBucketIfNotExists(bucketName)
return nil
})
batchSize := 10000
total := 0
batch := make([]Edge, 0, batchSize)
writeBatch := func() {
total += len(batch)
fmt.Println("write batch. total:", total)
db.Update(func(tx *bolt.Tx) error {
bucket := tx.Bucket(bucketName)
for _, edge := range batch {
key := edge.Key()
bucket.Put(key[:], nil)
}
return nil
})
}
for scanner.Scan() {
str := scanner.Text()
edge := strings.Split(str, "\t")
u, _ := strconv.Atoi(edge[0])
v, _ := strconv.Atoi(edge[1])
var key Edge
if u < v {
key = Edge{u, v}
} else {
key = Edge{v, u}
}
batch = append(batch, key)
if len(batch) == batchSize {
writeBatch()
// reset the batch length to 0
batch = batch[:0]
}
}
// write anything leftover
writeBatch()
db.View(func(tx *bolt.Tx) error {
tx.Bucket(bucketName).ForEach(func(k, v []byte) error {
edge := FromKey(k)
fmt.Println(edge)
return nil
})
return nil
})
}
You are squandering memory. Here's how to rectify it.
You give the sample input a.txt, 48 bytes.
# 3072441,117185083
1,2
1,3
1,4
1,5
On http://snap.stanford.edu/data/com-Orkut.html, I found http://snap.stanford.edu/data/bigdata/communities/com-orkut.ungraph.txt.gz, 1.8 GB uncompressed, 117,185,083 edges.
# Undirected graph: ../../data/output/orkut.txt
# Orkut
# Nodes: 3072441 Edges: 117185083
# FromNodeId ToNodeId
1 2
1 3
1 4
1 5
On http://socialnetworks.mpi-sws.org/data-imc2007.html, I found http://socialnetworks.mpi-sws.mpg.de/data/orkut-links.txt.gz, 3.4 GB uncompressed, 223,534,301 edges.
1 2
1 3
1 4
1 5
Since they are similar, one program can handle all formats.
Your Edge type is
type Edge struct {
u, v int
}
which is 16 bytes on a 64-bit architecture.
Use
type Edge struct {
U, V uint32
}
which is 8 bytes, it is adequate.
If the capacity of a slice is not large enough to fit the additional values, append allocates a new, sufficiently large underlying array that fits both the existing slice elements and the additional values. Otherwise, append re-uses the underlying array. For a large slice, the new array is 1.25 times the size of the old array. While the old array is being copied to the new array, 1 + 1.25 = 2.25 times the memory for the old array is required. Therefore, allocate the underlying array so that all values fit.
make(T, n) initializes map of type T with initial space for n elements. Provide a value for n to limit the cost of reorganization and fragmentation as elements are added. Hashing functions are often imperfect which leads to wasted space. Eliminate the map as it's unneccesary. To eliminate duplicates, sort the slice in place and move the unique elements down.
A string is immutable, therefore a new string is allocated for scanner.Text() to convert from a byte slice buffer. To parse numbers we use strconv. To minimize temporary allocations, use scanner.Bytes() and adapt strconv.ParseUint to accept a byte array argument (bytconv).
For example,
orkut.go
package main
import (
"bufio"
"bytes"
"errors"
"fmt"
"os"
"runtime"
"sort"
"strconv"
)
type Edge struct {
U, V uint32
}
func (e Edge) String() string {
return fmt.Sprintf("%d,%d", e.U, e.V)
}
type ByKey []Edge
func (a ByKey) Len() int { return len(a) }
func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByKey) Less(i, j int) bool {
if a[i].U < a[j].U {
return true
}
if a[i].U == a[j].U && a[i].V < a[j].V {
return true
}
return false
}
func countEdges(scanner *bufio.Scanner) int {
var nNodes, nEdges int
for scanner.Scan() {
line := scanner.Bytes()
if !(len(line) > 0 && line[0] == '#') {
nEdges++
continue
}
n, err := fmt.Sscanf(string(line), "# Nodes: %d Edges: %d", &nNodes, &nEdges)
if err != nil || n != 2 {
n, err = fmt.Sscanf(string(line), "# %d,%d", &nNodes, &nEdges)
if err != nil || n != 2 {
continue
}
}
fmt.Println(string(line))
break
}
if err := scanner.Err(); err != nil {
panic(err.Error())
}
fmt.Println(nEdges)
return nEdges
}
func loadEdges(filename string) []Edge {
file, err := os.Open(filename)
if err != nil {
panic(err.Error())
}
defer file.Close()
scanner := bufio.NewScanner(file)
nEdges := countEdges(scanner)
edges := make([]Edge, 0, nEdges)
offset, err := file.Seek(0, os.SEEK_SET)
if err != nil || offset != 0 {
panic(err.Error())
}
var sep byte = '\t'
scanner = bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Bytes()
if len(line) > 0 && line[0] == '#' {
continue
}
i := bytes.IndexByte(line, sep)
if i < 0 || i+1 >= len(line) {
sep = ','
i = bytes.IndexByte(line, sep)
if i < 0 || i+1 >= len(line) {
err := errors.New("Invalid line format: " + string(line))
panic(err.Error())
}
}
u, err := ParseUint(line[:i], 10, 32)
if err != nil {
panic(err.Error())
}
v, err := ParseUint(line[i+1:], 10, 32)
if err != nil {
panic(err.Error())
}
if u > v {
u, v = v, u
}
edges = append(edges, Edge{uint32(u), uint32(v)})
}
if err := scanner.Err(); err != nil {
panic(err.Error())
}
if len(edges) <= 1 {
return edges
}
sort.Sort(ByKey(edges))
j := 0
i := j + 1
for ; i < len(edges); i, j = i+1, j+1 {
if edges[i] == edges[j] {
break
}
}
for ; i < len(edges); i++ {
if edges[i] != edges[j] {
j++
edges[j] = edges[i]
}
}
edges = edges[:j+1]
return edges
}
func main() {
if len(os.Args) <= 1 {
err := errors.New("Missing file name")
panic(err.Error())
}
filename := os.Args[1]
fmt.Println(filename)
edges := loadEdges(filename)
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
fmt.Println(ms.Alloc, ms.TotalAlloc, ms.Sys, ms.Mallocs, ms.Frees)
fmt.Println(len(edges), cap(edges))
for i, e := range edges {
fmt.Println(e)
if i >= 10 {
break
}
}
}
// bytconv from strconv
// Return the first number n such that n*base >= 1<<64.
func cutoff64(base int) uint64 {
if base < 2 {
return 0
}
return (1<<64-1)/uint64(base) + 1
}
// ParseUint is like ParseInt but for unsigned numbers.
func ParseUint(s []byte, base int, bitSize int) (n uint64, err error) {
var cutoff, maxVal uint64
if bitSize == 0 {
bitSize = int(strconv.IntSize)
}
s0 := s
switch {
case len(s) < 1:
err = strconv.ErrSyntax
goto Error
case 2 <= base && base <= 36:
// valid base; nothing to do
case base == 0:
// Look for octal, hex prefix.
switch {
case s[0] == '0' && len(s) > 1 && (s[1] == 'x' || s[1] == 'X'):
base = 16
s = s[2:]
if len(s) < 1 {
err = strconv.ErrSyntax
goto Error
}
case s[0] == '0':
base = 8
default:
base = 10
}
default:
err = errors.New("invalid base " + strconv.Itoa(base))
goto Error
}
n = 0
cutoff = cutoff64(base)
maxVal = 1<<uint(bitSize) - 1
for i := 0; i < len(s); i++ {
var v byte
d := s[i]
switch {
case '0' <= d && d <= '9':
v = d - '0'
case 'a' <= d && d <= 'z':
v = d - 'a' + 10
case 'A' <= d && d <= 'Z':
v = d - 'A' + 10
default:
n = 0
err = strconv.ErrSyntax
goto Error
}
if int(v) >= base {
n = 0
err = strconv.ErrSyntax
goto Error
}
if n >= cutoff {
// n*base overflows
n = 1<<64 - 1
err = strconv.ErrRange
goto Error
}
n *= uint64(base)
n1 := n + uint64(v)
if n1 < n || n1 > maxVal {
// n+v overflows
n = 1<<64 - 1
err = strconv.ErrRange
goto Error
}
n = n1
}
return n, nil
Error:
return n, &strconv.NumError{"ParseUint", string(s0), err}
}
Output:
$ go build orkut.go
$ time ./orkut ~/release-orkut-links.txt
/home/peter/release-orkut-links.txt
223534301
1788305680 1788327856 1904683256 135 50
117185083 223534301
1,2
1,3
1,4
1,5
1,6
1,7
1,8
1,9
1,10
1,11
1,12
real 2m53.203s
user 2m51.584s
sys 0m1.628s
$
The orkut.go program with the release-orkut-links.txt file (3,372,855,860 (3.4 GB) bytes with 223,534,301 edges) uses about 1.8 GiB of memory. After eliminating duplicates, 117,185,083 unique edges remain. This matches the 117,185,083 unique edge com-orkut.ungraph.txt file.
With 8 GB of memory on your machine, you can load much larger files.

Resources