GOLANG bufio Scanner handling 10000 characters - go

I want to read a 10,000-character string from os.Stdin,
but bufio.NewScanner only reads 4096 characters.
How can I read more than 4096 characters?
Here is my code
package main
import (
"bufio"
"fmt"
"os"
)
// main reads one line from standard input through a bufio.Scanner whose
// buffer limit is raised to 2048*2048 bytes, prints the length of that line,
// and copies its bytes into str shifted right by one position.
func main() {
sc := bufio.NewScanner(os.Stdin)
// The same value is used for the initial buffer size and the maximum token size.
buf := make([]byte, 2048*2048)
sc.Buffer(buf, 2048*2048)
// The result of Scan is not checked here.
sc.Scan()
s := sc.Bytes()
fmt.Println(len(s)) // 9998; must be 10000
// str has one extra leading slot: str[0] keeps its zero value and str[i] holds s[i-1].
str := make([]byte, len(s) + 1)
for i := 1; i < len(s) + 1; i++ {
str[i] = s[i-1]
}
}
if I input 10000 characters
panic: runtime error: index out of range [9998] with length 9998

Just a hunch, but I suspect that the missing two characters are the CR+LF pair used by windows as a line terminator.
This works for me. I'm using a text file ("pp1.txt") that contains the text of Jane Austen's Pride and Prejudice broken up into 10,000-character lines — making each line (on my macOS system) actually 10,001 characters (10,000 characters of text, followed by a LF line delimiter). It reads the entire line, discarding the line terminator and giving me 10,000 characters.
package main
import (
"bufio"
"fmt"
"os"
)
// main opens ./pp1.txt, reads its first line through a bufio.Scanner that
// accepts lines of up to 1 MiB, prints the line's length, and copies the line
// into a new slice shifted right by one byte (index 0 keeps its zero value).
func main() {
	file, err := os.Open("./pp1.txt")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	const oneMegabyte = 1024 * 1024
	sc := bufio.NewScanner(file)
	sc.Buffer(make([]byte, oneMegabyte), oneMegabyte)
	if !sc.Scan() {
		// Scan returns false both at end of input and on failure (for
		// example a line longer than the buffer limit); only the latter
		// leaves a non-nil Err.
		if err := sc.Err(); err != nil {
			panic(err)
		}
	}
	s := sc.Bytes()
	fmt.Println(len(s)) // length of the line without its terminator

	str := make([]byte, len(s)+1)
	copy(str[1:], s) // same effect as the element-by-element loop: str[i] = s[i-1]
}
I'm a little confused by this, though:
str := make([]byte, len(s)+1)
for i := 1; i < len(s)+1; i++ {
str[i] = s[i-1]
}
If you are trying to convert the bytes from the scanner into a string, why wouldn't you just use sc.Text() instead of sc.Bytes()?

Related

Infinite loop in Go

I want to have the "for loop" to loop 3 times or until the user inputs something other than an integer. Below is my code, although this runs an infinite amount of times and prints out the first value the user enters.
package main
import "fmt"
import "bufio"
import "strconv"
import "os"
import "sort"
// main is meant to read three integers and print them sorted. As written it
// scans a single line before the loop and re-parses that same text on every
// iteration, appending the result to emptySlice each time.
func main(){
emptySlice := make([]int, 3) // Capacity of 3
fmt.Println(cap(emptySlice))
scanner := bufio.NewScanner(os.Stdin) // Creating scanner object
fmt.Printf("Please enter a number: ")
scanner.Scan() // Will always scan in a string regardless if its a number
// The bound cap(emptySlice) is re-evaluated on every pass while the body
// appends to the slice, so the bound grows along with i.
for i := 0; i < cap(emptySlice); i++ { // Should this not run 3 times?
input, err := strconv.ParseInt(scanner.Text(), 10, 16)
if err != nil{
fmt.Println("Not a valid entry! Ending program")
break
}
emptySlice = append(emptySlice, int(input)) // adds input to the slice
sort.Ints(emptySlice) // sorts the slice
fmt.Println(emptySlice) // Prints the slice
}
}
I think there are a couple of minor bugs, but this version should work correctly:
package main
import "fmt"
import "bufio"
import "strconv"
import "os"
import "sort"
// main reads up to three integers (16-bit range) from standard input, one per
// line, and prints the values that were actually read in ascending order.
// Reading stops early at end of input or at the first line that is not a
// valid integer.
func main() {
	emptySlice := make([]int, 3) // length 3, capacity 3
	fmt.Println(cap(emptySlice))
	scanner := bufio.NewScanner(os.Stdin)

	count := 0 // number of slots filled so far
	for count < len(emptySlice) {
		fmt.Printf("Please enter a number: ")
		if !scanner.Scan() {
			// End of input or a read error: there is nothing left to parse.
			break
		}
		input, err := strconv.ParseInt(scanner.Text(), 10, 16)
		if err != nil {
			fmt.Println("Not a valid entry! Ending program")
			break
		}
		// Assign in place rather than append: append would grow the slice
		// past its three pre-allocated entries.
		emptySlice[count] = int(input)
		count++
	}

	// Drop the slots that were never filled so they are not printed as zeros.
	emptySlice = emptySlice[:count]
	sort.Ints(emptySlice)
	fmt.Println(emptySlice)
}
I've moved the prompt into the loop, and I've replaced the append call with a direct assignment to the previously allocated slice entries. Otherwise calling append will just increase the size of the slice.
I've moved the sort and the print outside of the loop, as these seemed to be incorrectly placed too.
The program in the question starts with cap(emptySlice) == 3. Given that each complete iteration of the loop appends a new value to emptySlice, we know that cap(emptySlice) >= 3 + i, so the condition i < cap(emptySlice) is always true. It follows that the loop does not terminate.
My homework assignment is slightly different: Read up to three integers and print them in sorted order. Here's how I did it:
// main collects at most three integers from standard input, one per line, and
// prints them in ascending order. It stops collecting at end of input, on a
// read error, or at the first line that strconv.Atoi rejects.
func main() {
	var nums []int
	in := bufio.NewScanner(os.Stdin)
	for len(nums) < 3 {
		fmt.Printf("Please enter a number: ")
		if ok := in.Scan(); !ok {
			// Nothing more to read.
			break
		}
		value, convErr := strconv.Atoi(in.Text())
		if convErr != nil {
			// Report the bad entry and stop.
			fmt.Println(convErr)
			break
		}
		nums = append(nums, value)
	}
	sort.Ints(nums)
	fmt.Println(nums)
}

Reading a random line from a file in constant time in Go

I have the following code to choose 2 random lines from a file containing lines of the form ip:port:
import (
"os"
"fmt"
"math/rand"
"log"
"time"
"unicode/utf8"
//"bufio"
)
// main tries to pick two random "ip:port" entries from pods_array.txt by
// computing byte offsets in multiples of 16 and reading 15 bytes at each
// offset with ReadAt.
// NOTE(review): the offset arithmetic only lands on line starts if every line
// occupies exactly 16 bytes — confirm against the actual file contents.
func main() {
fmt.Println("num bytes in line is: \n", utf8.RuneCountInString("10.244.1.8:8080"))
file_pods_array, err_file_pods_array := os.Open("pods_array.txt")
if err_file_pods_array != nil {
log.Fatalf("failed opening file: %s", err_file_pods_array)
}
//16 = num of bytes in ip:port pair
randsource := rand.NewSource(time.Now().UnixNano())
randgenerator := rand.New(randsource)
// Two independent picks in [0, 10); they may be equal.
firstLoc := randgenerator.Intn(10)
secondLoc := randgenerator.Intn(10)
candidate1 := ""
candidate2 := ""
// Offsets are 16*(loc+1), so they range from 16 to 160 and never address byte 0.
num_bytes_from_start_first := 16 * (firstLoc + 1)
num_bytes_from_start_second := 16 * (secondLoc + 1)
buf_ipport_first := make([]byte, int64(15))
buf_ipport_second := make([]byte, int64(15))
start_first := int64(num_bytes_from_start_first)
start_second := int64(num_bytes_from_start_second)
// A candidate is only set when ReadAt reports no error; otherwise it stays "".
_, err_first := file_pods_array.ReadAt(buf_ipport_first, start_first)
first_ipport_ep := buf_ipport_first
if err_first == nil {
candidate1 = string(first_ipport_ep)
}
_, err_second := file_pods_array.ReadAt(buf_ipport_second, start_second)
second_ipport_ep := buf_ipport_second
if err_second == nil {
candidate2 = string(second_ipport_ep)
}
fmt.Println("first is: ", candidate1)
fmt.Println("sec is: ", candidate2)
}
This sometimes prints empty or partial lines.
Why does this happen and how can I fix it?
Output example:
num bytes in line is:
15
first is: 10.244.1.17:808
sec is:
10.244.1.11:80
Thank you.
If your lines were of a fixed length you could do this in constant time.
Length of each line is L.
Check the size of the file, S.
Divide S/L to get the number of lines N.
Pick a random number R from 0 to N-1.
Seek to R*L in the file.
Read L bytes.
But you don't have fixed length lines. We can't do constant time, but we can do it in constant memory and O(n) time using the technique from The Art of Computer Programming, Volume 2, Section 3.4.2, by Donald E. Knuth.
Read a line. Remember its line number M.
Pick a random number from 1 to M.
If it's 1, remember this line.
That is, as you read each line you have a 1/M chance of picking it. Cumulatively this adds up to 1/N for every line.
If we have three lines, the first line has a 1/1 chance of being picked. Then a 1/2 chance of remaining. Then a 2/3 chance of remaining. Total chance: 1 * 1/2 * 2/3 = 1/3.
The second line has a 1/2 chance of being picked and a 2/3 chance of remaining. Total chance: 1/2 * 2/3 = 1/3.
The third line has a 1/3 chance of being picked.
package main
import(
"bufio"
"fmt"
"os"
"log"
"math/rand"
"time"
);
// main picks one line uniformly at random from pods_array.txt in a single
// pass and constant memory: the M-th line read replaces the current pick with
// probability 1/M. Each step is logged so the selection can be followed.
func main() {
	file, err := os.Open("pods_array.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	randgenerator := rand.New(rand.NewSource(time.Now().UnixNano()))

	var pick string
	for lineNum := 1; scanner.Scan(); lineNum++ {
		line := scanner.Text()
		fmt.Printf("Considering %v at 1/%v.\n", line, lineNum)
		// Intn returns a value in [0, lineNum), so rolling 0 is the
		// one-in-lineNum event that selects this line.
		roll := randgenerator.Intn(lineNum)
		fmt.Printf("We rolled a %v.\n", roll)
		if roll == 0 {
			fmt.Printf("Picking line.\n")
			pick = line
		}
	}
	// Scan also returns false on a read error; without this check the pick
	// would silently come from a truncated view of the file.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Picked: %v\n", pick)
}
Because rand.Intn(n) returns [0,n), that is from 0 to n-1, we check for 0, not 1.
Maybe you're thinking "what if I seek to a random point in the file and then read the next full line?" That wouldn't quite be constant time (it would be O(longest-line)), and it wouldn't be uniformly random either: longer lines would get picked more frequently.
Note that since these are (I assume) all IP addresses and ports, you could have constant record lengths. Store the IPv4 address as 32 bits and the port as 16 bits: 48 bits per record.
However, this will break on IPv6. For forward compatibility store everything as IPv6: 128 bits for the IP and 16 bits for the port. 144 bits per line. Convert IPv4 addresses to IPv6 for storage.
This will allow you to pick random addresses in constant time, and it will save disk space.
Alternatively, store them in SQLite.
found a solution using ioutil and strings:
// main prints two entries chosen at random from pods_array.txt. The two picks
// are independent, so the same entry may be printed twice.
func main() {
	randgenerator := rand.New(rand.NewSource(time.Now().UnixNano()))

	dat, err := ioutil.ReadFile("pods_array.txt")
	if err != nil {
		fmt.Println("reading pods_array.txt:", err)
		return
	}

	// Fields splits on any run of whitespace, so a trailing newline or CRLF
	// line endings do not produce empty or "\r"-terminated entries.
	// NOTE(review): assumes entries (ip:port) never contain spaces — confirm.
	entries := strings.Fields(string(dat))
	if len(entries) == 0 {
		fmt.Println("pods_array.txt contains no entries")
		return
	}

	// Draw indexes from the actual number of entries rather than a fixed 10,
	// which indexes out of range on shorter files.
	candidate1 := entries[randgenerator.Intn(len(entries))]
	candidate2 := entries[randgenerator.Intn(len(entries))]
	fmt.Println(candidate1)
	fmt.Println(candidate2)
}
Output
10.244.1.3:8080
10.244.1.11:8080

Why the array resulted from the stdin in Golang convert the last item to zero?

Note: I am new to StackOverflow as well as to Programming, so if my question is not "so professional" or "well formatted", please forgive me.
I am using the following Go (Golang) code to capture some space-separated numbers (string) from terminal, then split it into a slice. Later I'm converting this slice to a slice of float64 by getting one item at a time from the strings-slice and converting it to float64 and appending it to the float64-slice.
Then I'm returning the resulting float64 slice and printing it in the main function.
The problem is when I pass some space-separated digits to the terminal, the last digit is converted to zero.
for example if I pass 1 2 3 4 5 I expect the resulting slice as [1 2 3 4 5], but it gives me the slice as [1 2 3 4 0].
I'm trying from the last 5 hours, but I'm not able to find what I'm missing or messing.
code:
package main
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
// main prints the slice of numbers returned by ReadInput.
func main() {
	fmt.Println(ReadInput())
}
// ReadInput prompts for a line on standard input, splits it on single spaces,
// converts every piece with strconv.ParseFloat, and returns the results.
func ReadInput() []float64 {
reader := bufio.NewReader(os.Stdin)
fmt.Print("Enter text: ")
// ReadString returns the line including the '\n' delimiter; its error is discarded.
text, _ := reader.ReadString('\n')
// Splitting on " " only, so the delimiter stays attached to the last piece.
textSlice := strings.Split(text, " ")
floatsSlice := make([]float64, 0)
for _, elem := range textSlice {
// The parse error is discarded and i is appended regardless of it.
i, _ := strconv.ParseFloat(elem, 64)
floatsSlice = append(floatsSlice, i)
}
return floatsSlice
}
Thank You in advance!
ReadString reads until the first occurrence of delim in the input,
returning a string containing the data up to and including the
delimiter.
So strings.Split(text, " ") leaves the trailing \n attached to the last element, and parsing that element fails.
You may use strings.Fields(text) instead of strings.Split(text, " "),
and always check for errors:
like this working sample code:
package main
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
// main prints the slice of numbers returned by ReadInput.
func main() {
	fmt.Println(ReadInput())
}
// ReadInput prompts on standard output, reads one line from standard input,
// and returns the whitespace-separated numbers on that line as float64
// values. Tokens that are not valid floats are reported and skipped, so the
// result never contains a placeholder zero for bad input.
func ReadInput() []float64 {
	reader := bufio.NewReader(os.Stdin)
	fmt.Print("Enter text: ")
	text, err := reader.ReadString('\n')
	if err != nil {
		// ReadString still returns the data read before the error (for
		// example a last line without a trailing newline), so report the
		// error and parse whatever was received.
		fmt.Println(err)
	}

	// Fields splits on any whitespace, which also removes the trailing '\n'.
	textSlice := strings.Fields(text)
	floatsSlice := make([]float64, 0, len(textSlice))
	for _, elem := range textSlice {
		f, err := strconv.ParseFloat(elem, 64)
		if err != nil {
			fmt.Println(err)
			continue
		}
		floatsSlice = append(floatsSlice, f)
	}
	return floatsSlice
}

Which scan to use to read floats from a string?

This seems almost right but it chokes on the newline.
What's the best way to do this?
package main
import (
"fmt"
"strings"
)
// main scans floats out of a fixed string with repeated fmt.Fscanf("%f")
// calls, printing the item count of each call, and stops at the first error.
// It then prints the values collected up to that point.
func main() {
var z float64
var a []float64
// \n gives an error for Fscanf
s := "3.25 -12.6 33.7 \n 3.47"
in := strings.NewReader(s)
for {
n, err := fmt.Fscanf(in, "%f", &z)
fmt.Println("n", n)
// Any error ends the loop, so values after the failing position are never read.
if err != nil {
break
}
a = append(a, z)
}
fmt.Println(a)
}
Output:
n 1
n 1
n 1
n 0
[3.25 -12.6 33.7]
Update:
See the answer from @Atom below. I found another way, which is to break if the error is EOF and otherwise just ignore it. It's just a hack, I know, but I control the source.
_, err := fmt.Fscanf(in, "%f", &z)
if err == os.EOF { break }
if err != nil { continue }
If you are parsing floats only, you can use fmt.Fscan(r io.Reader, a ...interface{}) instead of fmt.Fscanf(r io.Reader, format string, a ...interface{}):
var z float64
...
n, err := fmt.Fscan(in, &z)
The difference between fmt.Fscan and fmt.Fscanf is that in the case of fmt.Fscan newlines count as space. The latter function (with a format string) does not treat newlines as spaces and requires newlines in the input to match newlines in the format string.
The functions with a format string give more control over the form of input, such as when you need to scan %5f or %10s. In this case, if the input contains newlines and it implements the interface io.RuneScanner you can use the method ReadRune to peek the next character and optionally unread it with UnreadRune if it isn't a space or a newline.
If your input is just a bunch of lines with floats separated by white space on each line, it might be easier to just read one line at a time from the file, run Sscanf on that line (assuming the number of floats on each line is fixed). But here's something that works in your example---there may be a way to make it more efficient.
package main
import (
"fmt"
"strings"
)
// main parses the floats in a fixed two-line string by scanning each line
// separately with fmt.Fscanf, logging the item count of every call and the
// error that ends each line, then prints all collected values.
func main() {
	// \n gives an error for Fscanf, so every line gets its own reader.
	const input = "3.25 -12.6 33.7 \n 3.47"
	var values []float64
	for _, row := range strings.Split(input, "\n") {
		rd := strings.NewReader(row)
		for {
			var f float64
			count, scanErr := fmt.Fscanf(rd, "%f", &f)
			fmt.Println("n", count)
			if scanErr != nil {
				fmt.Printf("ERROR: %v\n", scanErr)
				break
			}
			values = append(values, f)
		}
	}
	fmt.Println(values)
}

Looking for Go equivalent of scanf

I'm looking for the Go equivalent of scanf().
I tried with following code:
1 package main
2
3 import (
4 "scanner"
5 "os"
6 "fmt"
7 )
8
9 func main() {
10 var s scanner.Scanner
11 s.Init(os.Stdin)
12 s.Mode = scanner.ScanInts
13 tok := s.Scan()
14 for tok != scanner.EOF {
15 fmt.Printf("%d ", tok)
16 tok = s.Scan()
17 }
18 fmt.Println()
19 }
I run it with input from a text with a line of integers.
But it always output -3 -3 ...
And how to scan a line composed of a string and some integers?
Changing the mode whenever encounter a new data type?
The Package documentation:
Package scanner
A general-purpose scanner for UTF-8
encoded text.
But it seems that the scanner is not for general use.
Updated code:
// main prints the integers returned by scanf followed by how many there are.
func main() {
n := scanf()
fmt.Println(n)
fmt.Println(len(n))
}
// scanf reads standard input line by line until ReadString reports os.EOF,
// converts every whitespace-separated field with strconv.Atoi, and returns
// the collected values as a new []int.
// NOTE(review): written against a pre-Go 1 toolchain (vector.IntVector,
// os.EOF) — these names are not expected to exist in current Go releases.
func scanf() []int {
nums := new(vector.IntVector)
reader := bufio.NewReader(os.Stdin)
str, err := reader.ReadString('\n')
// The line returned together with os.EOF is not processed by this loop.
for err != os.EOF {
fields := strings.Fields(str)
for _, f := range fields {
// The Atoi error is discarded, so i is pushed even when conversion failed.
i, _ := strconv.Atoi(f)
nums.Push(i)
}
str, err = reader.ReadString('\n')
}
// Copy the vector's contents into a plain slice of the same length.
r := make([]int, nums.Len())
for i := 0; i < nums.Len(); i++ {
r[i] = nums.At(i)
}
return r
}
Improved version:
package main
import (
"bufio"
"os"
"io"
"fmt"
"strings"
"strconv"
"container/vector"
)
// main prints the count and the values of the integers read from standard input.
func main() {
n := fscanf(os.Stdin)
fmt.Println(len(n), n)
}
// fscanf reads in line by line until ReadString reports os.EOF and returns
// every whitespace-separated field that strconv.Atoi accepts; fields that
// fail to convert are skipped.
// NOTE(review): written against a pre-Go 1 toolchain (vector.IntVector,
// os.EOF) — these names are not expected to exist in current Go releases.
func fscanf(in io.Reader) []int {
var nums vector.IntVector
reader := bufio.NewReader(in)
str, err := reader.ReadString('\n')
// The line returned together with os.EOF is not processed by this loop.
for err != os.EOF {
fields := strings.Fields(str)
for _, f := range fields {
if i, err := strconv.Atoi(f); err == nil {
nums.Push(i)
}
}
str, err = reader.ReadString('\n')
}
// nums is returned directly as the function's []int result.
return nums
}
Your updated code was much easier to compile without the line numbers, but it was missing the package and import statements.
Looking at your code, I noticed a few things. Here's my revised version of your code.
package main
import (
"bufio"
"fmt"
"io"
"os"
"strconv"
"strings"
"container/vector"
)
// main reads integers from standard input via scanf, then prints a blank
// line followed by the number of values and the values themselves.
func main() {
	nums := scanf(os.Stdin)
	fmt.Printf("\n%d %v\n", len(nums), nums)
}
func scanf(in io.Reader) []int {
var nums vector.IntVector
rd := bufio.NewReader(os.Stdin)
str, err := rd.ReadString('\n')
for err != os.EOF {
fields := strings.Fields(str)
for _, f := range fields {
if i, err := strconv.Atoi(f); err == nil {
nums.Push(i)
}
}
str, err = rd.ReadString('\n')
}
return nums
}
I might want to use any input file for scanf(), not just Stdin; scanf() takes an io.Reader as a parameter.
You wrote: nums := new(vector.IntVector), where type IntVector []int. This allocates an integer slice reference named nums and initializes it to zero, then the new() function allocates an integer slice reference and initializes it to zero, and then assigns it to nums. I wrote: var nums vector.IntVector, which avoids the redundancy by simply allocating an integer slice reference named nums and initializing it to zero.
You didn't check the err value for strconv.Atoi(), which meant invalid input was converted to a zero value; I skip it.
To copy from the vector to a new slice and return the slice, you wrote:
r := make([]int, nums.Len())
for i := 0; i < nums.Len(); i++ {
r[i] = nums.At(i)
}
return r
First, I simply replaced that with an equivalent, the IntVector.Data() method: return nums.Data(). Then, I took advantage of the fact that type IntVector []int and avoided the allocation and copy by replacing that by: return nums.
Although it can be used for other things, the scanner package is designed to scan Go program text. Ints (-123), Chars('c'), Strings("str"), etc. are Go language token types.
package main
import (
"fmt"
"os"
"scanner"
"strconv"
)
// main tokenizes standard input with scanner.Scanner and prints one line per
// token: the token value, its text, and a type-specific rendering for
// integers, strings, and any other non-negative token.
// NOTE(review): written against a pre-Go 1 toolchain ("scanner" import path,
// strconv.Atoi64) — not expected to build on current Go releases.
func main() {
var s scanner.Scanner
s.Init(os.Stdin)
// Report scan errors on standard output instead of the scanner's default handling.
s.Error = func(s *scanner.Scanner, msg string) { fmt.Println("scan error", msg) }
// Recognize only integer, string, and raw-string tokens.
s.Mode = scanner.ScanInts | scanner.ScanStrings | scanner.ScanRawStrings
for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
txt := s.TokenText()
fmt.Print("token:", tok, "text:", txt)
switch tok {
case scanner.Int:
si, err := strconv.Atoi64(txt)
if err == nil {
fmt.Print(" integer: ", si)
}
case scanner.String, scanner.RawString:
fmt.Print(" string: ", txt)
default:
// Non-negative values are printed as runes; other negative tokens are reported as errors.
if tok >= 0 {
fmt.Print(" unicode: ", "rune = ", tok)
} else {
fmt.Print(" ERROR")
}
}
fmt.Println()
}
}
This example always reads in a line at a time and returns the entire line as a string. If you want to parse out specific values from it you could.
package main
import (
"fmt"
"bufio"
"os"
"strings"
)
// main asks for a value and greets it with surrounding whitespace removed.
func main() {
	fmt.Printf("Hello %s!\n", strings.TrimSpace(Input("Please enter a value: ")))
}
// Input writes the prompt str to standard output, reads one line from
// standard input, and returns it including the trailing '\n' when one was
// read.
func Input(str string) string {
	// Use fmt rather than the builtin print: the builtin is a bootstrapping
	// helper that writes to standard error.
	fmt.Print(str)
	reader := bufio.NewReader(os.Stdin)
	// The error is deliberately dropped: the signature has no way to report
	// it, and ReadString still returns whatever was read before it occurred
	// (for example a last line without a newline at end of input).
	input, _ := reader.ReadString('\n')
	return input
}
In a comment to one of my answers, you said:
From the Language Specification: "When
memory is allocated to store a value,
either through a declaration or make()
or new() call, and no explicit
initialization is provided, the memory
is given a default initialization".
Then what's the point of new()?
If we run:
package main
import ("fmt")
// main contrasts a declared value with a declared pointer: it prints i and j
// as declared, then points j at an int allocated with new and prints i, j,
// and *j.
func main() {
var i int
var j *int
// j has not been assigned yet at this point.
fmt.Println("i (a value) = ", i, "; j (a pointer) = ", j)
j = new(int)
// j now refers to an allocated int, so *j can be read.
fmt.Println("i (a value) = ", i, "; j (a pointer) = ", j, "; *j (a value) = ", *j)
}
The declaration var i int allocates memory to store an integer value and initializes the value to zero. The declaration var j *int allocates memory to store a pointer to an integer value and initializes the pointer to zero (a nil pointer); no memory is allocated to store an integer value. We see program output similar to:
i (a value) = 0 ; j (a pointer) = <nil>
The built-in function new takes a type T and returns a value of type *T. The memory is initialized to zero values. The statement j = new(int) allocates memory to store an integer value and initializes the value to zero, then it stores a pointer to this integer value in j. We see program output similar to:
i (a value) = 0 ; j (a pointer) = 0x7fcf913a90f0 ; *j (a value) = 0
The latest release of Go (2010-05-27) has added two functions to the fmt package: Scan() and Scanln(). They don't take a pattern string like in C, but check the type of the arguments instead.
package main
import (
"fmt"
"os"
"container/vector"
)
// main collects integers with repeated fmt.Scan calls for as long as each
// call reports exactly one item and no error, then prints the collected values.
// NOTE(review): written against the 2010-era API described above (fmt.Scan
// taking os.Stdin as its first argument, container/vector) — not expected to
// build on current Go releases.
func main() {
numbers := new(vector.IntVector)
var number int
n, err := fmt.Scan(os.Stdin, &number)
// Stop at the first call that does not yield exactly one value cleanly.
for n == 1 && err == nil {
numbers.Push(number)
n, err = fmt.Scan(os.Stdin, &number)
}
fmt.Printf("%v\n", numbers.Data())
}

Resources