My intent is to read a text file and print its records on the console. Sample text file format:
abc 123
test zzz
hello world
I am using byte arrays to initialize my structure.
However, when I print the tokens after splitting each line that was read, blank values are printed for the second token obtained from the split.
package main
import (
"fmt"
"bufio"
"os"
"strings"
"io/ioutil"
)
// person is one record of the input file: two fixed-size byte arrays that
// are zero-padded when the stored token is shorter than 20 bytes.
type person struct {
	fName, lName [20]byte // first and second token of a line
}
// main asks for a file name on standard input, reads that file, splits it
// into lines and each line into space-separated tokens, and echoes the first
// two tokens of every line as 20-byte, zero-padded strings.
func main() {
	fmt.Printf("Enter Name ")
	path, _ := bufio.NewReader(os.Stdin).ReadString('\n')
	// Strip the line terminator ("\n" or "\r\n") from the typed name.
	path = strings.Replace(strings.Replace(path, "\n", "", -1), "\r", "", -1)

	data, err := ioutil.ReadFile(path)
	if err != nil {
		panic(err)
	}

	lines := strings.Split(string(data), "\n")
	// The slice starts out holding len(lines) zero-valued persons, so every
	// append below lands after those empty entries.
	sli := make([]person, len(lines))
	for i := range lines {
		// Only "\n" and " " are split on; a trailing "\r" stays in the token.
		tokens := strings.Split(lines[i], " ")

		var rec person
		copy(rec.fName[:], tokens[0])
		fmt.Println(string(rec.fName[:]))
		// tokens[1] is read unconditionally; a line without a space panics here.
		copy(rec.lName[:], tokens[1])
		fmt.Println(string(rec.lName[:]))

		sli = append(sli, rec)
	}
	// print the slice
	// for _,object := range sli {
	// fmt.Printf("First Name: %s Last Name: %s\n", string(object.fName[:]), string(object.lName[:]))
	// }
}
With the suggested approach I got the following output
Last Name: 123
Last Name: zzz
First Name: hello Last Name: world
Once you split out the first name and last name, there is some extra whitespace in the values, so I added strings.TrimSpace and the code below now works. You also don't need to initialize sli with a fixed length; append will automatically grow the slice.
package main
import (
"fmt"
"bufio"
"os"
"strings"
"io/ioutil"
)
// person stores the two tokens of one input line in fixed 20-byte arrays;
// bytes beyond the copied token keep their zero value.
type person struct {
	fName, lName [20]byte // first name, last name
}
// main asks for a file name on standard input, reads that file, and turns
// every line that has at least two space-separated tokens into a person.
// Each parsed token is echoed as it is read, and the collected records are
// printed at the end.
func main() {
	reader := bufio.NewReader(os.Stdin)
	fmt.Printf("Enter Name ")
	// The error is deliberately ignored: whatever was typed before EOF is
	// still returned and used as the file name.
	fileName, _ := reader.ReadString('\n')
	fileName = strings.Replace(fileName, "\n", "", -1)
	fileName = strings.Replace(fileName, "\r", "", -1)

	readFile, err := ioutil.ReadFile(fileName)
	if err != nil {
		panic(err)
	}

	fileTextLines := strings.Split(string(readFile), "\n")
	var sli []person
	for _, eachline := range fileTextLines {
		res1 := strings.Split(eachline, " ")
		// Skip lines without a second token (for example the empty string
		// that follows the file's final newline) instead of indexing past
		// the end of res1.
		if len(res1) < 2 {
			continue
		}

		var p person
		// TrimSpace removes the "\r" that Windows line endings leave behind.
		copy(p.fName[:], strings.TrimSpace(res1[0]))
		fmt.Println(string(p.fName[:]))
		copy(p.lName[:], strings.TrimSpace(res1[1]))
		fmt.Println(string(p.lName[:]))

		sli = append(sli, p)
	}

	// print the slice
	for _, object := range sli {
		fmt.Printf("First Name: %s Last Name: %s\n", string(object.fName[:]), string(object.lName[:]))
	}
}
I just found out that the extra whitespace is a carriage return ("\r"). If you are on Windows, use the line below to split the text on newlines:
// Rewrite every "\r\n" pair as "\n" first, then split the result on "\n".
strings.Split(strings.Replace(windows, "\r\n", "\n", -1), "\n")
Use this code:
// Read the whole file and build one person per line that has at least two
// white-space-separated tokens.
readFile, err := ioutil.ReadFile(fname)
if err != nil {
	panic(err)
}

fileTextLines := bytes.Split(readFile, []byte("\n"))
sli := make([]person, 0, len(fileTextLines))
for _, eachline := range fileTextLines {
	// bytes.Fields splits around runs of white space, so a trailing "\r"
	// from Windows line endings and repeated spaces never end up in a token.
	res1 := bytes.Fields(eachline)
	if len(res1) < 2 {
		continue
	}
	var p person
	copy(p.fName[:], res1[0])
	copy(p.lName[:], res1[1])
	sli = append(sli, p)
}

for _, object := range sli {
	// Drop the zero padding of the fixed-size arrays before printing.
	fmt.Printf("First Name: %s Last Name: %s\n",
		bytes.TrimRight(object.fName[:], "\x00"),
		bytes.TrimRight(object.lName[:], "\x00"))
}
This simplifies the code by working with []byte instead of string, and it handles the case where a line has only one token.
Working example on the GoLang PlayGround: https://play.golang.org/p/JFcM1uE3Ywm
Related
This is a small snippet of code supposed to read a list of space-separated values of first name and last name from a text file and put the result into a slice in Go. For some reason, only the first item is stored in the slice:
package main
import (
"fmt"
"bufio"
"os"
//"io"
//"strings"
)
// Person is one record read from the names file.
type Person struct {
	fName, lName string // first name, last name
}
// main reads a file name from standard input, scans that file line by line,
// echoes each line, parses the first two space-separated words of the line
// into a Person (each truncated to 20 bytes), and finally prints the number
// of records followed by the records themselves.
func main() {
	fmt.Print("Enter a file name: ")
	// a scanner to read input with spaces
	stdin := bufio.NewScanner(os.Stdin)
	stdin.Scan()
	fileName := stdin.Text()

	file, err := os.Open(fileName)
	if err != nil {
		fmt.Fprintf(os.Stderr, "os.Open error: %v\n", err)
		return
	}

	fmt.Println() // blank separator line

	persons := make([]Person, 0, 3)
	// A new Scanner splits on lines by default.
	for lines := bufio.NewScanner(file); lines.Scan(); {
		text := lines.Text()
		fmt.Println(text)

		var first, last string
		// The result of Sscanln is not inspected; on failure the names keep
		// whatever was parsed before the error.
		fmt.Sscanln(text, &first, &last)

		persons = append(persons, Person{
			truncateString(first, 20),
			truncateString(last, 20),
		})
	}
	file.Close()

	fmt.Println("Items: ", len(persons))
	for i := range persons {
		fmt.Printf("%v %s %s\n", i, persons[i].fName, persons[i].lName)
	}
}
// truncateString returns input shortened to at most length bytes.
//
// The cut is moved back to the nearest rune boundary so that a multi-byte
// UTF-8 character is never split in half; for ASCII input the result is
// exactly the first length bytes. A length of zero or less yields "".
func truncateString(input string, length int) string {
	if length <= 0 {
		return ""
	}
	if len(input) <= length {
		return input
	}
	// Ranging over a string yields the byte index at which each rune starts;
	// keep the largest such index that does not exceed length.
	cut := 0
	for i := range input {
		if i > length {
			break
		}
		cut = i
	}
	return input[:cut]
}
To debug it I added an fmt.Println() to print out the lines of the input file. Surprisingly, it does not print the strings on new lines, but rather, as far as I can judge from the output, it overwrites the strings over the same line.
For the input file:
Harper Collins
Billy Bons
John Bon Jovi
It gives the output:
Enter a file name: names.txt
John Bon Jovis
1
0 Harper Collins
The last s is obviously from a longer name overwritten.
What can be the reason?
I'm trying this code:
// GetFooter returns a string which is the Footer of an edi file: the last
// two lines read from file, joined by "\n".
//
// When the file holds a single line, that line alone is returned; an empty
// file yields "". If scanning fails, out is empty and err reports the
// scanner error.
func GetFooter(file *os.File) (out string, err error) {
	// TODO can scanner read files backwards? Seek can get us to the end of file
	var lines []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	if err = scanner.Err(); err != nil {
		return "", err
	}

	// Guard the short cases explicitly; indexing len(lines)-2 would be out
	// of range for fewer than two lines.
	switch n := len(lines); n {
	case 0:
		return "", nil
	case 1:
		return lines[0], nil
	default:
		return lines[n-2] + "\n" + lines[n-1], nil
	}
}
I'm wondering if there's a cheaper way to get the last two lines of a file?
You can keep only the last two lines in memory as you scan the buffer.
Try it on Go playground.
package main
import (
"fmt"
"bufio"
"bytes"
"strconv"
)
// main fills an in-memory buffer with the decimal numbers 0 through 999, one
// per line, and prints what GetFooter returns for it.
func main() {
	var buffer bytes.Buffer
	for i := 0; i < 1000; i++ {
		buffer.WriteString(strconv.Itoa(i))
		buffer.WriteByte('\n')
	}

	footer, err := GetFooter(&buffer)
	fmt.Println(footer, err)
}
// GetFooter returns the last two lines of file joined by "\n".
//
// Only the two most recent lines are held in memory while scanning. An input
// with a single line returns that line alone, and an empty input returns "".
// If scanning fails, out is empty and err reports the scanner error.
func GetFooter(file *bytes.Buffer) (out string, err error) {
	var prev, last string
	seen := 0
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		prev, last = last, scanner.Text()
		seen++
	}
	if err = scanner.Err(); err != nil {
		return "", err
	}
	// With fewer than two lines there is nothing to join; joining anyway
	// would put a stray "\n" in front of the result.
	if seen < 2 {
		return last, nil
	}
	return prev + "\n" + last, nil
}
If you know roughly the size of the last two lines, you could set SOME_NUMBER to be that size plus some extra bytes to make sure you always capture the last two, then do something like
// Read only the tail of the file and keep its last two lines.
file, err := os.Open(fileName)
if err != nil {
	panic(err)
}
defer file.Close()

stat, err := file.Stat()
if err != nil {
	panic(err)
}

// Read at most SOME_NUMBER bytes from the end; a shorter file is read whole
// so the offset never becomes negative.
size := int64(SOME_NUMBER)
if stat.Size() < size {
	size = stat.Size()
}
buf := make([]byte, size)
if _, err = file.ReadAt(buf, stat.Size()-size); err != nil {
	panic(err)
}

// Drop the terminator of the final line first; otherwise Split would report
// an empty string as the last "line".
lines := strings.Split(strings.TrimSuffix(string(buf), "\n"), "\n")
if len(lines) > 2 {
	lines = lines[len(lines)-2:]
}
We can find the byte offset of a pattern from file by
"grep -ob pattern filename";
However, grep is not utf8 safe.
How do I find byte offset of a pattern in Go? The file is process log, which can be in TB.
This is what I want to get in Go:
$ cat fname
hello world
findme
hello 世界
findme again
...
$ grep -ob findme fname
12:findme
32:findme
FindAllStringIndex(s string, n int) returns byte start/finish indexes (i.e., slices) of all successive matches of the expression:
package main
import "fmt"
import "io/ioutil"
import "regexp"
// main reads the whole file into memory and prints the [start end] byte
// offsets of every match of the pattern "findme".
func main() {
	fname := "C:\\Users\\UserName\\go\\src\\so56798431\\fname"
	b, err := ioutil.ReadFile(fname)
	if err != nil {
		panic(err)
	}

	re, err := regexp.Compile("findme")
	if err != nil {
		// Without a compiled expression there is nothing to search with.
		panic(err)
	}

	fmt.Println(re.FindAllStringIndex(string(b), -1))
}
Output:
[[12 18] [32 38]]
Note: I did this on Microsoft Windows, but saved the file in UNIX format (linefeed); if input file saved in Windows format (carriage return & linefeed) the byte offsets would increment to 13 and 35, respectively.
UPDATE: for large files, use bufio.Scanner; for example:
package main
import (
"bufio"
"fmt"
"log"
"os"
"regexp"
)
// main scans the file line by line and prints the byte offset, counted from
// the start of the file, of every match of the pattern "findme".
//
// Offsets assume each line ends in a single "\n". Matches that span a line
// break are not found because every line is searched on its own.
func main() {
	file, err := os.Open("C:\\Users\\UserName\\go\\src\\so56798431\\fname")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	re, err := regexp.Compile("findme")
	if err != nil {
		log.Fatal(err)
	}

	scanner := bufio.NewScanner(file)
	// int64 keeps the running offset valid for files larger than an int can
	// hold on 32-bit builds.
	var bytesRead int64
	for scanner.Scan() {
		line := scanner.Text()
		for _, match := range re.FindAllStringIndex(line, -1) {
			fmt.Println(bytesRead + int64(match[0]))
		}
		// account for UNIX EOL marker
		bytesRead += int64(len(line)) + 1
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}
Output:
12
32
How can I read a file in Go and skip the first line / headers?
In Python I know I could do
# Count the lines of the file, not counting the first (header) line.
counter = 0
with open("my_file_path", "r") as fo:
    try:
        next(fo)  # consume the header line
    except StopIteration:
        pass  # empty file: there is no header to skip
    for _ in fo:
        counter = counter + 1
This is my Go application
package main
import (
"bufio"
"flag"
"os"
)
// readFile returns the number of lines the scanner yields for the file at
// fileLocation. The error from os.Open is not inspected.
func readFile(fileLocation string) int {
	f, _ := os.Open(fileLocation)
	defer f.Close()

	lines := 0
	for sc := bufio.NewScanner(f); sc.Scan(); {
		lines++
	}
	return lines
}
// main reads the -file_location flag and prints the value readFile returns
// for that path.
func main() {
	path := flag.String("file_location", "default value", "file path to count lines")
	flag.Parse()

	println(readFile(*path))
}
I will be reading a huge file and don't want to be evaluating each line if the index is 0.
How about advancing to the next token before the loop:
scanner := bufio.NewScanner(file)
// The first successful Scan consumes the header line; its text is never
// read, so the loop below starts printing at the second line.
if scanner.Scan() {
	for scanner.Scan() {
		line := scanner.Text()
		fmt.Println(line)
	}
}
file
1
2
3
output
2
3
https://play.golang.org/p/I2w50zFdcg0
For example,
package main
import (
"bufio"
"fmt"
"os"
)
// readFile counts the lines of filename that follow its first line.
//
// It returns the count and a nil error on success. If the file cannot be
// opened or scanning fails, it returns 0 and that error.
func readFile(filename string) (int, error) {
	f, err := os.Open(filename)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	s := bufio.NewScanner(f)
	s.Scan() // consume the header line; it is not counted

	n := 0
	for s.Scan() {
		n++
	}
	if scanErr := s.Err(); scanErr != nil {
		return 0, scanErr
	}
	return n, nil
}
// main runs readFile on test.file and prints the resulting count, or the
// error on standard error when readFile fails.
func main() {
	const filename = `test.file`

	count, err := readFile(filename)
	if err == nil {
		fmt.Println(count)
		return
	}
	fmt.Fprintln(os.Stderr, err)
}
Output:
$ cat test.file
1234567890
abc
$ go run count.go
1
$
you can try something like this
// readFile returns the number of lines in the file at fileLocation, not
// counting the first line. It returns 0 when the file cannot be opened.
func readFile(fileLocation string) int {
	fileOpen, err := os.Open(fileLocation)
	if err != nil {
		// The signature has no error result, so an unreadable file is
		// reported as having no lines.
		return 0
	}
	defer fileOpen.Close()

	fileScanner := bufio.NewScanner(fileOpen)
	counter := 0
	for fileScanner.Scan() {
		// read first line and ignore
		break
	}
	for fileScanner.Scan() {
		// read remaining lines and process
		txt := fileScanner.Text()
		counter = counter + 1
		// do something with text
		_ = txt // placeholder use; Go rejects a variable that is declared and not used
	}
	return counter
}
Edit:
// readFile returns the number of lines in the file at fileLocation, not
// counting the first line. It returns 0 when the file cannot be opened.
func readFile(fileLocation string) int {
	fileOpen, err := os.Open(fileLocation)
	if err != nil {
		// The signature has no error result, so an unreadable file is
		// reported as having no lines.
		return 0
	}
	defer fileOpen.Close()

	fileScanner := bufio.NewScanner(fileOpen)
	counter := 0
	if fileScanner.Scan() {
		// read first line and ignore
		fileScanner.Text()
	}
	for fileScanner.Scan() {
		// read remaining lines and process
		txt := fileScanner.Text()
		// do something with text
		_ = txt // placeholder use; Go rejects a variable that is declared and not used
		counter = counter + 1
	}
	return counter
}
I'm trying to write a Go script that takes in as many lines of comma-separated coordinates as the user wishes, split and convert the string of coordinates to float64, store each line as a slice, and then append each slice in a slice of slices for later usage.
Example inputs are:
1.1,2.2,3.3
3.14,0,5.16
Example outputs are:
[[1.1 2.2 3.3],[3.14 0 5.16]]
The equivalent in Python is
def get_input():
    """Read comma-separated coordinate lines until an empty line is entered.

    Spaces are removed from each line before it is split on commas, and every
    piece is converted with float().

    Returns:
        A list with one list of floats per non-empty input line.

    Raises:
        ValueError: if a piece of a line cannot be converted to float.
    """
    print("Please enter comma separated coordinates:")
    lines = []
    while True:
        line = input()
        if line:
            line = [float(x) for x in line.replace(" ", "").split(",")]
            lines.append(line)
        else:
            break
    return lines
But what I wrote in Go seems way too long (pasted below), and I'm creating a lot of variables without the ability to change variable type as in Python. Since I literally just started writing Golang to learn it, I fear my script is long as I'm trying to convert Python thinking into Go. Therefore, I would like to ask for some advice as to how to write this script shorter and more concise in Go style? Thank you.
package main
import (
"fmt"
"os"
"bufio"
"strings"
"strconv"
)
// main prints the coordinate rows returned by get_input.
func main() {
	fmt.Println(get_input())
}
// get_input prompts for comma-separated coordinates and reads lines from
// standard input until an empty line or the end of input. Spaces are removed
// from each line, the line is split on commas, and the pieces are converted
// by str2float. It returns one slice per line read.
func get_input() [][]float64 {
	fmt.Println("Please enter comma separated coordinates: ")

	var rows [][]float64
	for sc := bufio.NewScanner(os.Stdin); sc.Scan(); {
		line := sc.Text()
		if len(line) == 0 {
			break
		}
		fields := strings.Split(strings.Replace(line, " ", "", -1), ",")
		rows = append(rows, str2float(fields))
	}
	return rows
}
// str2float converts each string in records to a float64 and returns the
// converted values in order. Entries that strconv.ParseFloat rejects are
// left out of the result.
func str2float(records []string) []float64 {
	var out []float64
	for i := range records {
		f, err := strconv.ParseFloat(records[i], 64)
		if err != nil {
			continue
		}
		out = append(out, f)
	}
	return out
}
Using only string functions:
package main
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
// main reads lines of comma-separated numbers from standard input until the
// end of input, converts every value to float64, and prints all rows.
//
// Blank lines are skipped. A value that is not a valid float64 makes the
// program panic with a message naming the offending value.
func main() {
	scanner := bufio.NewScanner(os.Stdin)
	var result [][]float64
	for scanner.Scan() {
		// TrimSpace also removes a trailing "\r", so a line holding only
		// white space counts as blank instead of failing to parse.
		txt := strings.TrimSpace(scanner.Text())
		if txt == "" {
			continue
		}

		values := strings.Split(txt, ",")
		row := make([]float64, 0, len(values))
		for _, v := range values {
			fl, err := strconv.ParseFloat(strings.TrimSpace(v), 64)
			if err != nil {
				panic(fmt.Sprintf("Incorrect value for float64 '%v'", v))
			}
			row = append(row, fl)
		}
		result = append(result, row)
	}
	// Scan returns false both at end of input and on a read error; only Err
	// tells the two apart.
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading standard input:", err)
		os.Exit(1)
	}

	fmt.Printf("Result: %v\n", result)
}
Run:
$ printf "1.1,2.2,3.3
3.14,0,5.16
2,45,76.0, 45 , 69" | go run experiment2.go
Result: [[1.1 2.2 3.3] [3.14 0 5.16] [2 45 76 45 69]]
With given input, you can concatenate them to make a JSON string and then unmarshal (deserialize) that:
// main reads one token per line from standard input until an empty result,
// wraps each token in brackets, joins them into a JSON array of arrays, and
// prints the decoded [][]float64 (or the decoding error).
func main() {
	var rows []string
	for {
		var row string
		// The result of Scanln is not inspected; row stays empty when
		// nothing could be read, which ends the loop.
		fmt.Scanln(&row)
		if row == "" {
			break
		}
		rows = append(rows, "["+row+"]")
	}

	payload := []byte("[" + strings.Join(rows, ",") + "]")

	inputs := [][]float64{}
	err := json.Unmarshal(payload, &inputs)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(inputs)
}