How can I make a file reader function more efficiently? - go

I'm trying this code:
// GetFooter returns a string which is the Footer of an edi file
func GetFooter(file *os.File) (out string, err error) {
// TODO can scanner read files backwards? Seek can get us to the end of file
var lines []string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
line1 := lines[len(lines)-2]
line2 := lines[len(lines)-1]
return line1 + "\n" + line2, scanner.Err()
}
I'm wondering if there's a cheaper way to get the last two lines of a file?

You can keep only the last two lines in memory as you scan the buffer.
Try it on Go playground.
package main
import (
"fmt"
"bufio"
"bytes"
"strconv"
)
func main() {
var buffer bytes.Buffer
for i := 0; i < 1000; i++ {
s := strconv.Itoa(i)
buffer.WriteString(s + "\n")
}
fmt.Println(GetFooter(&buffer))
}
func GetFooter(file *bytes.Buffer) (out string, err error) {
var line1, line2 string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line1, line2 = line2, scanner.Text()
}
return line1 + "\n" + line2, scanner.Err()
}

If you know roughly the size of the last two lines, you could set SOME_NUMBER to be that size plus some extra bytes to make sure you always capture the last two, then do something like
file, err := os.Open(fileName)
if err != nil {
panic(err)
}
defer file.Close()
buf := make([]byte, SOME_NUMBER)
stat, err := os.Stat(fileName)
start := stat.Size() - SOME_NUMBER
_, err = file.ReadAt(buf, start)
if err != nil {
panic(err)
}
lines := strings.Split(string(start), "\n", -1)
lines = lines[len(lines)-2:]

Related

A large CSV/TXT file need to be split into multiple small files based on the number of lines given. using GO bufio.NewScanner

I'm new to GOLANG. I try to do one tiny task .
A large CSV/TXT file need to be split into multiple small files based on the number of lines given.
Input File : abc.csv (assume 200 lines), NoofLines required per file: 50
output File: abc_1.csv, abc_2.csv, abc_3.csv, abc_4.csv
Let me to paste the code into this thread. Currently my script is creating 4 files.
Each file contain not expected data and format.
Is there an easy way to create?
package main
import (
"bufio"
"encoding/csv"
"fmt"
"log"
"os"
)
func main() {
file, err := os.Open("abc.csv")
if err != nil {
log.Fatal(err)
}
defer file.Close()
sc := bufio.NewScanner(file)
lines := make([]string, 0)
// Read through 'tokens' until an EOF is encountered.
linecnt := 1
filecnt := 1
var fname string
fname = fmt.Sprint("abc_", filecnt, ".csv")
file, _ = os.Create(fname)
fmt.Println("file :", fname)
for sc.Scan() {
lines = append(lines, sc.Text())
if linecnt == 50 {
linecnt = 0
filecnt++
//var fname string
fname = fmt.Sprint("abc_", filecnt, ".csv")
file, _ = os.Create(fname)
writer := csv.NewWriter(file)
defer writer.Flush()
writer.Write(lines)
defer file.Close()
fmt.Println("file :", fname)
} else {
linecnt++
writer := csv.NewWriter(file)
defer writer.Flush()
writer.Write(lines)
}
}
if err := sc.Err(); err != nil {
log.Fatal(err)
}
}

My program in Golang prints the first input two times in the file

I try to get some CSV formatted string as input and then to print it to an actual CSV file. It works but it prints the first string 2 times.
My code looks like this:
func main() {
scanner := bufio.NewScanner(os.Stdin)
n := 0
inputFile, err := os.Create("input.csv") //create the input.csv file
if err != nil {
log.Fatal(err)
}
csvwriter := csv.NewWriter(inputFile)
fmt.Println("How many records ?")
fmt.Scanln(&n)
fmt.Println("Enter the records")
var lines [][]string
for i := 0; i < n; i++ {
scanner.Scan()
text := scanner.Text()
lines = append(lines, []string{text})
err := csvwriter.WriteAll(lines)
if err != nil {
return
}
}
csvwriter.Flush()
inputFile.Close()
}
for n=2 and the records:
abcd, efgh, ijklmn
opq, rstu, vwxyz
the output looks like this:
"abcd, efgh, ijklmn"
"abcd, efgh, ijklmn"
"opq, rstu, vwxyz"
It is my first time working with Golang and I am a little bit lost :D
csvwriter.WriteAll(lines) WriteAll writes multiple CSV records to w using Write and then calls Flush, returning any error from the Flush.
You are appending lines every time you read in a loop and flushing to the file.
func main() {
scanner := bufio.NewScanner(os.Stdin)
n := 0
inputFile, err := os.Create("input.csv") //create the input.csv file
if err != nil {
log.Fatal(err)
}
defer inputFile.Close()
csvwriter := csv.NewWriter(inputFile)
fmt.Println("How many records ?")
fmt.Scanln(&n)
fmt.Println("Enter the records")
var lines [][]string
for i := 0; i < n; i++ {
scanner.Scan()
text := scanner.Text()
lines = append(lines, []string{text})
}
err = csvwriter.WriteAll(lines)
if err != nil {
return
}
}
You were writing the csv in loop so that first line printed double. Here is the corrected code.
package main
import (
"bufio"
"encoding/csv"
"fmt"
"log"
"os"
)
func main() {
scanner := bufio.NewScanner(os.Stdin)
n := 0
inputFile, err := os.Create("input.csv") //create the input.csv file
if err != nil {
log.Fatal(err)
}
defer func() {
inputFile.Close()
}()
csvwriter := csv.NewWriter(inputFile)
defer func() {
csvwriter.Flush()
}()
fmt.Println("How many records ?")
fmt.Scanln(&n)
fmt.Println("Enter the records")
var lines [][]string
for i := 0; i < n; i++ {
scanner.Scan()
text := scanner.Text()
lines = append(lines, []string{text})
}
err = csvwriter.WriteAll(lines)
if err != nil {
return
}
}

How to read exact line from file in Golang

How do I find and read line number something in a file that corresponds some input?
I googled up this code, but it loads whole content of a file into single array with all the lines indexed. Isn't there simpler way?
func LinesInFile(fileName string) []string {
f, _ := os.Open(fileName)
// Create new Scanner.
scanner := bufio.NewScanner(f)
result := []string{}
// Use Scan.
for scanner.Scan() {
line := scanner.Text()
// Append line to result.
result = append(result, line)
}
return result
}
You should just ignore lines you're not interested.
func ReadExactLine(fileName string, lineNumber int) string {
inputFile, err := os.Open(fileName)
if err != nil {
fmt.Println("Error occurred! ", err)
}
br := bufio.NewReader(inputFile)
for i := 1; i < lineNumber; i++ {
_, _ = br.ReadString('\n')
}
str, err := br.ReadString('\n')
fmt.Println("Line is ", str)
return str
}

How can I skip the first line of a file in Go?

How can I read a file in Go and skip the first line / headers?
In Python I know I could do
counter = 0
with open("my_file_path", "r") as fo:
try:
next(fo)
except:
pass
for _ in fo:
counter = counter + 1
This is my Go application
package main
import (
"bufio"
"flag"
"os"
)
func readFile(fileLocation string) int {
fileOpen, _ := os.Open(fileLocation)
defer fileOpen.Close()
fileScanner := bufio.NewScanner(fileOpen)
counter := 0
for fileScanner.Scan() {
//fmt.Println(fileScanner.Text())
counter = counter + 1
}
return counter
}
func main() {
fileLocation := flag.String("file_location", "default value", "file path to count lines")
flag.Parse()
counted := readFile(*fileLocation)
println(counted)
}
I will be reading a huge file and don't want to be evaluating each line if the index is 0.
How about to move to the next token before the loop
scanner := bufio.NewScanner(file)
scanner.Scan() // this moves to the next token
for scanner.Scan() {
fmt.Println(scanner.Text())
}
file
1
2
3
output
2
3
https://play.golang.org/p/I2w50zFdcg0
For example,
package main
import (
"bufio"
"fmt"
"os"
)
func readFile(filename string) (int, error) {
f, err := os.Open(filename)
if err != nil {
return 0, err
}
defer f.Close()
count := 0
s := bufio.NewScanner(f)
if s.Scan() {
for s.Scan() {
count++
}
}
if err := s.Err(); err != nil {
return 0, err
}
return count, nil
}
func main() {
filename := `test.file`
count, err := readFile(filename)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
fmt.Println(count)
}
Output:
$ cat test.file
1234567890
abc
$ go run count.go
1
$
you can try something like this
func readFile(fileLocation string) int {
fileOpen, _ := os.Open(fileLocation)
defer fileOpen.Close()
fileScanner := bufio.NewScanner(fileOpen)
counter := 0
for fileScanner.Scan() {
// read first line and ignore
fileScanner.Text()
break
}
for fileScanner.Scan() {
// read remaining lines and process
txt := fileScanner.Text()
counter = counter + 1
// do something with text
}
return counter
}
Edit:
func readFile(fileLocation string) int {
fileOpen, _ := os.Open(fileLocation)
defer fileOpen.Close()
fileScanner := bufio.NewScanner(fileOpen)
counter := 0
if fileScanner.Scan() {
// read first line and ignore
fileScanner.Text()
}
for fileScanner.Scan() {
// read remaining lines and process
txt := fileScanner.Text()
// do something with text
counter = counter + 1
}
return counter
}

How do I read in a large flat file

I have a flat file that has 339276 line of text in it for a size of 62.1 MB. I am attempting to read in all the lines, parse them based on some conditions I have and then insert them into a database.
I originally attempted to use a bufio.Scan() loop and bufio.Text() to get the line but I was running out of buffer space. I switched to using bufio.ReadLine/ReadString/ReadByte (I tried each) and had the same problem with each. I didn't have enough buffer space.
I tried using read and setting the buffer size but as the document says it actually a const that can be made smaller but never bigger that 64*1024 bytes. I then tried to use File.ReadAt where I set the starting postilion and moved it along as I brought in each section to no avail. I have looked at the following examples and explanations (not an exhaustive list):
Read text file into string array (and write)
How to Read last lines from a big file with Go every 10 secs
reading file line by line in go
How do I read in an entire file (either line by line or the whole thing at once) into a slice so I can then go do things to the lines?
Here is some code that I have tried:
file, err := os.Open(feedFolder + value)
handleError(err)
defer file.Close()
// fileInfo, _ := file.Stat()
var linesInFile []string
r := bufio.NewReader(file)
for {
path, err := r.ReadLine("\n") // 0x0A separator = newline
linesInFile = append(linesInFile, path)
if err == io.EOF {
fmt.Printf("End Of File: %s", err)
break
} else if err != nil {
handleError(err) // if you return error
}
}
fmt.Println("Last Line: ", linesInFile[len(linesInFile)-1])
Here is something else I tried:
var fileSize int64 = fileInfo.Size()
fmt.Printf("File Size: %d\t", fileSize)
var bufferSize int64 = 1024 * 60
bytes := make([]byte, bufferSize)
var fullFile []byte
var start int64 = 0
var interationCounter int64 = 1
var currentErr error = nil
for currentErr != io.EOF {
_, currentErr = file.ReadAt(bytes, st)
fullFile = append(fullFile, bytes...)
start = (bufferSize * interationCounter) + 1
interationCounter++
}
fmt.Printf("Err: %s\n", currentErr)
fmt.Printf("fullFile Size: %s\n", len(fullFile))
fmt.Printf("Start: %d", start)
var currentLine []string
for _, value := range fullFile {
if string(value) != "\n" {
currentLine = append(currentLine, string(value))
} else {
singleLine := strings.Join(currentLine, "")
linesInFile = append(linesInFile, singleLine)
currentLine = nil
}
}
I am at a loss. Either I don't understand exactly how the buffer works or I don't understand something else. Thanks for reading.
bufio.Scan() and bufio.Text() in a loop perfectly works for me on a files with much larger size, so I suppose you have lines exceeded buffer capacity. Then
check your line ending
and which Go version you use path, err :=r.ReadLine("\n") // 0x0A separator = newline? Looks like func (b *bufio.Reader) ReadLine() (line []byte, isPrefix bool, err error) has return value isPrefix specifically for your use case
http://golang.org/pkg/bufio/#Reader.ReadLine
It's not clear that it's necessary to read in all the lines before parsing them and inserting them into a database. Try to avoid that.
You have a small file: "a flat file that has 339276 line of text in it for a size of 62.1 MB." For example,
package main
import (
"bytes"
"fmt"
"io"
"io/ioutil"
)
func readLines(filename string) ([]string, error) {
var lines []string
file, err := ioutil.ReadFile(filename)
if err != nil {
return lines, err
}
buf := bytes.NewBuffer(file)
for {
line, err := buf.ReadString('\n')
if len(line) == 0 {
if err != nil {
if err == io.EOF {
break
}
return lines, err
}
}
lines = append(lines, line)
if err != nil && err != io.EOF {
return lines, err
}
}
return lines, nil
}
func main() {
// a flat file that has 339276 lines of text in it for a size of 62.1 MB
filename := "flat.file"
lines, err := readLines(filename)
fmt.Println(len(lines))
if err != nil {
fmt.Println(err)
return
}
}
It seems to me this variant of readLines is shorter and faster than suggested peterSO
func readLines(filename string) (map[int]string, error) {
lines := make(map[int]string)
data, err := ioutil.ReadFile(filename)
if err != nil {
return nil, err
}
for n, line := range strings.Split(string(data), "\n") {
lines[n] = line
}
return lines, nil
}
package main
import (
"fmt"
"os"
"log"
"bufio"
)
func main() {
FileName := "assets/file.txt"
file, err := os.Open(FileName)
if err != nil {
log.Fatal(err)
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
fmt.Println(scanner.Text())
}
}

Resources