How to read exact line from file in Golang - go

How do I find and read line number something in a file that corresponds some input?
I googled up this code, but it loads whole content of a file into single array with all the lines indexed. Isn't there simpler way?
func LinesInFile(fileName string) []string {
f, _ := os.Open(fileName)
// Create new Scanner.
scanner := bufio.NewScanner(f)
result := []string{}
// Use Scan.
for scanner.Scan() {
line := scanner.Text()
// Append line to result.
result = append(result, line)
}
return result
}

You should just ignore lines you're not interested.
func ReadExactLine(fileName string, lineNumber int) string {
inputFile, err := os.Open(fileName)
if err != nil {
fmt.Println("Error occurred! ", err)
}
br := bufio.NewReader(inputFile)
for i := 1; i < lineNumber; i++ {
_, _ = br.ReadString('\n')
}
str, err := br.ReadString('\n')
fmt.Println("Line is ", str)
return str
}

Related

My program in Golang prints the first input two times in the file

I try to get some CSV formatted string as input and then to print it to an actual CSV file. It works but it prints the first string 2 times.
My code looks like this:
func main() {
scanner := bufio.NewScanner(os.Stdin)
n := 0
inputFile, err := os.Create("input.csv") //create the input.csv file
if err != nil {
log.Fatal(err)
}
csvwriter := csv.NewWriter(inputFile)
fmt.Println("How many records ?")
fmt.Scanln(&n)
fmt.Println("Enter the records")
var lines [][]string
for i := 0; i < n; i++ {
scanner.Scan()
text := scanner.Text()
lines = append(lines, []string{text})
err := csvwriter.WriteAll(lines)
if err != nil {
return
}
}
csvwriter.Flush()
inputFile.Close()
}
for n=2 and the records:
abcd, efgh, ijklmn
opq, rstu, vwxyz
the output looks like this:
"abcd, efgh, ijklmn"
"abcd, efgh, ijklmn"
"opq, rstu, vwxyz"
It is my first time working with Golang and I am a little bit lost :D
csvwriter.WriteAll(lines) WriteAll writes multiple CSV records to w using Write and then calls Flush, returning any error from the Flush.
You are appending lines every time you read in a loop and flushing to the file.
func main() {
scanner := bufio.NewScanner(os.Stdin)
n := 0
inputFile, err := os.Create("input.csv") //create the input.csv file
if err != nil {
log.Fatal(err)
}
defer inputFile.Close()
csvwriter := csv.NewWriter(inputFile)
fmt.Println("How many records ?")
fmt.Scanln(&n)
fmt.Println("Enter the records")
var lines [][]string
for i := 0; i < n; i++ {
scanner.Scan()
text := scanner.Text()
lines = append(lines, []string{text})
}
err = csvwriter.WriteAll(lines)
if err != nil {
return
}
}
You were writing the csv in loop so that first line printed double. Here is the corrected code.
package main
import (
"bufio"
"encoding/csv"
"fmt"
"log"
"os"
)
func main() {
scanner := bufio.NewScanner(os.Stdin)
n := 0
inputFile, err := os.Create("input.csv") //create the input.csv file
if err != nil {
log.Fatal(err)
}
defer func() {
inputFile.Close()
}()
csvwriter := csv.NewWriter(inputFile)
defer func() {
csvwriter.Flush()
}()
fmt.Println("How many records ?")
fmt.Scanln(&n)
fmt.Println("Enter the records")
var lines [][]string
for i := 0; i < n; i++ {
scanner.Scan()
text := scanner.Text()
lines = append(lines, []string{text})
}
err = csvwriter.WriteAll(lines)
if err != nil {
return
}
}

How can I make a file reader function more efficiently?

I'm trying this code:
// GetFooter returns a string which is the Footer of an edi file
func GetFooter(file *os.File) (out string, err error) {
// TODO can scanner read files backwards? Seek can get us to the end of file
var lines []string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
line1 := lines[len(lines)-2]
line2 := lines[len(lines)-1]
return line1 + "\n" + line2, scanner.Err()
}
I'm wondering if there's a cheaper way to get the last two lines of a file?
You can keep only the last two lines in memory as you scan the buffer.
Try it on Go playground.
package main
import (
"fmt"
"bufio"
"bytes"
"strconv"
)
func main() {
var buffer bytes.Buffer
for i := 0; i < 1000; i++ {
s := strconv.Itoa(i)
buffer.WriteString(s + "\n")
}
fmt.Println(GetFooter(&buffer))
}
func GetFooter(file *bytes.Buffer) (out string, err error) {
var line1, line2 string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line1, line2 = line2, scanner.Text()
}
return line1 + "\n" + line2, scanner.Err()
}
If you know roughly the size of the last two lines, you could set SOME_NUMBER to be that size plus some extra bytes to make sure you always capture the last two, then do something like
file, err := os.Open(fileName)
if err != nil {
panic(err)
}
defer file.Close()
buf := make([]byte, SOME_NUMBER)
stat, err := os.Stat(fileName)
start := stat.Size() - SOME_NUMBER
_, err = file.ReadAt(buf, start)
if err != nil {
panic(err)
}
lines := strings.Split(string(start), "\n", -1)
lines = lines[len(lines)-2:]

What does "Scan advances the Scanner to the next token" mean in Go's bufio.Scanner?

According to Scanner.scan documents, Scan() advances the Scanner to the next token, but what does that mean? I find that Scanner.Text and Scanner.Bytes can be different, which is puzzling.
This code doesn't always cause an error, but as the file becomes larger it does:
func TestScanner(t *testing.T) {
path := "/tmp/test.txt"
f, err := os.Open(path)
if err != nil {
panic(fmt.Sprint("failed to open ", path))
}
defer f.Close()
scanner := bufio.NewScanner(f)
bs := make([][]byte, 0)
for scanner.Scan() {
bs = append(bs, scanner.Bytes())
}
f, err = os.Open(path)
if err != nil {
panic(fmt.Sprint("failed to open ", path))
}
defer f.Close()
scanner = bufio.NewScanner(f)
ss := make([]string, 0)
for scanner.Scan() {
ss = append(ss, scanner.Text())
}
for i, b := range bs {
if string(b) != ss[i] {
t.Errorf("expect %s, got %s", ss[i], string(b))
}
}
}
The token is defined by the scanner's split function. Scan() returns when the split function finds a token or there's an error.
The String() and Bytes() methods both return the current token. The String() method returns a copy of the token. The Bytes() method does not allocate memory and returns a slice that may use a backing array that's overwritten on a subsequent call to Scan().
Copy the slice returned from Bytes() to avoid this issue:
for scanner.Scan() {
bs = append(bs, append([]byte(nil), scanner.Bytes()...))
}

How do I read in a large flat file

I have a flat file that has 339276 line of text in it for a size of 62.1 MB. I am attempting to read in all the lines, parse them based on some conditions I have and then insert them into a database.
I originally attempted to use a bufio.Scan() loop and bufio.Text() to get the line but I was running out of buffer space. I switched to using bufio.ReadLine/ReadString/ReadByte (I tried each) and had the same problem with each. I didn't have enough buffer space.
I tried using read and setting the buffer size but as the document says it actually a const that can be made smaller but never bigger that 64*1024 bytes. I then tried to use File.ReadAt where I set the starting postilion and moved it along as I brought in each section to no avail. I have looked at the following examples and explanations (not an exhaustive list):
Read text file into string array (and write)
How to Read last lines from a big file with Go every 10 secs
reading file line by line in go
How do I read in an entire file (either line by line or the whole thing at once) into a slice so I can then go do things to the lines?
Here is some code that I have tried:
file, err := os.Open(feedFolder + value)
handleError(err)
defer file.Close()
// fileInfo, _ := file.Stat()
var linesInFile []string
r := bufio.NewReader(file)
for {
path, err := r.ReadLine("\n") // 0x0A separator = newline
linesInFile = append(linesInFile, path)
if err == io.EOF {
fmt.Printf("End Of File: %s", err)
break
} else if err != nil {
handleError(err) // if you return error
}
}
fmt.Println("Last Line: ", linesInFile[len(linesInFile)-1])
Here is something else I tried:
var fileSize int64 = fileInfo.Size()
fmt.Printf("File Size: %d\t", fileSize)
var bufferSize int64 = 1024 * 60
bytes := make([]byte, bufferSize)
var fullFile []byte
var start int64 = 0
var interationCounter int64 = 1
var currentErr error = nil
for currentErr != io.EOF {
_, currentErr = file.ReadAt(bytes, st)
fullFile = append(fullFile, bytes...)
start = (bufferSize * interationCounter) + 1
interationCounter++
}
fmt.Printf("Err: %s\n", currentErr)
fmt.Printf("fullFile Size: %s\n", len(fullFile))
fmt.Printf("Start: %d", start)
var currentLine []string
for _, value := range fullFile {
if string(value) != "\n" {
currentLine = append(currentLine, string(value))
} else {
singleLine := strings.Join(currentLine, "")
linesInFile = append(linesInFile, singleLine)
currentLine = nil
}
}
I am at a loss. Either I don't understand exactly how the buffer works or I don't understand something else. Thanks for reading.
bufio.Scan() and bufio.Text() in a loop perfectly works for me on a files with much larger size, so I suppose you have lines exceeded buffer capacity. Then
check your line ending
and which Go version you use path, err :=r.ReadLine("\n") // 0x0A separator = newline? Looks like func (b *bufio.Reader) ReadLine() (line []byte, isPrefix bool, err error) has return value isPrefix specifically for your use case
http://golang.org/pkg/bufio/#Reader.ReadLine
It's not clear that it's necessary to read in all the lines before parsing them and inserting them into a database. Try to avoid that.
You have a small file: "a flat file that has 339276 line of text in it for a size of 62.1 MB." For example,
package main
import (
"bytes"
"fmt"
"io"
"io/ioutil"
)
func readLines(filename string) ([]string, error) {
var lines []string
file, err := ioutil.ReadFile(filename)
if err != nil {
return lines, err
}
buf := bytes.NewBuffer(file)
for {
line, err := buf.ReadString('\n')
if len(line) == 0 {
if err != nil {
if err == io.EOF {
break
}
return lines, err
}
}
lines = append(lines, line)
if err != nil && err != io.EOF {
return lines, err
}
}
return lines, nil
}
func main() {
// a flat file that has 339276 lines of text in it for a size of 62.1 MB
filename := "flat.file"
lines, err := readLines(filename)
fmt.Println(len(lines))
if err != nil {
fmt.Println(err)
return
}
}
It seems to me this variant of readLines is shorter and faster than suggested peterSO
func readLines(filename string) (map[int]string, error) {
lines := make(map[int]string)
data, err := ioutil.ReadFile(filename)
if err != nil {
return nil, err
}
for n, line := range strings.Split(string(data), "\n") {
lines[n] = line
}
return lines, nil
}
package main
import (
"fmt"
"os"
"log"
"bufio"
)
func main() {
FileName := "assets/file.txt"
file, err := os.Open(FileName)
if err != nil {
log.Fatal(err)
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
fmt.Println(scanner.Text())
}
}

How to write to a file in golang

i am trying to write to to a file. i read the whole content of the file and now i want to change the content of the file based on some word that i have got from the file. but when i check, the content of the file, it is still the same and it has not change. this is what i used
if strings.Contains(string(read), sam) {
fmt.Println("this file contain that word")
temp := strings.ToUpper(sam)
fmt.Println(temp)
err := ioutil.WriteFile(fi.Name(), []byte(temp), 0644)
} else {
fmt.Println(" the word is not in the file")
}
Considering that your call to ioutil.WriteFile() is consistent with what is used in "Go by Example: Writing Files", this should work.
But that Go by example article check the err just after the write call.
You check the err outside the scope of your test:
if matched {
read, err := ioutil.ReadFile(path)
//fmt.Println(string(read))
fmt.Println(" This is the name of the file", fi.Name())
if strings.Contains(string(read), sam) {
fmt.Println("this file contain that word")
Value := strings.ToUpper(sam)
fmt.Println(Value)
err = ioutil.WriteFile(fi.Name(), []byte(Value), 0644)
} else {
fmt.Println(" the word is not in the file")
}
check(err) <===== too late
}
The err you are testing is the one you got when reading the file (ioutil.ReadFile), because of blocks and scope.
You need to check the error right after the Write call
err = ioutil.WriteFile(fi.Name(), []byte(Value), 0644)
check(err) <===== too late
Since WriteFile overwrite the all file, you could strings.Replace() to replace your word by its upper case equivalent:
r := string(read)
r = strings.Replace(r, sam, strings.ToUpper(sam), -1)
err := ioutil.WriteFile(fi.Name(), []byte(r), 0644)
For a replace which is case insensitive, use a regexp as in "How do I do a case insensitive regular expression in Go?".
The, use func (*Regexp) ReplaceAllString:
re := regexp.MustCompile("(?i)\\b"+sam+"\\b")
r = re.ReplaceAllString(r, strings.ToUpper(sam))
err := ioutil.WriteFile(fi.Name(), []byte(r), 0644)
Note the \b: word boundary to find the any word starting and ending with sam content (instead of finding substrings containing sam content).
If you want to replace substrings, simply drop the \b:
re := regexp.MustCompile("(?i)"+sam)
It's not clear what you want to do. My best guess is something like this:
package main
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"os"
)
func UpdateWord(filename string, data, word []byte) (int, error) {
n := 0
f, err := os.OpenFile(filename, os.O_WRONLY, 0644)
if err != nil {
return n, err
}
uWord := bytes.ToUpper(word)
if len(word) < len(uWord) {
err := errors.New("Upper case longer than lower case:" + string(word))
return n, err
}
if len(word) > len(uWord) {
uWord = append(uWord, bytes.Repeat([]byte{' '}, len(word))...)[:len(word)]
}
off := int64(0)
for {
i := bytes.Index(data[off:], word)
if i < 0 {
break
}
off += int64(i)
_, err = f.WriteAt(uWord, off)
if err != nil {
return n, err
}
n++
off += int64(len(word))
}
f.Close()
if err != nil {
return n, err
}
return n, nil
}
func main() {
// Test file
filename := `ltoucase.txt`
// Create test file
lcase := []byte(`update a bc def ghij update klmno pqrstu update vwxyz update`)
perm := os.FileMode(0644)
err := ioutil.WriteFile(filename, lcase, perm)
if err != nil {
fmt.Println(err)
return
}
// Read test file
data, err := ioutil.ReadFile(filename)
if err != nil {
fmt.Println(err)
return
}
fmt.Println(string(data))
// Update word in test file
word := []byte("update")
n, err := UpdateWord(filename, data, word)
if err != nil {
fmt.Println(n, err)
return
}
fmt.Println(filename, string(word), n)
data, err = ioutil.ReadFile(filename)
if err != nil {
fmt.Println(err)
return
}
fmt.Println(string(data))
}
Output:
update a bc def ghij update klmno pqrstu update vwxyz update
ltoucase.txt update 4
UPDATE a bc def ghij UPDATE klmno pqrstu UPDATE vwxyz UPDATE

Resources