Reading CSV file in Go - go

Here is a code snippet that reads CSV file:
// parseLocation reads the CSV file named by file and returns a map from
// location name (field 0) to a Point built from the latitude (field 1) and
// longitude (field 2) of each record.
//
// It returns an error if the file cannot be opened or parsed as CSV, if a
// record has fewer than three fields (csv.ErrFieldCount), or if a latitude
// or longitude is not a valid float64.
func parseLocation(file string) (map[string]Point, error) {
	f, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	// Deferred only after the error check: f is nil when Open fails.
	defer f.Close()

	lines, err := csv.NewReader(f).ReadAll()
	if err != nil {
		return nil, err
	}

	locations := make(map[string]Point, len(lines))
	for _, line := range lines {
		// Guard the indexing below; a short record would otherwise panic.
		if len(line) < 3 {
			return nil, csv.ErrFieldCount
		}
		lat, err := strconv.ParseFloat(line[1], 64)
		if err != nil {
			return nil, err
		}
		lon, err := strconv.ParseFloat(line[2], 64)
		if err != nil {
			return nil, err
		}
		locations[line[0]] = Point{lat, lon}
	}
	return locations, nil
}
Is there a way to improve the readability of this code? There is a lot of `if err != nil` noise.

Go now has a csv package for this: encoding/csv. You can find the docs here: https://golang.org/pkg/encoding/csv/
There are a couple of good examples in the docs. Here is a helper function I created that reads a CSV file and returns its records.
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
)
// readCsvFile opens the CSV file at filePath and returns all of its records.
// If the file cannot be opened or its contents cannot be parsed as CSV, the
// program is terminated through log.Fatal.
func readCsvFile(filePath string) [][]string {
	file, openErr := os.Open(filePath)
	if openErr != nil {
		log.Fatal("Unable to read input file "+filePath, openErr)
	}
	defer file.Close()

	rows, parseErr := csv.NewReader(file).ReadAll()
	if parseErr != nil {
		log.Fatal("Unable to parse file as CSV for "+filePath, parseErr)
	}
	return rows
}
// main prints every record parsed from ../tasks.csv.
func main() {
	fmt.Println(readCsvFile("../tasks.csv"))
}

Go is a very verbose language, however you could use something like this:
// predeclare err
// parseLocation reads the CSV file named by file and returns a map from
// location name (field 0) to a *Point built from the latitude (field 1) and
// longitude (field 2) of each record.
//
// The results are named so that err is declared once and can be reused by
// every call below. On any failure the returned map is nil; a record with
// fewer than three fields is reported as csv.ErrFieldCount.
func parseLocation(file string) (locations map[string]*Point, err error) {
	f, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	defer f.Close() // this needs to be after the err check

	lines, err := csv.NewReader(f).ReadAll()
	if err != nil {
		return nil, err
	}

	//already defined in declaration, no need for :=
	locations = make(map[string]*Point, len(lines))
	var lat, lon float64 //predeclare lat, lon
	for _, line := range lines {
		// Guard the indexing below; a short record would otherwise panic.
		if len(line) < 3 {
			return nil, csv.ErrFieldCount
		}
		// shorter, cleaner and since we already have lat and err declared, we can do this.
		if lat, err = strconv.ParseFloat(line[1], 64); err != nil {
			return nil, err
		}
		if lon, err = strconv.ParseFloat(line[2], 64); err != nil {
			return nil, err
		}
		locations[line[0]] = &Point{lat, lon}
	}
	return locations, nil
}
//edit
A more efficient and proper version was posted by #Dustin in the comments, I'm adding it here for completeness sake:
// parseLocation streams the CSV file named by file one record at a time and
// returns a map from location name (field 0) to a *Point whose lat and lon
// come from fields 1 and 2.
//
// On any failure the returned map is nil, so callers never see a partially
// filled result. A record with fewer than three fields is reported as
// csv.ErrFieldCount.
func parseLocation(file string) (map[string]*Point, error) {
	f, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	csvr := csv.NewReader(f)
	locations := map[string]*Point{}
	for {
		row, err := csvr.Read()
		if err == io.EOF {
			// End of input is the normal way out of the loop.
			return locations, nil
		}
		if err != nil {
			return nil, err
		}
		// Guard the indexing below; a short record would otherwise panic.
		if len(row) < 3 {
			return nil, csv.ErrFieldCount
		}

		p := &Point{}
		if p.lat, err = strconv.ParseFloat(row[1], 64); err != nil {
			return nil, err
		}
		if p.lon, err = strconv.ParseFloat(row[2], 64); err != nil {
			return nil, err
		}
		locations[row[0]] = p
	}
}
playground

I basically copied my answer from here: https://www.dotnetperls.com/csv-go. For me, this was a better answer than what I found on stackoverflow.
import (
"bufio"
"encoding/csv"
"os"
"fmt"
"io"
)
// ReadCsvFile prints every record of the CSV file at filePath to standard
// output: the record itself, its number of fields, and then each field on
// its own line prefixed by a space.
//
// It panics if the file cannot be opened or if a record cannot be parsed.
func ReadCsvFile(filePath string) {
	// Load a csv file.
	f, err := os.Open(filePath)
	if err != nil {
		// Report the real cause instead of failing later on a nil file.
		panic(err)
	}
	defer f.Close()

	// Create a new reader.
	r := csv.NewReader(f)
	for {
		record, err := r.Read()
		// Stop at EOF.
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		// Display record.
		// ... Display record length.
		// ... Display all individual elements of the slice.
		fmt.Println(record)
		fmt.Println(len(record))
		for _, field := range record {
			fmt.Printf(" %v\n", field)
		}
	}
}

I also dislike the verbosity of the default Reader, so I made a new type that is
similar to bufio#Scanner:
package main
import "encoding/csv"
import "io"
// Scanner iterates over the data rows of a CSV stream and gives access to
// the fields of the current row by header name, in the spirit of
// bufio.Scanner.
type Scanner struct {
	Reader *csv.Reader    // underlying CSV reader; nil when the header row could not be read
	Head   map[string]int // header name -> column index
	Row    []string       // fields of the current row, set by Scan
}

// NewScanner reads the first record of o as the header row and returns a
// Scanner positioned before the first data row. If the header cannot be
// read it returns the zero Scanner, for which Scan always reports false.
func NewScanner(o io.Reader) Scanner {
	r := csv.NewReader(o)
	header, err := r.Read()
	if err != nil {
		return Scanner{}
	}
	head := make(map[string]int, len(header))
	for i, name := range header {
		head[name] = i
	}
	return Scanner{Reader: r, Head: head}
}

// Scan advances to the next data row and stores it in Row. It reports false
// when there are no more rows, when reading fails, or when the Scanner has
// no Reader (header could not be read).
func (o *Scanner) Scan() bool {
	if o.Reader == nil {
		// Zero Scanner from a failed NewScanner: nothing to read.
		o.Row = nil
		return false
	}
	row, err := o.Reader.Read()
	o.Row = row
	return err == nil
}

// Text returns the field of the current row under the header named s. It
// returns "" when s is not a header name or the current row has no such
// column (for example before the first successful Scan).
func (o Scanner) Text(s string) string {
	// The comma-ok lookup distinguishes "unknown header" from column 0.
	i, ok := o.Head[s]
	if !ok || i >= len(o.Row) {
		return ""
	}
	return o.Row[i]
}
Example:
package main
import "strings"
// main walks a small in-memory CSV document and prints the Month and Day
// fields of every data row.
func main() {
	const src = `Month,Day
January,Sunday
February,Monday`
	sc := NewScanner(strings.NewReader(src))
	for sc.Scan() {
		println(sc.Text("Month"), sc.Text("Day"))
	}
}
https://golang.org/pkg/encoding/csv

You can also read the contents of a directory to find all the CSV files in it, and then read those CSV files one by one, each in its own goroutine.
csv file:
101,300.00,11000901,1155686400
102,250.99,11000902,1432339200
main.go file:
const sourcePath string = "./source"
// main prints the rows of every regular file found directly inside
// sourcePath. Each file is parsed as CSV in its own goroutine, and main
// waits for all of them before returning, so no output is lost. Because the
// files are processed concurrently, rows of different files may interleave.
// Errors are printed and the affected file is skipped.
func main() {
	dir, err := os.Open(sourcePath)
	if err != nil {
		fmt.Println(err)
		return
	}
	files, err := dir.Readdir(-1)
	dir.Close()
	if err != nil {
		fmt.Println(err)
		return
	}

	// Every goroutine sends on done when it has finished with its file, so
	// main waits for the actual work instead of sleeping a guessed duration.
	done := make(chan struct{})
	started := 0
	for _, file := range files {
		if file.IsDir() {
			continue
		}
		fmt.Println("SINGLE FILE: ")
		fmt.Println(file.Name())
		filePath := sourcePath + "/" + file.Name()
		f, err := os.Open(filePath)
		if err != nil {
			fmt.Println(err)
			continue
		}
		// os.Remove(filePath)
		started++
		go func(rc io.ReadCloser) {
			// Close here rather than with a defer in main's loop, which
			// would keep every file open until main returns.
			defer func() {
				rc.Close()
				done <- struct{}{}
			}()
			records, err := csv.NewReader(rc).ReadAll()
			if err != nil {
				fmt.Println(err)
				return
			}
			for _, row := range records {
				fmt.Println(row)
			}
		}(f)
	}
	for ; started > 0; started-- {
		<-done
	}
}
And the OUTPUT will be:
$ go run main.go
SINGLE FILE:
batch01.csv
[101 300.00 11000901 1155686400]
[102 250.99 11000902 1432339200]
----------------- -------------- ---------------------- -------
---------------- ------------------- ----------- --------------
Below example with the Invoice struct
// main parses every regular file found directly inside sourcePath as an
// invoice CSV (number, amount, order id, Unix timestamp) and prints one line
// per invoice. Each file is handled in its own goroutine and main waits for
// all of them before returning. Errors are printed; a file that cannot be
// read is skipped, and a row that is too short or fails to parse is skipped.
func main() {
	dir, err := os.Open(sourcePath)
	if err != nil {
		fmt.Println(err)
		return
	}
	files, err := dir.Readdir(-1)
	dir.Close()
	if err != nil {
		fmt.Println(err)
		return
	}

	// Every goroutine sends on done when it has finished with its file, so
	// main waits for the actual work instead of sleeping a guessed duration.
	done := make(chan struct{})
	started := 0
	for _, file := range files {
		if file.IsDir() {
			continue
		}
		fmt.Println("SINGLE FILE: ")
		fmt.Println(file.Name())
		filePath := sourcePath + "/" + file.Name()
		f, err := os.Open(filePath)
		if err != nil {
			fmt.Println(err)
			continue
		}
		started++
		go func(rc io.ReadCloser) {
			// Close here rather than with a defer in main's loop, which
			// would keep every file open until main returns.
			defer func() {
				rc.Close()
				done <- struct{}{}
			}()
			records, err := csv.NewReader(rc).ReadAll()
			if err != nil {
				fmt.Println(err)
				return
			}
			for _, row := range records {
				if len(row) < 4 {
					fmt.Println("skipping short row:", row)
					continue
				}
				amount, err := strconv.ParseFloat(row[1], 64)
				if err != nil {
					fmt.Println(err)
					continue
				}
				orderID, err := strconv.Atoi(row[2])
				if err != nil {
					fmt.Println(err)
					continue
				}
				unixTime, err := strconv.ParseInt(row[3], 10, 64)
				if err != nil {
					fmt.Println(err)
					continue
				}
				invoice := &Invoice{
					InvoiceNumber: row[0],
					Amount:        amount,
					OrderID:       orderID,
					Date:          time.Unix(unixTime, 0),
				}
				fmt.Printf("Received invoice `%v` for $ %.2f \n", invoice.InvoiceNumber, invoice.Amount)
			}
		}(f)
	}
	for ; started > 0; started-- {
		<-done
	}
}
// Invoice holds the fields parsed from one row of an invoice CSV file.
type Invoice struct {
// InvoiceNumber is the first column of the row, kept as text.
InvoiceNumber string
// Amount is the second column parsed as a 64-bit float.
Amount float64
// OrderID is the third column parsed as a base-10 integer.
OrderID int
// Date is the fourth column interpreted as a Unix timestamp in seconds.
Date time.Time
}

Related

how to get multiple line inputs in golang - interview coding

For the below type of inputs in golang coding interviews, what is the best way to get the input?
Input:
3
hello elloh
test estt
tier riet
I found two methods:
Method 1:
reader := bufio.NewReader(os.Stdin)
var lines []string
for {
line,err := reader.ReadString('\n') //this reads only one read
if err != nil {
log.Fatal(err)
}
if len(strings.TrimSpace(line)) == 0 {
break
}
line_s := strings.Split(line, " ")
lines = append(lines, line_s...)
}
Method 2:
// Read all of stdin up front. ReadAll returns only once stdin reaches end of
// input (for example Ctrl-D in a Unix terminal, or the end of a redirected
// file).
bytes, err := ioutil.ReadAll(os.Stdin)
fmt.Println(len(bytes))
if err == nil {
	input := strings.Split(string(bytes), "\n")
	// A malformed count yields 0, so no data lines are collected.
	count, _ := strconv.Atoi(strings.TrimSpace(input[0]))
	fmt.Println(input)
	var lines []string
	// The data lines are input[1] through input[count] inclusive; also stop
	// early if fewer lines were supplied than the header announced.
	for i := 1; i <= count && i < len(input); i++ {
		lines = append(lines, strings.Fields(input[i])...)
	}
	fmt.Println(lines)
}
However, I am not sure how to signal the end of input on stdin when using Method 2.
Please suggest the best way to read this kind of input.
Use bufio.Scanner to read input. Use a function to encapsulate complexity and implementation details. For example,
package main
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
// readData reads a line count from the first line of s and then that many
// further lines, returning each one split into its whitespace-separated
// fields.
//
// It returns (nil, nil) when the input is empty. It returns an error when
// the first line is not an integer, when scanning fails, or when the number
// of lines read differs from the announced count.
func readData(s *bufio.Scanner) ([][]string, error) {
	if !s.Scan() {
		return nil, s.Err()
	}
	header := strings.TrimSpace(s.Text())
	remaining, err := strconv.Atoi(header)
	if err != nil {
		return nil, err
	}

	var rows [][]string
	for remaining > 0 {
		if !s.Scan() {
			break
		}
		rows = append(rows, strings.Fields(s.Text()))
		remaining--
	}
	if scanErr := s.Err(); scanErr != nil {
		return nil, scanErr
	}
	if remaining != 0 {
		return nil, fmt.Errorf("missing %d lines of data", remaining)
	}
	return rows, nil
}
// main reads the counted block of lines from standard input, then prints the
// number of lines followed by the fields of each line. A read failure is
// reported on standard error and ends the program with exit status 1.
func main() {
	data, err := readData(bufio.NewScanner(os.Stdin))
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	fmt.Println(len(data))
	for i := range data {
		fmt.Println(data[i])
	}
}
https://go.dev/play/p/0Xwp3-hwGyK
3
hello elloh
test estt
tier riet
3
[hello elloh]
[test estt]
[tier riet]

Why is the file empty after writing to it with bufio.Writer?

// Open the output file for appending, creating it if it does not exist.
// NOTE(review): the flags carry no os.O_WRONLY / os.O_RDWR access mode —
// confirm the resulting descriptor is actually writable.
file, err := os.OpenFile("filename.db", os.O_CREATE|os.O_APPEND, 0666)
if err != nil {
log.Fatal(err)
}
defer file.Close()
// res counts the records marshalled so far.
res := 0
// Writes below go into writer's in-memory buffer, not straight to file.
writer := bufio.NewWriter(file)
for _, data := range manager {
bin, err := json.Marshal(data)
if err != nil {
log.Println(err)
return
}
res++
if debug {
log.Println(res)
}
// Echo the encoded record to standard output, then queue it for the file.
fmt.Printf("%s\n", bin)
_, err = writer.Write(bin)
if err != nil {
log.Println(err)
}
// One record per line; the error from WriteRune is deliberately discarded.
_, _ = writer.WriteRune('\n')
}
// NOTE(review): nothing in this snippet calls writer.Flush(), so the
// buffered records are never handed to file.
playground
full code
The file filename.db is created (if didn't exist), but ...is empty...
Why could this happen?
Why is the file empty?
I tried this both on my home pc and a linux server
And in both cases it's empty
As suggested in a comment, calling writer.Flush() causes the foo and bar values to be written to the file filename.db:
package main
import (
"bufio"
"encoding/json"
"fmt"
"log"
"os"
)
// Valuable is a record that is serialised to JSON with a single "value" key.
type Valuable struct {
	Value string `json:"value"`
}

// debug enables logging of the running record count.
var debug = true

// manager holds the records that main writes out.
var manager []Valuable

// main appends every record in manager to filename.db as one JSON document
// per line, echoing each document to standard output as well.
func main() {
	manager = append(manager, Valuable{"foo"}, Valuable{"bar"})

	// O_WRONLY is required: without an access mode the file is opened
	// read-only and every write to it fails.
	file, err := os.OpenFile("filename.db", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	res := 0
	writer := bufio.NewWriter(file)
	// Deferred after file.Close, so it runs first: the buffered records are
	// flushed before the file is closed. A flush failure is where an
	// unwritable file shows up, so it must not be ignored.
	defer func() {
		if err := writer.Flush(); err != nil {
			log.Println(err)
		}
	}()

	for _, data := range manager {
		bin, err := json.Marshal(data)
		if err != nil {
			log.Println(err)
			return
		}
		res++
		if debug {
			log.Println(res)
		}
		fmt.Printf("%s\n", bin)
		if _, err := writer.Write(bin); err != nil {
			log.Println(err)
		}
		if err := writer.WriteByte('\n'); err != nil {
			log.Println(err)
		}
	}
}

How to skip the first row when reading a csv file?

I have an awkward csv file and I need to skip the first row to read it.
I'm doing this easily with python/pandas
df = pd.read_csv(filename, skiprows=1)
but I don't know how to do it in Go.
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
)
// mwericsson groups three string attributes of a record. Nothing in the
// visible code references this type.
// NOTE(review): presumably intended to model one row of the CSV being read —
// confirm before relying on it.
type mwericsson struct {
id string
name string
region string
}
// main reads the sample CSV file and prints the parsed records.
func main() {
	records := readSample()
	fmt.Println(records)
	// Follow-up steps, currently disabled:
	//appendSum(records)
	//writeChanges(records)
}
// readSample parses the whole of a fixed sample CSV file and returns its
// records. Failing to open or to parse the file ends the program through
// log.Fatal.
func readSample() [][]string {
	const samplePath = "D:/in/20190629/PM_IG30014_15_201906290015_01.csv"

	in, openErr := os.Open(samplePath)
	if openErr != nil {
		log.Fatal(openErr)
	}

	reader := csv.NewReader(in)
	records, readErr := reader.ReadAll()
	// Close explicitly before looking at readErr: log.Fatal exits the
	// program without running deferred calls.
	in.Close()
	if readErr != nil {
		log.Fatal(readErr)
	}
	return records
}
Error:
2019/07/01 12:38:40 record on line 2: wrong number of fields
PM_IG30014_15_201906290015_01.csv:
PTN Ethernet-Port RMON Performance,PORT_BW_UTILIZATION,2019-06-29 20:00:00,33366
DeviceID,DeviceName,ResourceName,CollectionTime,GranularityPeriod,PORT_RX_BW_UTILIZATION,PORT_TX_BW_UTILIZATION,RXGOODFULLFRAMESPEED,TXGOODFULLFRAMESPEED,PORT_RX_BW_UTILIZATION_MAX,PORT_TX_BW_UTILIZATION_MAX
3174659,H1095,H1095-11-ISM6-1(to ZJBSC-V1),2019-06-29 20:00:00,15,22.08,4.59,,,30.13,6.98
3174659,H1095,H1095-14-ISM6-1(to T6147-V),2019-06-29 20:00:00,15,2.11,10.92,,,4.43,22.45
skip the first row when reading a csv file
For example,
package main
import (
"bufio"
"encoding/csv"
"fmt"
"io"
"os"
)
// readSample skips the first line of rs and parses the remainder as CSV,
// returning the parsed rows.
//
// The first line may be of any length and is skipped as raw text, so it does
// not have to have the same number of fields as the rows that follow. If the
// input consists of a first line only, with or without a trailing newline,
// no rows and no error are returned. Completely empty input yields io.EOF.
func readSample(rs io.ReadSeeker) ([][]string, error) {
	// Remember where reading starts so the skip also works when rs is not
	// positioned at offset 0.
	start, err := rs.Seek(0, io.SeekCurrent)
	if err != nil {
		return nil, err
	}

	// Skip first row (line). ReadBytes, unlike ReadSlice, is not limited to
	// the size of the bufio buffer, so long first lines are handled too.
	row1, err := bufio.NewReader(rs).ReadBytes('\n')
	if err != nil && !(err == io.EOF && len(row1) > 0) {
		return nil, err
	}

	// The bufio.Reader has read ahead of the first line; reposition rs to
	// the byte immediately after it.
	if _, err := rs.Seek(start+int64(len(row1)), io.SeekStart); err != nil {
		return nil, err
	}

	// Read remaining rows
	rows, err := csv.NewReader(rs).ReadAll()
	if err != nil {
		return nil, err
	}
	return rows, nil
}
// main opens sample.csv, reads it with its first line skipped, and prints
// the resulting records. Any failure panics.
func main() {
	in, err := os.Open("sample.csv")
	if err != nil {
		panic(err)
	}
	defer in.Close()

	records, err := readSample(in)
	if err != nil {
		panic(err)
	}
	fmt.Println(records)
}
Output:
$ cat sample.csv
one,two,three,four
1,2,3
4,5,6
$ go run sample.go
[[1 2 3] [4 5 6]]
$
$ cat sample.csv
PTN Ethernet-Port RMON Performance,PORT_BW_UTILIZATION,2019-06-29 20:00:00,33366
DeviceID,DeviceName,ResourceName,CollectionTime,GranularityPeriod,PORT_RX_BW_UTILIZATION,PORT_TX_BW_UTILIZATION,RXGOODFULLFRAMESPEED,TXGOODFULLFRAMESPEED,PORT_RX_BW_UTILIZATION_MAX,PORT_TX_BW_UTILIZATION_MAX
3174659,H1095,H1095-11-ISM6-1(to ZJBSC-V1),2019-06-29 20:00:00,15,22.08,4.59,,,30.13,6.98
3174659,H1095,H1095-14-ISM6-1(to T6147-V),2019-06-29 20:00:00,15,2.11,10.92,,,4.43,22.45
$ go run sample.go
[[DeviceID DeviceName ResourceName CollectionTime GranularityPeriod PORT_RX_BW_UTILIZATION PORT_TX_BW_UTILIZATION RXGOODFULLFRAMESPEED TXGOODFULLFRAMESPEED PORT_RX_BW_UTILIZATION_MAX PORT_TX_BW_UTILIZATION_MAX] [3174659 H1095 H1095-11-ISM6-1(to ZJBSC-V1) 2019-06-29 20:00:00 15 22.08 4.59 30.13 6.98] [3174659 H1095 H1095-14-ISM6-1(to T6147-V) 2019-06-29 20:00:00 15 2.11 10.92 4.43 22.45]]
$
Simply call Reader.Read() to read a line, then proceed to read the rest with Reader.ReadAll().
See this example:
// Three-line CSV document: a header row followed by two data rows.
src := "one,two,three\n1,2,3\n4,5,6"
r := csv.NewReader(strings.NewReader(src))
// Read and discard the first record (the header row).
if _, err := r.Read(); err != nil {
panic(err)
}
// ReadAll continues from the reader's current position, so it returns only
// the records after the one consumed above.
records, err := r.ReadAll()
if err != nil {
panic(err)
}
fmt.Println(records)
Output (try it on the Go Playground):
[[1 2 3] [4 5 6]]
While it was informative to learn about io.ReadSeeker, I think a simpler way to skip the first row (often the header) of a CSV file is to read every record and then slice off the first one, as follows:
// readCsv returns every record of the CSV file named by filename except the
// first one, which is treated as the header. An empty file yields an empty
// result. The program is terminated through log.Fatal if the file cannot be
// opened or parsed.
func readCsv(filename string) [][]string {
	f, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	records := [][]string{}
	r := csv.NewReader(f)
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		records = append(records, record)
	}

	// Slicing an empty result with [1:] would panic, so an empty file is
	// returned as-is.
	if len(records) == 0 {
		return records
	}
	return records[1:] // skip the header
}
We can also just call ReadBytes('\n') on a bufio.Reader to consume the first line, and then pass that same bufio.Reader to csv.NewReader:
// readSample discards everything up to and including the first newline of
// reader and parses what follows as CSV, returning the parsed rows.
//
// An error is returned when no newline can be read from the input or when
// the remainder is not valid CSV.
func readSample(reader io.Reader) ([][]string, error) {
	// Reuse the caller's bufio.Reader when there is one, so that data it has
	// already buffered is not lost behind a second layer of buffering.
	var buf *bufio.Reader
	switch r := reader.(type) {
	case *bufio.Reader:
		buf = r
	default:
		buf = bufio.NewReader(reader)
	}

	if _, err := buf.ReadBytes('\n'); err != nil {
		return nil, err
	}

	rows, err := csv.NewReader(buf).ReadAll()
	if err != nil {
		return nil, err
	}
	return rows, nil
}

Read and merge two Yaml files in go language

Assuming we have two yaml files
master.yaml
someProperty: "someVaue"
anotherProperty: "anotherValue"
override.yaml
someProperty: "overriddenVaue"
Is it possible to unmarshall, merge, and then write those changes to a file without having to define a struct for every property in the yaml file?
The master file has over 500 properties in it that are not at all important to the service at this point of execution, so ideally I'd be able to just unmarshal into a map, do a merge and write out in yaml again but I'm relatively new to go so wanted some opinions.
I've got some code to read the yaml into an interface but i'm unsure on the best approach to then merge the two.
// Decode master.yaml into an untyped value.
// NOTE(review): the errors from ReadFile and Unmarshal are discarded here, so
// a missing or malformed file goes unnoticed.
var masterYaml interface{}
yamlBytes, _ := ioutil.ReadFile("master.yaml")
yaml.Unmarshal(yamlBytes, &masterYaml)
// Decode override.yaml the same way, reusing yamlBytes for the raw contents.
var overrideYaml interface{}
yamlBytes, _ = ioutil.ReadFile("override.yaml")
yaml.Unmarshal(yamlBytes, &overrideYaml)
I've looked into libraries like mergo but i'm not sure if that's the right approach.
I'm hoping that after the master I would be able to write out to file with properties
someProperty: "overriddenVaue"
anotherProperty: "anotherValue"
Assuming that you just want to merge at the top level, you can unmarshal into maps of type map[string]interface{}, as follows:
package main
import (
"io/ioutil"
"gopkg.in/yaml.v2"
)
// main merges the top-level keys of override.yaml over those of master.yaml
// and writes the result to merged.yaml. Only the top level is merged: a key
// present in both files takes the override's whole value. Any read, parse or
// write failure panics.
func main() {
	var master map[string]interface{}
	bs, err := ioutil.ReadFile("master.yaml")
	if err != nil {
		panic(err)
	}
	if err := yaml.Unmarshal(bs, &master); err != nil {
		panic(err)
	}

	var override map[string]interface{}
	bs, err = ioutil.ReadFile("override.yaml")
	if err != nil {
		panic(err)
	}
	if err := yaml.Unmarshal(bs, &override); err != nil {
		panic(err)
	}

	// An empty master.yaml leaves master nil, and assigning into a nil map
	// panics, so make sure there is a map to merge into.
	if master == nil {
		master = make(map[string]interface{}, len(override))
	}
	for k, v := range override {
		master[k] = v
	}

	bs, err = yaml.Marshal(master)
	if err != nil {
		panic(err)
	}
	if err := ioutil.WriteFile("merged.yaml", bs, 0644); err != nil {
		panic(err)
	}
}
For a more general solution (with n input files), you can use the function below. I based it on #robox's answer:
// ReadValues merges the top-level keys of the given YAML files, in order,
// and returns the merged document as YAML text. Later files override keys
// set by earlier ones.
//
// A file that cannot be read or parsed is logged and skipped. An error is
// returned only when no filename is given or when the merged result cannot
// be marshalled.
func ReadValues(filenames ...string) (string, error) {
	if len(filenames) <= 0 {
		return "", errors.New("You must provide at least one filename for reading Values")
	}

	var merged map[string]interface{}
	for _, name := range filenames {
		raw, readErr := ioutil.ReadFile(name)
		if readErr != nil {
			log.Info(readErr)
			continue
		}

		var layer map[string]interface{}
		if parseErr := yaml.Unmarshal(raw, &layer); parseErr != nil {
			log.Info(parseErr)
			continue
		}

		// Until a file has produced a map, adopt the decoded one as the
		// base instead of copying into a nil map.
		if merged == nil {
			merged = layer
			continue
		}
		for key, value := range layer {
			merged[key] = value
		}
	}

	out, err := yaml.Marshal(merged)
	if err != nil {
		log.Info(err)
		return "", err
	}
	return string(out), nil
}
So for this example you should call it with this order:
result, _ := ReadValues("master.yaml", "overwrite.yaml")
In the case you have an extra file newFile.yaml, you could also use this function:
result, _ := ReadValues("master.yaml", "overwrite.yaml", "newFile.yaml")
DEEP MERGE TWO YAML FILES
package main
import (
"fmt"
"io/ioutil"
"sigs.k8s.io/yaml"
)
// main reads conf.yaml and conf1.yaml, merges the second over the first
// recursively, and prints the merged map. Any read or parse failure panics
// rather than silently merging partial data.
func main() {
	// declare two map to hold the yaml content
	base := map[string]interface{}{}
	currentMap := map[string]interface{}{}

	// read one yaml file
	data, err := ioutil.ReadFile("conf.yaml")
	if err != nil {
		panic(err)
	}
	if err := yaml.Unmarshal(data, &base); err != nil {
		panic(err)
	}

	// read another yaml file
	data1, err := ioutil.ReadFile("conf1.yaml")
	if err != nil {
		panic(err)
	}
	if err := yaml.Unmarshal(data1, &currentMap); err != nil {
		panic(err)
	}

	// merge both yaml data recursively
	base = mergeMaps(base, currentMap)

	// print merged map
	fmt.Println(base)
}
// mergeMaps returns a new map holding the entries of a overlaid with the
// entries of b. When a key maps to a map[string]interface{} on both sides,
// the two are merged recursively; in every other case the value from b
// replaces the value from a. Neither a nor b is modified at the top level.
func mergeMaps(a, b map[string]interface{}) map[string]interface{} {
	merged := make(map[string]interface{}, len(a))
	for key, val := range a {
		merged[key] = val
	}
	for key, val := range b {
		incoming, incomingIsMap := val.(map[string]interface{})
		// A missing key yields a nil interface, for which the assertion
		// simply reports false.
		existing, existingIsMap := merged[key].(map[string]interface{})
		if incomingIsMap && existingIsMap {
			merged[key] = mergeMaps(existing, incoming)
		} else {
			merged[key] = val
		}
	}
	return merged
}

No output to error file

I'm coding a little Go program.
It reads files in a directory line by line, it only reads lines with a certain prefix, normalizes the data and outputs to one of two files, depending on whether the normalized record has certain number of elements.
Data is being outputted to the Data file, but errors are not being outputted to the Errors file.
Debugging I see no issue.
Any help is much appreciated.
Thanks,
Martin
package main
import (
"bufio"
"fmt"
"io/ioutil"
"log"
"os"
"strings"
)
// main normalizes the "DATA:" records found in the files of the parent
// directory. Records with exactly 11 fields are appended to allData.txt and
// records with fewer than 11 fields are appended to errorData.txt.
//
// Both outputs are buffered, so they are flushed explicitly before main
// returns; without the flush, small amounts of output never reach the files.
func main() {
	//Output file - Data
	// O_CREATE makes a separate existence check and second open unnecessary.
	file, err := os.OpenFile("allData.txt", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		panic(err)
	}
	defer file.Close()
	w := bufio.NewWriter(file)

	//Output file - Errors
	fileError, err := os.OpenFile("errorData.txt", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		panic(err)
	}
	defer fileError.Close()
	z := bufio.NewWriter(fileError)

	//Read Directory
	files, err := ioutil.ReadDir("../")
	if err != nil {
		log.Fatal(err)
	}

	for _, f := range files {
		if f.IsDir() {
			continue
		}
		// A file that cannot be processed is reported and skipped so that
		// the remaining files are still handled.
		if err := normalizeFile("../"+f.Name(), w, z); err != nil {
			fmt.Println(err)
		}
	}

	// Hand the buffered records to the files before syncing them.
	dataErr := w.Flush()
	errorsErr := z.Flush()
	if dataErr != nil {
		log.Fatal(dataErr)
	}
	if errorsErr != nil {
		log.Fatal(errorsErr)
	}
	if err := file.Sync(); err != nil {
		log.Fatal(err)
	}
	if err := fileError.Sync(); err != nil {
		log.Fatal(err)
	}
}

// normalizeFile scans the file at path line by line. Each line starting with
// "DATA:" is split on spaces, its empty elements are dropped, and the
// remaining fields are re-joined with single spaces and terminated with
// "\r\n". The record is written to data when it has exactly 11 fields and to
// errs when it has fewer; records with more than 11 fields are dropped.
func normalizeFile(path string, data, errs *bufio.Writer) error {
	sFile, err := os.Open(path)
	if err != nil {
		return err
	}
	defer sFile.Close()

	scanner := bufio.NewScanner(sFile)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "DATA:") {
			continue
		}
		fields := deleteEmpty(strings.Split(line, " "))
		record := strings.Join(fields, " ") + "\r\n"

		// WriteString rather than Fprintf: the record is data, not a format
		// string, so a '%' in it must be written unchanged.
		var writeErr error
		switch {
		case len(fields) == 11:
			_, writeErr = data.WriteString(record)
		case len(fields) < 11:
			_, writeErr = errs.WriteString(record)
		}
		if writeErr != nil {
			return writeErr
		}
	}
	return scanner.Err()
}
//Delete Empty array elements
// deleteEmpty returns the elements of s that are not the empty string, in
// their original order. The result is nil when s has no such element.
func deleteEmpty(s []string) []string {
	var kept []string
	for i := range s {
		if len(s[i]) == 0 {
			continue
		}
		kept = append(kept, s[i])
	}
	return kept
}
Don't open the file multiple times, and don't check for the file's existence before creating it, just use the os.O_CREATE flag. You're also not deferring the correct os.File.Close call, because it's opened multiple times.
When using a bufio.Writer, you should always call Flush() to ensure that all data has been written to the underlying io.Writer.

Resources