How to skip the first row when reading a csv file? - go

I have an awkward csv file and I need to skip the first row to read it.
I'm doing this easily with python/pandas
df = pd.read_csv(filename, skiprows=1)
but I don't know how to do it in Go.
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
)
type mwericsson struct {
id string
name string
region string
}
func main() {
rows := readSample()
fmt.Println(rows)
//appendSum(rows)
//writeChanges(rows)
}
func readSample() [][]string {
f, err := os.Open("D:/in/20190629/PM_IG30014_15_201906290015_01.csv")
if err != nil {
log.Fatal(err)
}
rows, err := csv.NewReader(f).ReadAll()
f.Close()
if err != nil {
log.Fatal(err)
}
return rows
}
Error:
2019/07/01 12:38:40 record on line 2: wrong number of fields
PM_IG30014_15_201906290015_01.csv:
PTN Ethernet-Port RMON Performance,PORT_BW_UTILIZATION,2019-06-29 20:00:00,33366
DeviceID,DeviceName,ResourceName,CollectionTime,GranularityPeriod,PORT_RX_BW_UTILIZATION,PORT_TX_BW_UTILIZATION,RXGOODFULLFRAMESPEED,TXGOODFULLFRAMESPEED,PORT_RX_BW_UTILIZATION_MAX,PORT_TX_BW_UTILIZATION_MAX
3174659,H1095,H1095-11-ISM6-1(to ZJBSC-V1),2019-06-29 20:00:00,15,22.08,4.59,,,30.13,6.98
3174659,H1095,H1095-14-ISM6-1(to T6147-V),2019-06-29 20:00:00,15,2.11,10.92,,,4.43,22.45

skip the first row when reading a csv file
For example,
package main
import (
"bufio"
"encoding/csv"
"fmt"
"io"
"os"
)
func readSample(rs io.ReadSeeker) ([][]string, error) {
// Skip first row (line)
row1, err := bufio.NewReader(rs).ReadSlice('\n')
if err != nil {
return nil, err
}
_, err = rs.Seek(int64(len(row1)), io.SeekStart)
if err != nil {
return nil, err
}
// Read remaining rows
r := csv.NewReader(rs)
rows, err := r.ReadAll()
if err != nil {
return nil, err
}
return rows, nil
}
func main() {
f, err := os.Open("sample.csv")
if err != nil {
panic(err)
}
defer f.Close()
rows, err := readSample(f)
if err != nil {
panic(err)
}
fmt.Println(rows)
}
Output:
$ cat sample.csv
one,two,three,four
1,2,3
4,5,6
$ go run sample.go
[[1 2 3] [4 5 6]]
$
$ cat sample.csv
PTN Ethernet-Port RMON Performance,PORT_BW_UTILIZATION,2019-06-29 20:00:00,33366
DeviceID,DeviceName,ResourceName,CollectionTime,GranularityPeriod,PORT_RX_BW_UTILIZATION,PORT_TX_BW_UTILIZATION,RXGOODFULLFRAMESPEED,TXGOODFULLFRAMESPEED,PORT_RX_BW_UTILIZATION_MAX,PORT_TX_BW_UTILIZATION_MAX
3174659,H1095,H1095-11-ISM6-1(to ZJBSC-V1),2019-06-29 20:00:00,15,22.08,4.59,,,30.13,6.98
3174659,H1095,H1095-14-ISM6-1(to T6147-V),2019-06-29 20:00:00,15,2.11,10.92,,,4.43,22.45
$ go run sample.go
[[DeviceID DeviceName ResourceName CollectionTime GranularityPeriod PORT_RX_BW_UTILIZATION PORT_TX_BW_UTILIZATION RXGOODFULLFRAMESPEED TXGOODFULLFRAMESPEED PORT_RX_BW_UTILIZATION_MAX PORT_TX_BW_UTILIZATION_MAX] [3174659 H1095 H1095-11-ISM6-1(to ZJBSC-V1) 2019-06-29 20:00:00 15 22.08 4.59 30.13 6.98] [3174659 H1095 H1095-14-ISM6-1(to T6147-V) 2019-06-29 20:00:00 15 2.11 10.92 4.43 22.45]]
$

Simply call Reader.Read() to read a line, then proceed to read the rest with Reader.ReadAll().
See this example:
src := "one,two,three\n1,2,3\n4,5,6"
r := csv.NewReader(strings.NewReader(src))
if _, err := r.Read(); err != nil {
panic(err)
}
records, err := r.ReadAll()
if err != nil {
panic(err)
}
fmt.Println(records)
Output (try it on the Go Playground):
[[1 2 3] [4 5 6]]

while it was informative to learn about io.ReadSeeker, I think a simpler way to skip the first line/row (often times the header) of a csv is to use the slice functionality as follows:
func readCsv(filename string) [][]string {
f, err := os.Open(filename)
if err != nil {
log.Fatal(err)
}
defer f.Close()
records := [][]string{}
r := csv.NewReader(f)
for {
record, err := r.Read()
if err == io.EOF {
break
}
if err != nil {
log.Fatal(err)
}
records = append(records, record)
}
return records[1:] // skip the header
}

we can just use bufio.ReadBytes('\n') and pass bufio as Reader to csv.NewReader
func readSample(reader io.Reader) ([][]string, error) {
// if reader is bufio, we don't need to NewReader againg
buf, ok := (reader).(*bufio.Reader)
if !ok {
buf = bufio.NewReader(reader)
}
_, err := buf.ReadBytes('\n')
if err != nil {
return nil, err
}
rows, err := csv.NewReader(buf).ReadAll()
if err != nil {
return nil, err
}
return rows, nil
}

Related

Creating/Writing/Reading CSV file in Golang

The following code should create a CSV file and write the employee data to it, then read it out in the terminal. The code will create the file and not print the contents in the terminal. If I take the last chunk of code out to print the contents, it will work as an independent program as long as the CSV file is present. Why does the code not work when written all as one?
package main
import (
"encoding/csv"
"fmt"
"os"
"sort"
)
type Employee struct {
Name string
Age int
Salary float64
}
func main() {
employees := []Employee{
{"Fred", 44, 56000.00},
{"Amy", 25, 65000.00},
{"Zack", 29, 20400.00},
{"Jerry", 73, 120500.00},
}
sort.Slice(employees, func(i, j int) bool {
return employees[i].Name < employees[j].Name
})
file, err := os.Create("employees.csv")
if err != nil {
panic(err)
}
defer file.Close()
writer := csv.NewWriter(file)
defer writer.Flush()
header := []string{"Name", "Age", "Salary"}
if err := writer.Write(header); err != nil {
panic(err)
}
for _, employee := range employees {
record := []string{employee.Name, fmt.Sprintf("%d", employee.Age), fmt.Sprintf("%.2f", employee.Salary)}
if err := writer.Write(record); err != nil {
panic(err)
}
}
file, err = os.Open("employees.csv")
if err != nil {
panic(err)
}
defer file.Close()
reader := csv.NewReader(file)
records, err := reader.ReadAll()
if err != nil {
panic(err)
}
for _, record := range records {
fmt.Println(record)
}
}
//This code will print the contents of the CSV correctly when ran separately
package main
import (
"encoding/csv"
"fmt"
"os"
)
func main() {
file, err := os.Open("employees.csv")
if err != nil {
fmt.Println("Error opening file:", err)
return
}
defer file.Close()
reader := csv.NewReader(file)
reader.FieldsPerRecord = -1
records, err := reader.ReadAll()
if err != nil {
fmt.Println("Error reading CSV data:", err)
return
}
for _, record := range records {
fmt.Println(record)
}
}
The first program will create the CSV file with the expected cells, but instead of printing the contents to the terminal, the program completes.

Why is the file empty after writing to it with bufio.Writer?

file, err := os.OpenFile("filename.db", os.O_CREATE|os.O_APPEND, 0666)
if err != nil {
log.Fatal(err)
}
defer file.Close()
res := 0
writer := bufio.NewWriter(file)
for _, data := range manager {
bin, err := json.Marshal(data)
if err != nil {
log.Println(err)
return
}
res++
if debug {
log.Println(res)
}
fmt.Printf("%s\n", bin)
_, err = writer.Write(bin)
if err != nil {
log.Println(err)
}
_, _ = writer.WriteRune('\n')
}
playground
full code
The file filename.db is created (if didn't exist), but ...is empty...
Why could this happen?
Why is the file empty?
I tried this both on my home pc and a linux server
And in both cases it's empty
As per the suggestion from comment using writer.Flush results in foo and bar values being written in to the document filename.db.
package main
import (
"bufio"
"encoding/json"
"fmt"
"log"
"os"
)
type Valuable struct {
Value string `json:"value"`
}
var debug = true
var manager []Valuable
func main() {
manager = append(manager, Valuable{"foo"}, Valuable{"bar"})
file, err := os.OpenFile("filename.db", os.O_CREATE|os.O_APPEND, 0666)
if err != nil {
log.Fatal(err)
}
defer file.Close()
res := 0
writer := bufio.NewWriter(file)
defer writer.Flush()
for _, data := range manager {
bin, err := json.Marshal(data)
if err != nil {
log.Println(err)
return
}
res++
if debug {
log.Println(res)
}
fmt.Printf("%s\n", bin)
_, err = writer.Write(bin)
if err != nil {
log.Println(err)
}
_, _ = writer.WriteRune('\n')
}
}

Using Go to create text files from columns of a csv data frame

I am trying to loop over a csv file and output text file titled after each row in the first column. Each text file is then be populated with data from the other rows for that column. I am able to print the contents of the csv to a text file, but I can not get the logic down using a for loop to grab the index of column one and use that to create/title a new .txt file.
package main
import (
"fmt"
"io"
"io/ioutil"
"log"
"os"
)
func main() {
fmt.Println("Enter file path to CSV: ")
var csvFile string
_, err := fmt.Scanln(&csvFile)
if err != nil {
log.Fatal("Cannot read input")
return
}
//open file
inFile, err := os.Open(csvFile)
if err != nil {
log.Fatal(err)
}
defer inFile.Close()
readMe, _ := ioutil.ReadAll(inFile)
blankFile, err := os.Create(`C:\temp\test.txt`)
if err != nil {
log.Fatal(err)
}
defer blankFile.Close()
//write data to text file
outFile, err := blankFile.Write(readMe)
if err == io.EOF {
log.Fatalln("Failed")
} else if err != nil {
log.Fatal(err)
}
//print bytes total
fmt.Println(outFile, " bytes printed")
}
Take multiple columns from a csv and print each column to a new text
file.
Loop over a csv and produce a new text file that will be titled after
each column in row #1. Each text file will then be populated with data
from the other rows for that column.
For example,
package main
import (
"encoding/csv"
"fmt"
"io"
"os"
"path/filepath"
)
func CsvFileToTxtFiles(inFile string) error {
in, err := os.Open(inFile)
if err != nil {
return err
}
defer in.Close()
r := csv.NewReader(in)
hdr, err := r.Read()
if err != nil {
return err
}
f := make([]*os.File, len(hdr))
w := make([]*csv.Writer, len(hdr))
pfx := filepath.Clean(inFile)
pfx = pfx[:len(pfx)-len(filepath.Ext(pfx))]
for i, col := range hdr {
var err error
f[i], err = os.Create(pfx + "." + col + ".txt")
if err != nil {
return err
}
defer f[i].Close()
w[i] = csv.NewWriter(f[i])
if err != nil {
return err
}
defer w[i].Flush()
}
for {
row, err := r.Read()
if err != nil {
if row == nil && err == io.EOF {
break
}
return err
}
for i, col := range row {
err := w[i].Write([]string{col})
if err != nil {
return err
}
}
}
for i := range hdr {
var err error
w[i].Flush()
err = w[i].Error()
if err != nil {
return err
}
err = f[i].Close()
if err != nil {
return err
}
}
return nil
}
func main() {
if len(os.Args) <= 1 {
usage := "usage: " + filepath.Base(os.Args[0]) + " FILE"
fmt.Fprintln(os.Stderr, usage)
return
}
inFile := os.Args[1]
err := CsvFileToTxtFiles(inFile)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
}
Output:
$ cat ioj.test.csv
one,two,three
1,2,3
11,22,33
$ go run ioj.go ioj.test.csv
$ cat ioj.test.one.txt
1
11
$ cat ioj.test.two.txt
2
22
$ cat ioj.test.three.txt
3
33
$

Golang: fetching data from 1 CSV File to anthoer

I am new to golang, and I am trying to fetch 1 csv file to another new csv file, but i need only 2 records from the old csv file.
How would you fetch only the first two records of that file?
Here is what I have tried so far (also in the play.golang.org):
package main
import (
"encoding/csv"
"fmt"
"io"
"os"
)
func main() {
//SELECTING THE FILE TO EXTRACT.......
csvfile1, err := os.Open("data/sample.csv")
if err != nil {
fmt.Println(err)
return
}
defer csvfile1.Close()
reader := csv.NewReader(csvfile1)
for i := 0; i < 3; i++ {
record, err := reader.Read()
if err == io.EOF {
break
} else if err != nil {
fmt.Println(err)
return
}
csvfile2, err := os.Create("data/SingleColomReading.csv")
if err != nil {
fmt.Println(err)
return
}
defer csvfile2.Close()
records := []string{
record,
}
writer := csv.NewWriter(csvfile2)
//fmt.Println(writer)
for _, single := range records {
er := writer.Write(single)
if er != nil {
fmt.Println("error", er)
return
}
fmt.Println(single)
writer.Flush()
//fmt.Println(records)
//a:=strconv.Itoa(single)
n, er2 := csvfile2.WriteString(single)
if er2 != nil {
fmt.Println(n, er2)
}
}
}
}
Fixing your program,
package main
import (
"encoding/csv"
"fmt"
"io"
"os"
)
func main() {
csvfile1, err := os.Open("data/sample.csv")
if err != nil {
fmt.Println(err)
return
}
defer csvfile1.Close()
reader := csv.NewReader(csvfile1)
csvfile2, err := os.Create("data/SingleColomReading.csv")
if err != nil {
fmt.Println(err)
return
}
writer := csv.NewWriter(csvfile2)
for i := 0; i < 2; i++ {
record, err := reader.Read()
if err != nil {
if err == io.EOF {
break
}
fmt.Println(err)
return
}
err = writer.Write(record)
if err != nil {
fmt.Println(err)
return
}
}
writer.Flush()
err = csvfile2.Close()
if err != nil {
fmt.Println(err)
return
}
}
However, since you are only interested in copying records (lines) as a whole and not individual fields of a record, you could use bufio.Scanner, as #VonC suggested. For example,
package main
import (
"bufio"
"fmt"
"os"
)
func main() {
csvfile1, err := os.Open("data/sample.csv")
if err != nil {
fmt.Println(err)
return
}
defer csvfile1.Close()
scanner := bufio.NewScanner(csvfile1)
csvfile2, err := os.Create("data/SingleColomReading.csv")
if err != nil {
fmt.Println(err)
return
}
writer := bufio.NewWriter(csvfile2)
nRecords := 0
for scanner.Scan() {
n, err := writer.Write(scanner.Bytes())
if err != nil {
fmt.Println(n, err)
return
}
err = writer.WriteByte('\n')
if err != nil {
fmt.Println(err)
return
}
if nRecords++; nRecords >= 2 {
break
}
}
if err := scanner.Err(); err != nil {
fmt.Println(err)
return
}
err = writer.Flush()
if err != nil {
fmt.Println(err)
return
}
err = csvfile2.Close()
if err != nil {
fmt.Println(err)
return
}
}
It owuld be easier to:
read your csv file into a string array (one line per element), for the two first lines only
var lines []string
scanner := bufio.NewScanner(file)
nblines := 0
for scanner.Scan() {
lines = append(lines, scanner.Text())
if nblines++; nblines >= 2 {
break
}
}
Then you can use a range lines to write those two lines in the destination file.
lines includes at most 2 elements.

Reading CSV file in Go

Here is a code snippet that reads CSV file:
func parseLocation(file string) (map[string]Point, error) {
f, err := os.Open(file)
defer f.Close()
if err != nil {
return nil, err
}
lines, err := csv.NewReader(f).ReadAll()
if err != nil {
return nil, err
}
locations := make(map[string]Point)
for _, line := range lines {
name := line[0]
lat, laterr := strconv.ParseFloat(line[1], 64)
if laterr != nil {
return nil, laterr
}
lon, lonerr := strconv.ParseFloat(line[2], 64)
if lonerr != nil {
return nil, lonerr
}
locations[name] = Point{lat, lon}
}
return locations, nil
}
Is there a way to improve readability of this code? if and nil noise.
Go now has a csv package for this. Its is encoding/csv. You can find the docs here: https://golang.org/pkg/encoding/csv/
There are a couple of good examples in the docs. Here is a helper method I created to read a csv file and returns its records.
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
)
func readCsvFile(filePath string) [][]string {
f, err := os.Open(filePath)
if err != nil {
log.Fatal("Unable to read input file " + filePath, err)
}
defer f.Close()
csvReader := csv.NewReader(f)
records, err := csvReader.ReadAll()
if err != nil {
log.Fatal("Unable to parse file as CSV for " + filePath, err)
}
return records
}
func main() {
records := readCsvFile("../tasks.csv")
fmt.Println(records)
}
Go is a very verbose language, however you could use something like this:
// predeclare err
func parseLocation(file string) (locations map[string]*Point, err error) {
f, err := os.Open(file)
if err != nil {
return nil, err
}
defer f.Close() // this needs to be after the err check
lines, err := csv.NewReader(f).ReadAll()
if err != nil {
return nil, err
}
//already defined in declaration, no need for :=
locations = make(map[string]*Point, len(lines))
var lat, lon float64 //predeclare lat, lon
for _, line := range lines {
// shorter, cleaner and since we already have lat and err declared, we can do this.
if lat, err = strconv.ParseFloat(line[1], 64); err != nil {
return nil, err
}
if lon, err = strconv.ParseFloat(line[2], 64); err != nil {
return nil, err
}
locations[line[0]] = &Point{lat, lon}
}
return locations, nil
}
//edit
A more efficient and proper version was posted by #Dustin in the comments, I'm adding it here for completeness sake:
func parseLocation(file string) (map[string]*Point, error) {
f, err := os.Open(file)
if err != nil {
return nil, err
}
defer f.Close()
csvr := csv.NewReader(f)
locations := map[string]*Point{}
for {
row, err := csvr.Read()
if err != nil {
if err == io.EOF {
err = nil
}
return locations, err
}
p := &Point{}
if p.lat, err = strconv.ParseFloat(row[1], 64); err != nil {
return nil, err
}
if p.lon, err = strconv.ParseFloat(row[2], 64); err != nil {
return nil, err
}
locations[row[0]] = p
}
}
playground
I basically copied my answer from here: https://www.dotnetperls.com/csv-go. For me, this was a better answer than what I found on stackoverflow.
import (
"bufio"
"encoding/csv"
"os"
"fmt"
"io"
)
func ReadCsvFile(filePath string) {
// Load a csv file.
f, _ := os.Open(filePath)
// Create a new reader.
r := csv.NewReader(f)
for {
record, err := r.Read()
// Stop at EOF.
if err == io.EOF {
break
}
if err != nil {
panic(err)
}
// Display record.
// ... Display record length.
// ... Display all individual elements of the slice.
fmt.Println(record)
fmt.Println(len(record))
for value := range record {
fmt.Printf(" %v\n", record[value])
}
}
}
I also dislike the verbosity of the default Reader, so I made a new type that is
similar to bufio#Scanner:
package main
import "encoding/csv"
import "io"
type Scanner struct {
Reader *csv.Reader
Head map[string]int
Row []string
}
func NewScanner(o io.Reader) Scanner {
csv_o := csv.NewReader(o)
a, e := csv_o.Read()
if e != nil {
return Scanner{}
}
m := map[string]int{}
for n, s := range a {
m[s] = n
}
return Scanner{Reader: csv_o, Head: m}
}
func (o *Scanner) Scan() bool {
a, e := o.Reader.Read()
o.Row = a
return e == nil
}
func (o Scanner) Text(s string) string {
return o.Row[o.Head[s]]
}
Example:
package main
import "strings"
func main() {
s := `Month,Day
January,Sunday
February,Monday`
o := NewScanner(strings.NewReader(s))
for o.Scan() {
println(o.Text("Month"), o.Text("Day"))
}
}
https://golang.org/pkg/encoding/csv
You can also read contents of a directory to load all the CSV files. And then read all those CSV files 1 by 1 with goroutines
csv file:
101,300.00,11000901,1155686400
102,250.99,11000902,1432339200
main.go file:
const sourcePath string = "./source"
func main() {
dir, _ := os.Open(sourcePath)
files, _ := dir.Readdir(-1)
for _, file := range files {
fmt.Println("SINGLE FILE: ")
fmt.Println(file.Name())
filePath := sourcePath + "/" + file.Name()
f, _ := os.Open(filePath)
defer f.Close()
// os.Remove(filePath)
//func
go func(file io.Reader) {
records, _ := csv.NewReader(file).ReadAll()
for _, row := range records {
fmt.Println(row)
}
}(f)
time.Sleep(10 * time.Millisecond)// give some time to GO routines for execute
}
}
And the OUTPUT will be:
$ go run main.go
SINGLE FILE:
batch01.csv
[101 300.00 11000901 1155686400]
[102 250.99 11000902 1432339200]
----------------- -------------- ---------------------- -------
---------------- ------------------- ----------- --------------
Below example with the Invoice struct
func main() {
dir, _ := os.Open(sourcePath)
files, _ := dir.Readdir(-1)
for _, file := range files {
fmt.Println("SINGLE FILE: ")
fmt.Println(file.Name())
filePath := sourcePath + "/" + file.Name()
f, _ := os.Open(filePath)
defer f.Close()
go func(file io.Reader) {
records, _ := csv.NewReader(file).ReadAll()
for _, row := range records {
invoice := new(Invoice)
invoice.InvoiceNumber = row[0]
invoice.Amount, _ = strconv.ParseFloat(row[1], 64)
invoice.OrderID, _ = strconv.Atoi(row[2])
unixTime, _ := strconv.ParseInt(row[3], 10, 64)
invoice.Date = time.Unix(unixTime, 0)
fmt.Printf("Received invoice `%v` for $ %.2f \n", invoice.InvoiceNumber, invoice.Amount)
}
}(f)
time.Sleep(10 * time.Millisecond)
}
}
type Invoice struct {
InvoiceNumber string
Amount float64
OrderID int
Date time.Time
}

Resources