How can I read a file in Go and skip the first line / headers?
In Python I know I could do:
counter = 0
with open("my_file_path", "r") as fo:
    try:
        next(fo)
    except:
        pass
    for _ in fo:
        counter = counter + 1
This is my Go application
package main

import (
    "bufio"
    "flag"
    "os"
)

func readFile(fileLocation string) int {
    fileOpen, _ := os.Open(fileLocation)
    defer fileOpen.Close()
    fileScanner := bufio.NewScanner(fileOpen)
    counter := 0
    for fileScanner.Scan() {
        //fmt.Println(fileScanner.Text())
        counter = counter + 1
    }
    return counter
}

func main() {
    fileLocation := flag.String("file_location", "default value", "file path to count lines")
    flag.Parse()
    counted := readFile(*fileLocation)
    println(counted)
}
I will be reading a huge file and don't want to have to check on every iteration whether I am still on the first line.
How about moving to the next token before the loop?
scanner := bufio.NewScanner(file)
scanner.Scan() // this moves to the next token
for scanner.Scan() {
    fmt.Println(scanner.Text())
}
file:
1
2
3
output:
2
3
https://play.golang.org/p/I2w50zFdcg0
For example,
package main

import (
    "bufio"
    "fmt"
    "os"
)

func readFile(filename string) (int, error) {
    f, err := os.Open(filename)
    if err != nil {
        return 0, err
    }
    defer f.Close()

    count := 0
    s := bufio.NewScanner(f)
    if s.Scan() {
        for s.Scan() {
            count++
        }
    }
    if err := s.Err(); err != nil {
        return 0, err
    }
    return count, nil
}

func main() {
    filename := `test.file`
    count, err := readFile(filename)
    if err != nil {
        fmt.Fprintln(os.Stderr, err)
        return
    }
    fmt.Println(count)
}
Output:
$ cat test.file
1234567890
abc
$ go run count.go
1
$
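One more note for the "huge file" case (my addition, not part of the original answer): bufio.Scanner refuses lines longer than its default 64 KiB token limit, so if very long lines are possible you may want to enlarge its buffer before scanning, reusing the s and f from the example above:
s := bufio.NewScanner(f)
// Allow lines up to 1 MiB instead of the default bufio.MaxScanTokenSize (64 KiB).
s.Buffer(make([]byte, 0, 1024*1024), 1024*1024)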
You can try something like this:
func readFile(fileLocation string) int {
    fileOpen, _ := os.Open(fileLocation)
    defer fileOpen.Close()
    fileScanner := bufio.NewScanner(fileOpen)
    counter := 0
    for fileScanner.Scan() {
        // read first line and ignore
        fileScanner.Text()
        break
    }
    for fileScanner.Scan() {
        // read remaining lines and process
        txt := fileScanner.Text()
        _ = txt // do something with txt
        counter = counter + 1
    }
    return counter
}
Edit:
func readFile(fileLocation string) int {
    fileOpen, _ := os.Open(fileLocation)
    defer fileOpen.Close()
    fileScanner := bufio.NewScanner(fileOpen)
    counter := 0
    if fileScanner.Scan() {
        // read first line and ignore
        fileScanner.Text()
    }
    for fileScanner.Scan() {
        // read remaining lines and process
        txt := fileScanner.Text()
        _ = txt // do something with txt
        counter = counter + 1
    }
    return counter
}
Related
I am trying to take some CSV-formatted strings as input and then print them to an actual CSV file. It works, but it prints the first string twice.
My code looks like this:
func main() {
    scanner := bufio.NewScanner(os.Stdin)
    n := 0
    inputFile, err := os.Create("input.csv") // create the input.csv file
    if err != nil {
        log.Fatal(err)
    }
    csvwriter := csv.NewWriter(inputFile)
    fmt.Println("How many records ?")
    fmt.Scanln(&n)
    fmt.Println("Enter the records")
    var lines [][]string
    for i := 0; i < n; i++ {
        scanner.Scan()
        text := scanner.Text()
        lines = append(lines, []string{text})
        err := csvwriter.WriteAll(lines)
        if err != nil {
            return
        }
    }
    csvwriter.Flush()
    inputFile.Close()
}
For n=2 and the records:
abcd, efgh, ijklmn
opq, rstu, vwxyz
the output looks like this:
"abcd, efgh, ijklmn"
"abcd, efgh, ijklmn"
"opq, rstu, vwxyz"
It is my first time working with Golang and I am a little bit lost :D
csvwriter.WriteAll(lines): WriteAll writes multiple CSV records to w using Write and then calls Flush, returning any error from the Flush.
You are appending to lines on every iteration and writing (and flushing) the whole slice to the file inside the loop, so records from earlier iterations get written again.
func main() {
    scanner := bufio.NewScanner(os.Stdin)
    n := 0
    inputFile, err := os.Create("input.csv") // create the input.csv file
    if err != nil {
        log.Fatal(err)
    }
    defer inputFile.Close()
    csvwriter := csv.NewWriter(inputFile)
    fmt.Println("How many records ?")
    fmt.Scanln(&n)
    fmt.Println("Enter the records")
    var lines [][]string
    for i := 0; i < n; i++ {
        scanner.Scan()
        text := scanner.Text()
        lines = append(lines, []string{text})
    }
    err = csvwriter.WriteAll(lines)
    if err != nil {
        return
    }
}
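Alternatively (a sketch of my own, not part of the original answer), you could drop the lines slice and write each record as it is read with csvwriter.Write, flushing once at the end:
for i := 0; i < n; i++ {
    scanner.Scan()
    // Write one record per input line; Write does not flush by itself.
    if err := csvwriter.Write([]string{scanner.Text()}); err != nil {
        log.Fatal(err)
    }
}
csvwriter.Flush()
if err := csvwriter.Error(); err != nil {
    log.Fatal(err)
}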
You were writing the CSV inside the loop, which is why the first line was printed twice. Here is the corrected code.
package main

import (
    "bufio"
    "encoding/csv"
    "fmt"
    "log"
    "os"
)

func main() {
    scanner := bufio.NewScanner(os.Stdin)
    n := 0
    inputFile, err := os.Create("input.csv") // create the input.csv file
    if err != nil {
        log.Fatal(err)
    }
    defer func() {
        inputFile.Close()
    }()
    csvwriter := csv.NewWriter(inputFile)
    defer func() {
        csvwriter.Flush()
    }()
    fmt.Println("How many records ?")
    fmt.Scanln(&n)
    fmt.Println("Enter the records")
    var lines [][]string
    for i := 0; i < n; i++ {
        scanner.Scan()
        text := scanner.Text()
        lines = append(lines, []string{text})
    }
    err = csvwriter.WriteAll(lines)
    if err != nil {
        return
    }
}
I have the following program that generates random sequences and hashes them on the CPU with several goroutines in Go.
package main

import (
    "crypto/sha256"
    "encoding/hex"
    "fmt"
    "log"
    "math/rand"
    "os"
    "time"
)

func randChr(i int) string {
    i = i + 65
    if i > 90 {
        i = i - 43
    }
    return string(rune(i))
}

func randStr(random *rand.Rand, length int) string {
    result := ""
    for len(result) < length {
        result = result + randChr(random.Intn(36))
    }
    return result
}

func HashPass(data []byte) (bool, [32]byte) {
    hash := sha256.Sum256(data)
    s := hex.EncodeToString(hash[:])
    pass := true
    for i := 0; i < 7; i++ {
        if s[i] != s[i+1] {
            pass = false
            break
        }
    }
    return pass, hash
}

func getAPassingHash() string {
    randSource := rand.NewSource(time.Now().UnixNano())
    random := rand.New(randSource)
    passes := false
    s := ""
    for !passes {
        s = randStr(random, 64)
        passes, _ = HashPass([]byte(s))
    }
    return s
}

func worker(ch chan string) {
    ch <- getAPassingHash()
}

func timer(ch chan string, wait time.Duration) {
    time.Sleep(wait)
    ch <- "End"
}

func append(fn string, conts string) {
    f, err := os.OpenFile(fn,
        os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
    if err != nil {
        log.Println(err)
    }
    defer f.Close()
    if _, err := f.WriteString(conts); err != nil {
        log.Println(err)
    }
}

func main() {
    ch := make(chan string)
    go timer(ch, 6*time.Hour)
    for i := 0; i < 9; i++ {
        time.Sleep(time.Second)
        go worker(ch)
    }
    for true {
        result := <-ch
        if result == "End" {
            break
        }
        go worker(ch)
        fmt.Println(result)
        hash := sha256.Sum256([]byte(result))
        fmt.Println(hex.EncodeToString(hash[:]))
        fmt.Println()
        append("hashes.txt", "\n"+result+"\n"+hex.EncodeToString(hash[:])+"\n")
    }
    fmt.Println("done")
}
For some reason the program hangs occasionally until I click on the command prompt and hit enter. I'm not sure where it is getting stuck, but I can see that my system CPU utilization goes down, so something is blocking the program; when I hit enter, results print and CPU usage spikes back up. I know this might be hard to replicate, but I would really appreciate any suggestions.
It doesn't hang; it is waiting for passes to become true. HashPass only passes when the first eight hex characters of the hash are identical, which on average takes roughly 16^7 (about 268 million) attempts, so the loop below can spin for a very long time.
func getAPassingHash() string {
    randSource := rand.NewSource(time.Now().UnixNano())
    random := rand.New(randSource)
    passes := false
    s := ""
    for !passes { // keeps looping until passes is true
        s = randStr(random, 64)
        passes, _ = HashPass([]byte(s))
    }
    return s
}
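If you want to confirm that it is still making progress rather than blocked, one option (a sketch using the question's randStr, HashPass and already-imported log package, not part of the original answer) is to count attempts and log periodically:
func getAPassingHash() string {
    random := rand.New(rand.NewSource(time.Now().UnixNano()))
    attempts := 0
    for {
        s := randStr(random, 64)
        attempts++
        if attempts%10000000 == 0 {
            // Report every 10 million attempts so long searches are visible.
            log.Printf("still searching: %d attempts", attempts)
        }
        if passes, _ := HashPass([]byte(s)); passes {
            return s
        }
    }
}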
I want to have a limited number of goroutines that do some computation (func worker() does the computation and places the result in a channel). I also have another channel that holds the "jobs" for my workers. I can see that all jobs are computed correctly, but after the computation the execution gets stuck.
package main

import (
    "bufio"
    "fmt"
    "io/ioutil"
    "net/http"
    "os"
    "strings"
    "time"
)

func worker(id int, urls <-chan string, results chan<- int) {
    var data string
    for url := range urls {
        fmt.Println("worker", id, "started job", url)
        if strings.HasPrefix(url, "http") || strings.HasPrefix(url, "https") {
            resp, err := http.Get(url)
            if err != nil {
                fmt.Println(err)
            }
            defer resp.Body.Close()
            body, err := ioutil.ReadAll(resp.Body)
            if err != nil {
                fmt.Println(err)
            }
            data = string(body)
        } else {
            body, err := ioutil.ReadFile(url)
            if err != nil {
                fmt.Println(err)
            }
            data = string(body)
        }
        number := strings.Count(data, "Go")
        fmt.Println("worker", id, "finished job", url, "Number of Go is", number)
        results <- number
    }
    return
}

func main() {
    final_result := 0
    maxNbConcurrentGoroutines := 5
    numJobs := 0
    urls := make(chan string)
    results := make(chan int)
    scanner := bufio.NewScanner(os.Stdin)
    start := time.Now()
    for w := 1; w <= maxNbConcurrentGoroutines; w++ {
        go worker(w, urls, results)
    }
    for scanner.Scan() {
        url := scanner.Text()
        urls <- url
        numJobs += 1
    }
    close(urls)
    for num := range results {
        final_result += num
    }
    t := time.Now()
    elapsed := t.Sub(start)
    for i := 1; i <= numJobs; i++ {
        one_result := <-results
        final_result += one_result
    }
    fmt.Println("Number = ", final_result)
    fmt.Println("Time = ", elapsed)
    if err := scanner.Err(); err != nil {
        fmt.Fprintln(os.Stderr, "error:", err)
        os.Exit(1)
    }
}
I tried to use https://gobyexample.com/worker-pools to extract all the values from the results channel, but did not succeed. What should I do to get it unstuck so it finishes? Here is an example of how to run it:
echo -e 'https://golang.org\n/etc/passwd\nhttps://golang.org\nhttps://golang.org' | go run 1.go
Your program doesn't return because the for num := range results loop waits for the results channel to be closed.
In https://gobyexample.com/worker-pools the loop for collecting results is different:
for a := 1; a <= numJobs; a++ {
    <-results
}
If you want to use for num := range results, you need to close(results) and determine when to call it.
You can view another example using WaitGroup at https://gobyexample.com/waitgroups
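For example, here is a sketch (not the original code) of how main could close results via a sync.WaitGroup once every worker has returned, so the range loop can terminate; it reuses worker, urls, results, scanner, maxNbConcurrentGoroutines and final_result from the question and needs "sync" added to the imports:
var wg sync.WaitGroup
for w := 1; w <= maxNbConcurrentGoroutines; w++ {
    wg.Add(1)
    go func(id int) {
        defer wg.Done()
        worker(id, urls, results)
    }(w)
}
// Feed the URLs from a separate goroutine so main can start draining
// results right away; then close results once all workers are done.
go func() {
    for scanner.Scan() {
        urls <- scanner.Text()
    }
    close(urls)
    wg.Wait()
    close(results)
}()
for num := range results {
    final_result += num
}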
I'm trying this code:
// GetFooter returns a string which is the Footer of an edi file
func GetFooter(file *os.File) (out string, err error) {
    // TODO can scanner read files backwards? Seek can get us to the end of file
    var lines []string
    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        lines = append(lines, scanner.Text())
    }
    line1 := lines[len(lines)-2]
    line2 := lines[len(lines)-1]
    return line1 + "\n" + line2, scanner.Err()
}
I'm wondering if there's a cheaper way to get the last two lines of a file?
You can keep only the last two lines in memory as you scan the buffer.
Try it on Go playground.
package main

import (
    "bufio"
    "bytes"
    "fmt"
    "strconv"
)

func main() {
    var buffer bytes.Buffer
    for i := 0; i < 1000; i++ {
        s := strconv.Itoa(i)
        buffer.WriteString(s + "\n")
    }
    fmt.Println(GetFooter(&buffer))
}

func GetFooter(file *bytes.Buffer) (out string, err error) {
    var line1, line2 string
    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        line1, line2 = line2, scanner.Text()
    }
    return line1 + "\n" + line2, scanner.Err()
}
If you know roughly the size of the last two lines, you could set SOME_NUMBER to be that size plus some extra bytes to make sure you always capture the last two, then do something like
file, err := os.Open(fileName)
if err != nil {
    panic(err)
}
defer file.Close()

stat, err := os.Stat(fileName)
if err != nil {
    panic(err)
}
buf := make([]byte, SOME_NUMBER)
start := stat.Size() - SOME_NUMBER
_, err = file.ReadAt(buf, start)
if err != nil {
    panic(err)
}

lines := strings.Split(string(buf), "\n")
lines = lines[len(lines)-2:]
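One caveat (my addition, not part of the original answer): if the file can be shorter than SOME_NUMBER, clamp the offset so ReadAt does not start before the beginning of the file:
start := stat.Size() - SOME_NUMBER
if start < 0 { // the file is shorter than SOME_NUMBER
    start = 0
    buf = make([]byte, stat.Size())
}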
Please consider the following Go code, which I wrote to scan a TCP port (os.Args[2]) on hostnames read from a given file (os.Args[1]). It reads each hostname and tries to connect. If the connection fails, it appends the failed hostname to outfile.
package main

import (
    "bufio"
    "fmt"
    "log"
    "net"
    "os"
    "time"
)

func main() {
    argc := len(os.Args)
    if argc < 3 {
        fmt.Printf("Invalid usage")
        log.Fatal("Invalid usage")
    }
    stamp := time.Now().UnixNano()
    outfile := fmt.Sprintf("%s%d.txt", "/tmp/port_check", stamp)
    filename := os.Args[1]
    file, err := os.Open(filename)
    checkerr(err)
    f, err := os.OpenFile(outfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
    checkerr(err)
    defer f.Close()
    port := os.Args[2]
    channel := make(chan string, 17)
    fscanner := bufio.NewScanner(file)
    for fscanner.Scan() {
        _line := fscanner.Text()
        go check_sender(_line, port, f, channel)
    }
    _count := 0
    for out := range channel {
        fmt.Println("_count is:", _count, out)
        _count += 1
    }
    close(channel)
}

func checkerr(err error) {
    if err != nil {
        fmt.Println(err)
        log.Fatal(err)
    }
}

func check_sender(sender string, port string, f *os.File, channel chan string) {
    address_string := fmt.Sprintf("%s:%s", sender, port)
    _, err := net.DialTimeout("tcp", address_string, 4*time.Second)
    if err != nil {
        write_this := fmt.Sprintf("%s\n", sender)
        f.WriteString(write_this)
    }
    channel <- sender
}
Generate some content for it to operate on:
$ for i in `seq 1 5`; do echo "someblog$RANDOM$RANDOM.blogspot.com"; done > /tmp/meh.txt
And when run as:
$ go run port_scan.go /tmp/meh.txt 80
_count is: 0 someblog50063432.blogspot.com
_count is: 1 someblog922816893.blogspot.com
_count is: 2 someblog622823698.blogspot.com
_count is: 3 someblog1074223783.blogspot.com
_count is: 4 someblog1876411881.blogspot.com
^Csignal: interrupt < ----------------------- this
It hangs after the last hostname and does not exit until a ctrl-c is sent.
I would like it to exit by itself; what am I doing wrong here?
update 1:
  channel := make(chan string, 17)
  fscanner := bufio.NewScanner(file)
+ spin := 0
  for fscanner.Scan() {
      _line := fscanner.Text()
      go check_sender(_line, port, f, channel)
+     spin += 1
  }
  _count := 0
  for out := range channel {
      fmt.Println("_count is:", _count, out)
      _count += 1
+     if _count == spin {
+         close(channel)
+     }
  }
- close(channel)
  }
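A sketch of an alternative (my illustration, not part of the post): let a sync.WaitGroup close the channel once every check_sender goroutine has finished, so the range loop exits on its own; this needs "sync" in the import list:
var wg sync.WaitGroup
for fscanner.Scan() {
    wg.Add(1)
    go func(line string) {
        defer wg.Done()
        check_sender(line, port, f, channel)
    }(fscanner.Text())
}
// Close the channel only after every check_sender has sent its result.
go func() {
    wg.Wait()
    close(channel)
}()
_count := 0
for out := range channel {
    fmt.Println("_count is:", _count, out)
    _count += 1
}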