I'm reading Donovan's "The Go Programming Language" book and trying to implement an exercise which prints duplicate lines from several files and the files in which they occur:
package main
import (
"fmt"
"io/ioutil"
"os"
"strings"
mapset "github.com/deckarep/golang-set"
)
func main() {
counts := make(map[string]int)
occurrences := make(map[string]mapset.Set)
for _, filename := range os.Args[1:] {
data, err := ioutil.ReadFile(filename)
if err != nil {
fmt.Fprintf(os.Stderr, "dup3: %v\n", err)
continue
}
for _, line := range strings.Split(string(data), "\n") {
counts[line]++
occurrences[line].Add(filename)
}
}
for line, n := range counts {
if n > 1 {
fmt.Printf("%d\t%s\t%s\n", n, line, strings.Join(occurrences[line], ", "))
}
}
}
To accomplish the exercise, I've used the https://godoc.org/github.com/deckarep/golang-set package. However, I'm not sure how to print out the elements of the set joined by a ", ". With this code, I get a
./hello.go:23:30: first argument to append must be slice; have interface { Add(interface {}) bool; Cardinality() int; CartesianProduct(mapset.Set) mapset.Set; Clear(); Clone() mapset.Set; Contains(...interface {}) bool; Difference(mapset.Set) mapset.Set; Each(func(interface {}) bool); Equal(mapset.Set) bool; Intersect(mapset.Set) mapset.Set; IsProperSubset(mapset.Set) bool; IsProperSuperset(mapset.Set) bool; IsSubset(mapset.Set) bool; IsSuperset(mapset.Set) bool; Iter() <-chan interface {}; Iterator() *mapset.Iterator; Pop() interface {}; PowerSet() mapset.Set; Remove(interface {}); String() string; SymmetricDifference(mapset.Set) mapset.Set; ToSlice() []interface {}; Union(mapset.Set) mapset.Set }
./hello.go:28:64: cannot use occurrences[line] (type mapset.Set) as type []string in argument to strings.Join
I wasn't able to easily find out how to convert the Set to a slice though. Any idea how I might accomplish this?
The XY problem is asking about your attempted solution rather than your actual problem: The XY Problem.
The Go Programming Language by Alan A. A. Donovan and Brian W. Kernighan, Exercise 1.4 is designed to use Go maps.
For example,
// Modify dup3 to print the names of all files in which each duplicated line occurs.
package main
import (
"fmt"
"io/ioutil"
"os"
"strings"
)
func main() {
// counts = [line][file]count
counts := make(map[string]map[string]int)
for _, filename := range os.Args[1:] {
data, err := ioutil.ReadFile(filename)
if err != nil {
fmt.Fprintf(os.Stderr, "Exercise 1.4: %v\n", err)
continue
}
for _, line := range strings.Split(string(data), "\n") {
files := counts[line]
if files == nil {
files = make(map[string]int)
counts[line] = files
}
files[filename]++
}
}
for line, files := range counts {
n := 0
for _, count := range files {
n += count
}
if n > 1 {
fmt.Printf("%d\t%s\n", n, line)
for name := range files {
fmt.Printf("%s\n", name)
}
}
}
}
Related
I am new to the language GO and working on an assignment where i should write a code that return the word frequencies of the text. However I know that the words 'Hello', 'HELLO' and 'hello' are all counted as 'hello', so I need to convert all strings to lower case.
I know that I should use strings.ToLower(), however I dont know where I should Included that in the class. Can someone please help me?
package main
import (
"fmt"
"io/ioutil"
"log"
"strings"
"time"
)
const DataFile = "loremipsum.txt"
// Return the word frequencies of the text argument.
func WordCount(text string) map[string]int {
fregs := make(map[string]int)
words := strings.Fields(text)
for _, word := range words {
fregs[word] += 1
}
return fregs
}
// Benchmark how long it takes to count word frequencies in text numRuns times.
//
// Return the total time elapsed.
func benchmark(text string, numRuns int) int64 {
start := time.Now()
for i := 0; i < numRuns; i++ {
WordCount(text)
}
runtimeMillis := time.Since(start).Nanoseconds() / 1e6
return runtimeMillis
}
// Print the results of a benchmark
func printResults(runtimeMillis int64, numRuns int) {
fmt.Printf("amount of runs: %d\n", numRuns)
fmt.Printf("total time: %d ms\n", runtimeMillis)
average := float64(runtimeMillis) / float64(numRuns)
fmt.Printf("average time/run: %.2f ms\n", average)
}
func main() {
// read in DataFile as a string called data
data, err:= ioutil.ReadFile("loremipsum.txt")
if err != nil {
log.Fatal(err)
}
// Convert []byte to string and print to screen
text := string(data)
fmt.Println(text)
fmt.Printf("%#v",WordCount(string(data)))
numRuns := 100
runtimeMillis := benchmark(string(data), numRuns)
printResults(runtimeMillis, numRuns)
}
You should convert words to lowercase when you are using them as map key
for _, word := range words {
fregs[strings.ToLower(word)] += 1
}
I get [a:822 a.:110 I want all a in the same. How do i a change the code so that a and a. is the same? – hello123
You need to carefully define a word. For example, a string of consecutive letters and numbers converted to lowercase.
func WordCount(s string) map[string]int {
wordFunc := func(r rune) bool {
return !unicode.IsLetter(r) && !unicode.IsNumber(r)
}
counts := make(map[string]int)
for _, word := range strings.FieldsFunc(s, wordFunc) {
counts[strings.ToLower(word)]++
}
return counts
}
to remove all non-word characters you could use a regular expression:
package main
import (
"bufio"
"fmt"
"log"
"regexp"
"strings"
)
func main() {
str1 := "This is some text! I want to count each word. Is it cool?"
re, err := regexp.Compile(`[^\w]`)
if err != nil {
log.Fatal(err)
}
str1 = re.ReplaceAllString(str1, " ")
scanner := bufio.NewScanner(strings.NewReader(str1))
scanner.Split(bufio.ScanWords)
for scanner.Scan() {
fmt.Println(strings.ToLower(scanner.Text()))
}
}
See strings.EqualFold.
Here is an example.
I'm trying to parse this string goats=1\r\nalligators=false\r\ntext=works.
contents := "goats=1\r\nalligators=false\r\ntext=works"
compile, err := regexp.Compile("([^#\\s=]+)=([a-zA-Z0-9.]+)")
if err != nil {
return
}
matchString := compile.FindAllStringSubmatch(contents, -1)
my Output looks like [[goats=1 goats 1] [alligators=false alligators false] [text=works text works]]
What I'm I doing wrong in my expression to cause goats=1 to be valid too? I only want [[goats 1]...]
For another approach, you can use the strings package instead:
package main
import (
"fmt"
"strings"
)
func parse(s string) map[string]string {
m := make(map[string]string)
for _, kv := range strings.Split(s, "\r\n") {
a := strings.Split(kv, "=")
m[a[0]] = a[1]
}
return m
}
func main() {
m := parse("goats=1\r\nalligators=false\r\ntext=works")
fmt.Println(m) // map[alligators:false goats:1 text:works]
}
https://golang.org/pkg/strings
I'm trying to write a Go script that takes in as many lines of comma-separated coordinates as the user wishes, split and convert the string of coordinates to float64, store each line as a slice, and then append each slice in a slice of slices for later usage.
Example inputs are:
1.1,2.2,3.3
3.14,0,5.16
Example outputs are:
[[1.1 2.2 3.3],[3.14 0 5.16]]
The equivalent in Python is
def get_input():
print("Please enter comma separated coordinates:")
lines = []
while True:
line = input()
if line:
line = [float(x) for x in line.replace(" ", "").split(",")]
lines.append(line)
else:
break
return lines
But what I wrote in Go seems way too long (pasted below), and I'm creating a lot of variables without the ability to change variable type as in Python. Since I literally just started writing Golang to learn it, I fear my script is long as I'm trying to convert Python thinking into Go. Therefore, I would like to ask for some advice as to how to write this script shorter and more concise in Go style? Thank you.
package main
import (
"fmt"
"os"
"bufio"
"strings"
"strconv"
)
func main() {
inputs := get_input()
fmt.Println(inputs)
}
func get_input() [][]float64 {
fmt.Println("Please enter comma separated coordinates: ")
var inputs [][]float64
scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
if len(scanner.Text()) > 0 {
raw_input := strings.Replace(scanner.Text(), " ", "", -1)
input := strings.Split(raw_input, ",")
converted_input := str2float(input)
inputs = append(inputs, converted_input)
} else {
break
}
}
return inputs
}
func str2float(records []string) []float64 {
var float_slice []float64
for _, v := range records {
if s, err := strconv.ParseFloat(v, 64); err == nil {
float_slice = append(float_slice, s)
}
}
return float_slice
}
Using only string functions:
package main
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
func main() {
scanner := bufio.NewScanner(os.Stdin)
var result [][]float64
var txt string
for scanner.Scan() {
txt = scanner.Text()
if len(txt) > 0 {
values := strings.Split(txt, ",")
var row []float64
for _, v := range values {
fl, err := strconv.ParseFloat(strings.Trim(v, " "), 64)
if err != nil {
panic(fmt.Sprintf("Incorrect value for float64 '%v'", v))
}
row = append(row, fl)
}
result = append(result, row)
}
}
fmt.Printf("Result: %v\n", result)
}
Run:
$ printf "1.1,2.2,3.3
3.14,0,5.16
2,45,76.0, 45 , 69" | go run experiment2.go
Result: [[1.1 2.2 3.3] [3.14 0 5.16] [2 45 76 45 69]]
With given input, you can concatenate them to make a JSON string and then unmarshal (deserialize) that:
func main() {
var lines []string
for {
var line string
fmt.Scanln(&line)
if line == "" {
break
}
lines = append(lines, "["+line+"]")
}
all := "[" + strings.Join(lines, ",") + "]"
inputs := [][]float64{}
if err := json.Unmarshal([]byte(all), &inputs); err != nil {
fmt.Println(err)
return
}
fmt.Println(inputs)
}
I'm parsing a JSON object which contains an array of strings :
var ii interface{}
json := "{\"aString\": [\"aaa_111\", \"bbb_222\"], \"whatever\":\"ccc\"}"
err := json.Unmarshal([]byte(json), &ii)
if err != nil {
log.Fatal(err)
}
data := ii.(map[string]interface{})
fmt.Println(data["aString"]) // outputs: ["aaa_111" "bbb_222"]
I tried to convert data["aString"] to []string to be able to loop over it, but it fails :
test := []string(data["aString"]).([]string)
fmt.Println(test) // panic -> interface conversion:
// interface is string, not []string
How can I convert data["aString"] ?
edit:
I didn't express myself properly. If I print data, I have such map :
map[aString:["BBB-222","AAA-111"] whatever:ccc]
I want to loop over aString (to manipule each array entry). But I can't find how, because aString is type interface {} :
for i, v := range aString { // <-- fails
// ...
fmt.Println(i, v)
}
That's why I want to convert aString. I don't want to convert a string which looks like an array to an array.
I recommend you move away from this implementation in general. Your json may vary but you can easily use objects and avoid all this type unsafe nonsense.
Anyway, that conversion doesn't work because the types inside the slice are not string, they're also interface{}. You have to iterate the collection then do a type assertion on each item like so:
aInterface := data["aString"].([]interface{})
aString := make([]string, len(aInterface))
for i, v := range aInterface {
aString[i] = v.(string)
}
Is it what you need?
package main
import (
"fmt"
"encoding/json"
)
func main() {
js := "{\"aString\": [\"aaa_111\", \"bbb_222\"], \"whatever\":\"ccc\"}"
a := make(map[string]interface{})
json.Unmarshal([]byte(js), &a)
for _, v := range a["aString"].([]interface{}) {
str := v.(string)
fmt.Println(str)
}
}
Check on Go Playground
For another approach, you can use a struct instead:
package main
import (
"encoding/json"
"fmt"
)
func main() {
s := []byte(`{"aString": ["aaa_111", "bbb_222"], "whatever":"ccc"}`)
var t struct {
Astring []string
Whatever string
}
json.Unmarshal(s, &t)
fmt.Printf("%+v\n", t) // {Astring:[aaa_111 bbb_222] Whatever:ccc}
}
I need to read a file of integers into an array. I have it working with this:
package main
import (
"fmt"
"io"
"os"
)
func readFile(filePath string) (numbers []int) {
fd, err := os.Open(filePath)
if err != nil {
panic(fmt.Sprintf("open %s: %v", filePath, err))
}
var line int
for {
_, err := fmt.Fscanf(fd, "%d\n", &line)
if err != nil {
fmt.Println(err)
if err == io.EOF {
return
}
panic(fmt.Sprintf("Scan Failed %s: %v", filePath, err))
}
numbers = append(numbers, line)
}
return
}
func main() {
numbers := readFile("numbers.txt")
fmt.Println(len(numbers))
}
The file numbers.txt is just:
1
2
3
...
ReadFile() seems too long (maybe because of the error handing).
Is there a shorter / more Go idiomatic way to load a file?
Using a bufio.Scanner makes things nice. I've also used an io.Reader rather than taking a filename. Often that's a good technique, since it allows the code to be used on any file-like object and not just a file on disk. Here it's "reading" from a string.
package main
import (
"bufio"
"fmt"
"io"
"strconv"
"strings"
)
// ReadInts reads whitespace-separated ints from r. If there's an error, it
// returns the ints successfully read so far as well as the error value.
func ReadInts(r io.Reader) ([]int, error) {
scanner := bufio.NewScanner(r)
scanner.Split(bufio.ScanWords)
var result []int
for scanner.Scan() {
x, err := strconv.Atoi(scanner.Text())
if err != nil {
return result, err
}
result = append(result, x)
}
return result, scanner.Err()
}
func main() {
tf := "1\n2\n3\n4\n5\n6"
ints, err := ReadInts(strings.NewReader(tf))
fmt.Println(ints, err)
}
I would do it like this:
package main
import (
"fmt"
"io/ioutil"
"strconv"
"strings"
)
// It would be better for such a function to return error, instead of handling
// it on their own.
func readFile(fname string) (nums []int, err error) {
b, err := ioutil.ReadFile(fname)
if err != nil { return nil, err }
lines := strings.Split(string(b), "\n")
// Assign cap to avoid resize on every append.
nums = make([]int, 0, len(lines))
for _, l := range lines {
// Empty line occurs at the end of the file when we use Split.
if len(l) == 0 { continue }
// Atoi better suits the job when we know exactly what we're dealing
// with. Scanf is the more general option.
n, err := strconv.Atoi(l)
if err != nil { return nil, err }
nums = append(nums, n)
}
return nums, nil
}
func main() {
nums, err := readFile("numbers.txt")
if err != nil { panic(err) }
fmt.Println(len(nums))
}
Your solution with fmt.Fscanf is fine. There are certainly a number of other ways to do though, depending on your situation. Mostafa's technique is one I use a lot (although I might allocate the result all at once with make. oops! scratch that. He did.) but for ultimate control you should learn bufio.ReadLine. See go readline -> string for some example code.