How to unmarshal a json gzipped file into a struct - go

I am writing a trie DS, json gzipped into a file trieSample.json.gz, and reading it back into the struct. Strangely, the unmarshal succeeds but the struct is not populated.
I have tried both json.Unmarshal and json.Decoder to no avail. Need help in finding what I am missing here. There is no error thrown while reading, just that the struct doesn't have any keys.
If I try normal json marshal -> Write to file and Read from file -> Unmarshal, it works properly.
var charSet = "0123456789bcdefghjkmnopqrstuvwxyz"
const logTagSlice = "trie.log"
type trieSlice struct {
Children []*tNode `json:"c"`
Charset map[int32]int8 `json:"l"` // Charset is the legend of what charset is used to create the keys and how to position them in trie array
logger loggingapi.Logger `json:"-"`
capacity int `json:"-"` // capacity is the slice capacity to have enough to hold all the characters in the charset
}
type tNode struct {
Children []*tNode `json:"c"` // children represents the next valid runes AFTER the current one
IsLeaf bool `json:"e,omitempty"` // isLeaf represents if this node represents the end of a valid word
Value int16 `json:"v,omitempty"` // value holds the corresponding value for key value pair, where key is the whole tree of nodes starting from parent representing a valid word
}
// NewTrieSlice returns a Trie, charset represents how the children need to be positioned in the array
func NewTrieSlice(charset string, logger loggingapi.Logger) *trieSlice {
m := map[int32]int8{}
for index, r := range charset {
m[r] = int8(index)
}
return &trieSlice{
Charset: m,
Children: make([]*tNode, len(charset)),
logger: logger,
capacity: len(charset),
}
}
func newNode(capacity int) *tNode {
return &tNode{
Children: make([]*tNode, capacity),
}
}
// getPosition gets the array index position that the rune should be put in
func (t *trieSlice) getPosition(r int32) (index int8, found bool) {
if index, ok := t.Charset[r]; ok {
return index, true
}
return -1, false
}
// Add ...
func (t *trieSlice) Add(key string, val int16) {
if len(key) == 0 {
t.logger.Info(logTagSlice, "trying to add empty key, return with no action")
return
}
runes := []rune(key)
prefix := runes[0]
var child *tNode
var pos int
index, ok := t.getPosition(prefix)
if !ok {
t.logger.Info(logTagSlice, "key is not present in the charset %s, cannot add to trieSlice", prefix)
return
}
// trie node with same prefix doesnt exist
if child = t.Children[index]; child == nil {
child = newNode(len(t.Charset))
t.Children[index] = child
}
pos = 1
for pos <= len(runes) {
// base condition
if pos == len(key) {
child.IsLeaf = true
child.Value = val
return
}
prefix := runes[pos]
index, ok := t.getPosition(prefix)
if !ok {
t.logger.Info(logTagSlice, "key is not present in the charset %s, cannot add to trieSlice", prefix)
return
}
// repeat with child node if prefix is already present
if newChild := child.Children[index]; newChild == nil {
child.Children[index] = newNode(len(t.Charset))
child = child.Children[index]
} else {
child = newChild
}
pos++
}
}
// Test using gzip writer, reader
func TestSample(t *testing.T) {
// Create trie and add a few keys
trie := NewTrieSlice(charSet, loggingapi.NewStdOut())
trie.Add("test", 10)
trie.Add("test1", 20)
trie.Add("test2", 30)
trie.Add("test3", 40)
trie.Add("test4", 50)
// Write gzipped json to file
var network bytes.Buffer
b, err := json.Marshal(trie)
if err != nil {
fmt.Println("error in marshal ... ", err.Error())
t.Fail()
}
w := gzip.NewWriter(&network)
w.Write(b)
ioutil.WriteFile("../resources/trieSample.json.gz", []byte(network.String()), 0644)
w.Close()
// Read gzipped json from file into struct
trieUnmarshal := NewTrieSlice(charSet, loggingapi.NewStdOut())
trieDecoder := NewTrieSlice(charSet, loggingapi.NewStdOut())
// attempt via json Unmarshal
file, err := os.Open("../resources/trieSample.json.gz")
if err != nil {
fmt.Println(err.Error())
t.Fail()
}
r, err := gzip.NewReader(file)
if err != nil {
fmt.Println(err.Error())
t.Fail()
}
sc := bufio.NewScanner(r)
json.Unmarshal(sc.Bytes(), trieUnmarshal)
// attempt via json Decoder
b, err = ioutil.ReadFile("../resources/trieSample.json.gz")
if err != nil {
fmt.Println(err.Error())
t.Fail()
}
bReader := bytes.NewReader(b)
json.NewDecoder(bReader).Decode(trieDecoder)
// spew.Dump shows that object is not populated
spew.Dump(trieUnmarshal)
spew.Dump(trieDecoder)
}
spew.Dump shows that the trieSlice Children array has all nil elements

Close the compressor before using the data. Decompress the data before using it. Don't chop it up with inappropriate use of bufio.Scanner.
var network bytes.Buffer
b, err := json.Marshal(trie)
if err != nil {
fmt.Println("error in marshal ... ", err.Error())
t.Fail()
}
w := gzip.NewWriter(&network)
w.Write(b)
w.Close()
err = ioutil.WriteFile("trieSample.json.gz", network.Bytes(), 0644)
if err != nil {
log.Fatal(err)
}
trieDecoder := NewTrieSlice(charSet)
// attempt via json Unmarshal
file, err := os.Open("trieSample.json.gz")
if err != nil {
log.Fatal(err)
}
r, err := gzip.NewReader(file)
if err != nil {
log.Fatal(err)
}
err = json.NewDecoder(r).Decode(trieDecoder)
if err != nil {
log.Fatal(err)
}
spew.Dump(trieDecoder)
https://play.golang.org/p/pYup3v8-f4c

Related

Parsing string to time with unknown layout

I'm having a csv file, and want to read:
Header names
Fields types
So, I wrote the below:
package main
import (
"encoding/csv"
"fmt"
"os"
"log"
"reflect"
"strconv"
)
func main() {
filePath := "./file.csv"
headerNames := make(map[int]string)
headerTypes := make(map[int]string)
// Load a csv file.
f, _ := os.Open(filePath)
// Create a new reader.
r := csv.NewReader(f)
// Read first row only
header, err := r.Read()
checkError("Some other error occurred", err)
// Add mapping: Column/property name --> record index
for i, v := range header {
headerNames[i] = v
}
// Read second row
record, err := r.Read()
checkError("Some other error occurred", err)
// Check record fields types
for i, v := range record {
var value interface{}
if value, err = strconv.Atoi(v); err != nil {
if value, err = strconv.ParseFloat(v, 64); err != nil {
if value, err = strconv.ParseBool(v); err != nil {
if value, err = strconv.ParseBool(v); err != nil { // <== How to do this with unknown layout
// Value is a string
headerTypes[i] = "string"
value = v
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
} else {
// Value is a timestamp
headerTypes[i] = "time"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
} else {
// Value is a bool
headerTypes[i] = "bool"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
} else {
// Value is a float
headerTypes[i] = "float"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
} else {
// Value is an int
headerTypes[i] = "int"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
}
for i, _ := range header {
fmt.Printf("Header: %v \tis\t %v\n", headerNames[i], headerTypes[i])
}
}
func checkError(message string, err error) {
// Error Logging
if err != nil {
log.Fatal(message, err)
}
}
And with csv file as:
name,age,developer
"Hasan","46.4","true"
I got an output as:
Header: name is string
Header: age is float
Header: developer is bool
The output is correct.
The thing that I could not do is the one is checking if the field is string as I do not know what layout the field could be.
I aware I can pasre string to time as per the format stated at https://go.dev/src/time/format.go, and can build a custom parser, something like:
test, err := fmtdate.Parse("MM/DD/YYYY", "10/15/1983")
if err != nil {
panic(err)
}
But this will work only (as per my knowledge) if I know the layout?
So, again my question is, how can I parse time, or what shall I do to be able to parse it, if I do not know the layout?
Thanks to the comment by Burak, I found the solution by using this package: github.com/araddon/dateparse
// Normal parse. Equivalent Timezone rules as time.Parse()
t, err := dateparse.ParseAny("3/1/2014")
// Parse Strict, error on ambigous mm/dd vs dd/mm dates
t, err := dateparse.ParseStrict("3/1/2014")
> returns error
// Return a string that represents the layout to parse the given date-time.
layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM")
> "Jan 2, 2006 3:04:05 PM"

Implement a struct-to-csv writer in Go

The following code attempt to implement a generic CSV writer for any simple struct. By "simple", I mean field value of the struct are of standard, simple types (int, string etc).
type (
CSV interface {
Header() []string
String([]string) (string, error)
}
CSVArray []CSV
)
func CSVOutput(w io.Writer, data CSVArray, cols []string) error {
if len(data) == 0 {
return nil
}
_, err := fmt.Fprintln(w, data[0].Header())
if err != nil {
return err
}
for _, d := range data {
str, err := d.String(cols)
if err != nil {
return err
}
_, err = fmt.Fprintln(w, str)
if err != nil {
return err
}
}
return nil
}
The problem is CSVOutput() does not actually work. e.g.:
var data []Employee //the Employee struct implements CSV interface
CSVOutput(w, data, nil)
Compilation failed: cannot use data (type []Employee) as type CSVArray in argument to CSVOutput
I understand that []CSV is not same as []Employee, as explained here, and many other resources available online.
That said, is it possible to rewrite the CSVOutput() function by using reflection:
func CSVOutput(w io.Writer, data interfac{}, cols []string) error {
sliceOfIntf = castToSlice(data) //how to do this?
if !implementedCSV(sliceOfIntf[0]) { //and how to do this?
return errors.New("not csv")
}
... ...
}
is it possible to rewrite the CSVOutput() function by using reflection
Yes
// if data is []Employee{...}, then you can do the following:
rv := reflect.ValueOf(data)
if rv.Kind() != reflect.Slice {
return fmt.Errorf("data is not slice")
}
if !rv.Type().Elem().Implements(reflect.TypeOf((*CSV)(nil)).Elem()) {
return fmt.Errorf("slice element does not implement CSV")
}
csvArr := make(CSVArray, rv.Len())
for i := 0; i < rv.Len(); i++ {
csvArr[i] = rv.Index(i).Interface().(CSV)
}
// now csvArr is CSVArray containing all the elements of data
https://go.dev/play/p/gcSOid533gx

How can I initialize struct with values from array of interface in Go?

I'm getting a message from server like [0,"on",[6,1,5,"market",45.7]] and save it to []interface{} variable. I want to initialize struct with values of this array.
I'm totally new in Go and try to do it like:
import "golang.org/x/net/websocket"
...
var msg []interface{}
// Server send response: `[0,"on",[6,1,5,"market",45.7]]`
if err := websocket.Message.Receive(ws, &msg); err != nil {
logger.Println(err)
} else {
type Order struct {
ID int32,
GID int32,
CID int32,
Type string,
Amount float64
}
// here msg is [0,"on",[6,1,5,"market",45.7]]
switch msg[1] {
case "on":
if rawOrder, ok := msg[2].([]interface{}); ok {
order := Order{int32(rawOrder[0]), int32(rawOrder[1]), int32(rawOrder[2]), string(rawOrder[3]), float64(rawOrder[4])}
}
}
But I'm getting an error "Cannot convert an expression of the type 'interface{}' to the type 'int32'" and the next step is use switch for every rawOrder[i] type, but it's toooo long.
How can I do it easilly?
If you know that the codec used on the websocket will always be json, you can formally define Order and give it an UnmarshalJSON function to do the decoding for you.
import "golang.org/x/net/websocket"
type Order struct {
ID, GID, CID int32
Type string
Amount float64
}
func (o *Order) UnmarshalJSON(data []byte) error {
var first []json.RawMessage
err := json.Unmarshal(data, &first)
if err != nil {
return fmt.Errorf("invalid order, must be array: %w", err)
}
if len(first) != 3 {
return fmt.Errorf("invalid order, length must be 3, got %d", len(first))
}
var second string
err = json.Unmarshal(first[1], &second)
if err != nil {
return fmt.Errorf("invalid order, second element must be string: %w", err)
}
switch second {
case "on":
var third []json.RawMessage
err = json.Unmarshal(first[2], &third)
if err != nil {
return fmt.Errorf("invalid order, third element must be array: %w", err)
}
if len(third) != 5 {
return fmt.Errorf("invalid order, element 3 length must be 5, got %d", len(third))
}
for i, f := range []interface{}{&o.ID, &o.GID, &o.CID, &o.Type, &o.Amount} {
err = json.Unmarshal(third[i], f)
if err != nil {
return fmt.Errorf("invalid order, wrong type for element 3[%d]: %w", i, err)
}
}
return nil
}
return fmt.Errorf("invalid order, unknown type %q", second)
}
...
var msg *Order
// Server send response: `[0,"on",[6,1,5,"market",45.7]]`
if err := websocket.JSON.Receive(ws, &msg); err != nil {
logger.Println(err)
}
// msg is now an &Order{ID:6, GID:1, CID:5, Type:"market", Amount:45.7}
}
The reason the UnmarshalJSON function is huge is because your API is bad. If you control the Server; then you should avoid using mixed types in the same array, and you should avoid using arrays for relational data.

Update mongo db collection to create new field with unique values without impacting existing data using mongo go driver

I am new to mongo and mongo go driver. Need to add new field "uri" to my collection with existing data - using mongo go driver. New field needs to be populated with unique values so that unique index can be created on it. the collection uses _id as well, if there is a way we can populate new field based on _id field that will work as well.
I am trying below code, not sure how to populate unique values.
//Step1: update all documents to add new field with unique values
_, err := myColl.UpdateMany(
ctx,
bson.D{},// select all docs in collection
bson.D{
{"$set", bson.D{{"uri", GenerateRandomUniqueString()}}},
},
)
if err != nil {
return err
}
// then next step is to create index on this field:
key := bson.D{{"uri", 1}}
opt := options.Index().SetName("uri-index").SetUnique(true)
model := mongo.IndexModel{Keys: key, Options: opt}
_, err = myColl.Indexes().CreateOne(ctx, model)
if err != nil {
return err
}
Once the index is set up, old records will marked read only, but we can not delete those. New data will have unique 'uri' string value.
Any help is much appreciated.
Using above code fails while unique index creation, as the same value is used for backfill.
I tried this as well:
func BackFillUri(db *mongo.Database) error {
myColl := db.Collection("myColl")
ctx := context.Background()
cursor, err := myColl.Find(ctx, bson.M{})
if err != nil {
return err
}
defer cursor.Close(ctx)
for cursor.Next(ctx) {
var ds bson.M
if err = cursor.Decode(&ds); err != nil {
return err
}
_, err1 := myColl.UpdateOne(
ctx,
bson.D{"_id": ds.ObjectId},
bson.D{
{"$set", bson.D{{"uri", rand.Float64()}}},
},
)
if err1 != nil {
return err1
}
}
return nil
}
But i am getting quite a few errors and not sure if any of the above logic is correct
I finally used below code, hope it helps someone who's new like me :-)
const charset = "abcdefghijklmnopqrstuvwxyz" +
"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
func RandomUniqueString(length int) string {
return StringWithCharset(length, charset)
}
var seededRand *rand.Rand = rand.New(
rand.NewSource(time.Now().UnixNano()))
func StringWithCharset(length int, charset string) string {
b := make([]byte, length)
for i := range b {
b[i] = charset[seededRand.Intn(len(charset))]
}
return string(b)
}
// Adds index on uri
func AddUriIndex(db *mongo.Database) error {
mycoll := db.Collection("mycoll")
ctx := context.Background()
//backfill code starts
type attribute struct {
Key string `bson:"key"`
Value interface{} `bson:"value"`
}
type item struct {
ID primitive.ObjectID `bson:"_id"`
ResourceAttributes []attribute `bson:"resourceAttributes,omitempty"`
}
cursor, err := mycoll.Find(ctx, primitive.M{})
if err != nil {
return err
}
defer cursor.Close(ctx)
for cursor.Next(ctx) {
var result item
if err = cursor.Decode(&result); err != nil {
return err
}
//fmt.Println("Found() result:", result)
filter := primitive.M{"_id": result.ID}
update := primitive.M{"$set": primitive.M{"uri": RandomUniqueString(32)}}
if _, err := mycoll.UpdateOne(ctx, filter, update); err != nil {
return err
}
}
//add uri-index starts
key := bson.D{{"uri", 1}}
opt := options.Index().
SetName("uri-index").
SetUnique(true)
model := mongo.IndexModel{Keys: key, Options: opt}
_, err = mycoll.Indexes().CreateOne(ctx, model)
if err != nil {
return err
}
return nil
}

Golang: update slice in loop for empty interface

For example, we have 3 CSV files and common for all is Email column. In first file are Name and Email, in another are Email (plus different info) and no Name field. So, if I need to fill in 2 and 3 files field Name based on the correspondence of the Name and Đ•mail from the first file than... I wrote code like this:
package main
import (
"fmt"
"io/ioutil"
"log"
"path/filepath"
"strings"
"github.com/jszwec/csvutil"
)
type User struct {
Name string `csv:"name"`
Email string `csv:"email"`
}
type Good struct {
User
Dt string `csv:"details"`
}
type Strange struct {
User
St string `csv:"status"`
Dt string `csv:"details"`
}
var lst map[string]string
func readCSV(fn string, dat interface{}) error {
raw, err := ioutil.ReadFile(fn)
if err != nil {
return fmt.Errorf("Cannot read CSV: %w", err)
}
if err := csvutil.Unmarshal(raw, dat); err != nil {
return fmt.Errorf("Cannot unmarshal CSV: %w", err)
}
return nil
}
func fixNames(fl string, in interface{}) error {
if err := readCSV(fl, in); err != nil {
return fmt.Errorf("CSV: %w", err)
}
switch in.(type) {
case *[]Good:
var vals []Good
for _, v := range *in.(*[]Good) {
v.Name = lst[strings.TrimSpace(strings.ToLower(v.Email))]
vals = append(vals, v)
}
in = vals
case *[]Strange:
var vals []Strange
for _, v := range *in.(*[]Strange) {
v.Name = lst[strings.TrimSpace(strings.ToLower(v.Email))]
vals = append(vals, v)
}
in = vals
}
b, err := csvutil.Marshal(in)
if err != nil {
return fmt.Errorf("Cannot marshal CSV: %w", err)
}
ext := filepath.Ext(fl)
bas := filepath.Base(fl)
err = ioutil.WriteFile(bas[:len(bas)-len(ext)]+"-XIAOSE"+ext, b, 0644)
if err != nil {
return fmt.Errorf("Cannot save CSV: %w", err)
}
return nil
}
func main() {
var users []User
if err := readCSV("./Guitar_Contacts.csv", &users); err != nil {
log.Fatalf("CSV: %s", err)
}
lst = make(map[string]string)
for _, v := range users {
lst[strings.TrimSpace(strings.ToLower(v.Email))] = v.Name
}
var usersGood []Good
if err := fixNames("./Guitar-Good.csv", &usersGood); err != nil {
log.Fatalf("fix: %s", err)
}
var usersStrange []Strange
if err := fixNames("./Guitar-Uknown.csv", &usersStrange); err != nil {
log.Fatalf("fix: %s", err)
}
fmt.Println("OK")
}
in this code I don't like part in func fixNames where is switch:
switch in.(type) {
case *[]Good:
var vals []Good
for _, v := range *in.(*[]Good) {
v.Name = lst[strings.TrimSpace(strings.ToLower(v.Email))]
vals = append(vals, v)
}
in = vals
case *[]Strange:
var vals []Strange
for _, v := range *in.(*[]Strange) {
v.Name = lst[strings.TrimSpace(strings.ToLower(v.Email))]
vals = append(vals, v)
}
in = vals
}
because I just repeat code in part where *in.(SOME_TYPE). I want one loop and one action for different types, structs where are Name and Email fields...
Also was idea to do it with reflection smth. like this:
v := reflect.ValueOf(in)
v = v.Elem()
for i := 0; i < v.Len(); i++ {
fmt.Println(v.Index(i))
}
but I do not know what to do next, how to add in that v value for Name
You don't need reflection for this particular case. You can clean the code up by realizing that you are only working on the User part of the structs, and that you can simplify the type switch:
fix:=func(in *User) {
in.Name = lst[strings.TrimSpace(strings.ToLower(in.Email))]
}
switch k:=in.(type) {
case *[]Good:
for i := range *k {
fix( &(*k)[i].User )
}
case *[]Strange:
for i := range *k {
fix( &(*k)[i].User )
}
}
You have to repeat the for loop, but above code does the correction in place.
You can clean up a bit more by not passing a reference to the slice.
With reflect package, you can do that like this.
func fixNames(fl string, in interface{}) error {
//other code
v := reflect.ValueOf(in)
if v.Kind() == reflect.Ptr {
arr := v.Elem()
fmt.Println(arr.Len())
if arr.Kind() == reflect.Slice || arr.Kind() == reflect.Array {
for i := 0; i < arr.Len(); i++ {
elem := arr.Index(i)
f := elem.FieldByName("Name")
f.SetString("NameOfUser")
}
}
}
// other code
}
Also playground example: https://play.golang.org/p/KrGvLVprslH

Resources