I'm about to manipulate XMLs (without schema) in Go and my goal is to achieve a simple framework without a hardwired internal representation of xml's structure.
I have code like this:
package main
import (
"bytes"
"encoding/xml"
"io"
"io/ioutil"
"log"
"os"
)
// decoder is the shared XML token source; main assigns it and setTokens reads from it.
var decoder *xml.Decoder
// encoder is the shared XML token sink; main assigns it and Tokens.toStr writes to it.
var encoder *xml.Encoder
// Tokens is a node in the in-memory token structure built by setTokens.
type Tokens struct {
// root is the token held by this node.
root xml.Token
// children holds the nodes created for the tokens read after root.
children []Tokens
}
// Tokenizer describes a value that can render itself as a string.
// NOTE(review): Tokens.toStr in this file returns nothing, so Tokens does not
// satisfy this interface as written.
type Tokenizer interface {
toStr() string
}
// setTokens wraps t in a Tokens node and then keeps reading from the
// package-level decoder, attaching a node for every further token as a child.
//
// Because the function recurses for every token it reads, the first recursive
// call consumes the rest of the input; the result is a chain that preserves
// document order rather than a tree that mirrors element nesting.
//
// Reading stops on any decoder error. io.EOF is the normal end of input; any
// other error is logged. Previously only io.EOF ended the loop, so a syntax
// error (which the decoder keeps returning) caused unbounded recursion.
func setTokens(t xml.Token) Tokens {
	res := Tokens{root: t, children: make([]Tokens, 0)}
	for {
		tok, err := decoder.Token()
		if err != nil {
			if err != io.EOF {
				log.Printf("reading XML token: %v", err)
			}
			break
		}
		// CopyToken detaches the token from the decoder's internal buffer,
		// which is only valid until the next call to Token.
		child := setTokens(xml.CopyToken(tok))
		res.children = append(res.children, child)
	}
	return res
}
// toStr writes t.root and then, depth first, every child node to the
// package-level encoder. Encoding failures are logged instead of being
// silently discarded. The caller is responsible for flushing the encoder.
func (t Tokens) toStr() {
	if err := encoder.EncodeToken(t.root); err != nil {
		log.Printf("encoding XML token: %v", err)
	}
	for _, child := range t.children {
		child.toStr()
	}
}
// main copies testXML.xml to ./testOut.xml by reading it into a token
// structure and writing that structure back out.
func main() {
	fc, err := ioutil.ReadFile("testXML.xml")
	if err != nil {
		log.Fatal(err)
	}
	out, err := os.Create("./testOut.xml")
	if err != nil {
		log.Fatal(err)
	}

	decoder = xml.NewDecoder(bytes.NewReader(fc))
	encoder = xml.NewEncoder(out)

	t, err := decoder.Token()
	if err != nil {
		out.Close()
		if err == io.EOF {
			// Empty input: nothing to write.
			return
		}
		log.Fatal(err)
	}

	// Copy the first token as well; it is only valid until the next Token call.
	tokens := setTokens(xml.CopyToken(t))
	tokens.toStr()

	// EncodeToken buffers its output, so it must be flushed. The previous
	// encoder.Encode(t) call flushed as a side effect but also marshalled the
	// StartElement struct itself, which produced the stray
	// <StartElement>...</StartElement> element at the end of the file.
	if err := encoder.Flush(); err != nil {
		out.Close()
		log.Fatal(err)
	}
	if err := out.Close(); err != nil {
		log.Fatal(err)
	}
}
An input xml like this:
<ParamSection SectVersion="1">
<Version>22</Version>
</ParamSection>
And an output like this:
<ParamSection SectVersion="1">
<Version>22</Version>
</ParamSection><StartElement><Name></Name><Attr><Name></Name><Value>1</Value></Attr></StartElement>
Which is more than nothing. There are two obvious problems:
instead of a \t (tabulator)
<StartElement><Name></Name><Attr><Name></Name><Value>1</Value></Attr></StartElement>
instead of nothing (Encoder seems to append the attribs of the first
field).
Any idea what the problem is?
Related
I have a struct which contains a type based on an enum. I am trying to render it to a user friendly string. Here's minimum viable code:
package main
import (
"fmt"
"gopkg.in/yaml.v3"
)
// Job is the value being marshalled; it has a single enum-typed field.
type Job struct {
// Engine is serialized under the key "Engine" in both JSON and YAML.
Engine Engine `json:"Engine" yaml:"Engine"`
}
// Engine is an int-backed enumeration. The go:generate directive below asks
// stringer to produce its String method, trimming the "Engine" prefix.
//go:generate stringer -type=Engine --trimprefix=Engine
type Engine int
// The values are assigned by iota, starting at 0 for engineUnknown.
const (
engineUnknown Engine = iota // must be first
EngineDocker
engineDone // must be last
)
// main prints a Job with fmt's %+v verb and then as YAML so the two
// renderings of the Engine field can be compared.
func main() {
	j := Job{Engine: EngineDocker}
	fmt.Printf("%+v\n\n", j)

	out, err := yaml.Marshal(j)
	if err != nil {
		// Do not print a possibly empty document as if marshalling had succeeded.
		panic(err)
	}
	fmt.Println(string(out))
}
Here's the generated code:
// Code generated by "stringer -type=Engine --trimprefix=Engine"; DO NOT EDIT.
package main
import "strconv"
// _ is a compile-time guard emitted by stringer: each index expression below
// is only in range for the one-element array while the constant still has the
// value the generated tables were built for.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[engineUnknown-0]
_ = x[EngineDocker-1]
_ = x[engineDone-2]
}
// _Engine_name is the concatenation of the generated names: "engineUnknown",
// "Docker" and "engineDone". Only EngineDocker lost the "Engine" prefix.
const _Engine_name = "engineUnknownDockerengineDone"
// _Engine_index holds the start offset of each name in _Engine_name followed
// by the total length, so name i spans [_Engine_index[i], _Engine_index[i+1]).
var _Engine_index = [...]uint8{0, 13, 19, 29}
// String returns the generated name for i, or "Engine(N)" with the decimal
// value when i is outside the range covered by the tables.
func (i Engine) String() string {
if i < 0 || i >= Engine(len(_Engine_index)-1) {
return "Engine(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _Engine_name[_Engine_index[i]:_Engine_index[i+1]]
}
Here's the output:
{Engine:1}
Engine: 1
Here's what I'd like the output to be:
{Engine:Docker}
Engine: Docker
I thought the String() in the generated file would accomplish this. Is there any way to do this? Thanks!
The YAML marshaler doesn't use the String method. Instead, YAML uses the encoding.TextMarshaler and encoding.TextUnmarshaler interfaces. In fact, all the other codecs - JSON, XML, TOML, etc. - use those interfaces to read and write values. So, if you implement those methods for your type, you get support in all the other codecs for free.
Here is an example how to make a human-readable encoding for your enum: https://go.dev/play/p/pEcBmAM-oZJ
// Engine is an int-backed enumeration of job engines.
type Engine int
// The values are assigned by iota, starting at 0 for engineUnknown.
const (
engineUnknown Engine = iota // must be first
EngineDocker
engineDone // must be last
)
// Lookup tables for converting between Engine values and their display
// names; both are populated by init.
var (
	// engineNames maps an Engine value (used as the index) to its name.
	engineNames []string
	// engineNameToValue maps a lower-cased name back to its Engine value.
	engineNameToValue map[string]Engine
)

// init fills the name table and derives the reverse, case-insensitive lookup
// from it.
func init() {
	engineNames = []string{"Unknown", "Docker"}
	engineNameToValue = make(map[string]Engine)
	for code := 0; code < len(engineNames); code++ {
		key := strings.ToLower(engineNames[code])
		engineNameToValue[key] = Engine(code)
	}
}
// String returns the display name registered for e in engineNames.
// It panics when e has no entry in that table.
func (e Engine) String() string {
	if idx := int(e); idx >= 0 && idx < len(engineNames) {
		return engineNames[idx]
	}
	panic(fmt.Errorf("Invalid engine code: %d", e))
}
// ParseEngine looks text up case-insensitively in engineNameToValue.
// It returns engineUnknown and an error when the name is not registered.
func ParseEngine(text string) (Engine, error) {
	if code, found := engineNameToValue[strings.ToLower(text)]; found {
		return code, nil
	}
	return engineUnknown, fmt.Errorf("Invalid engine name: %s", text)
}
// MarshalText implements encoding.TextMarshaler by emitting the engine's
// display name.
//
// A value without an entry in engineNames is reported as an error so that
// encoders can surface it, rather than panicking from inside a marshal call.
func (e Engine) MarshalText() ([]byte, error) {
	if e < 0 || int(e) >= len(engineNames) {
		return nil, fmt.Errorf("invalid engine code: %d", e)
	}
	return []byte(e.String()), nil
}
// UnmarshalText implements encoding.TextUnmarshaler by parsing text with
// ParseEngine. The receiver is overwritten with ParseEngine's result even
// when an error is returned.
func (e *Engine) UnmarshalText(text []byte) error {
	parsed, err := ParseEngine(string(text))
	*e = parsed
	return err
}
How it works:
// main round-trips a Job through YAML and then JSON, printing the original
// value, each encoded form and each decoded value.
func main() {
	job := Job{Engine: EngineDocker}
	fmt.Printf("%#v\n\n", job)

	// == YAML ==
	yamlBytes, err := yaml.Marshal(job)
	if err != nil {
		panic(err)
	}
	fmt.Printf("YAML: %s\n", string(yamlBytes))

	var fromYAML Job
	if err = yaml.Unmarshal(yamlBytes, &fromYAML); err != nil {
		panic(err)
	}
	fmt.Printf("%#v\n\n", fromYAML)

	// == JSON ==
	jsonBytes, err := json.Marshal(job)
	if err != nil {
		panic(err)
	}
	fmt.Printf("JSON: %s\n", string(jsonBytes))

	var fromJSON Job
	if err = json.Unmarshal(jsonBytes, &fromJSON); err != nil {
		panic(err)
	}
	fmt.Printf("%#v\n\n", fromJSON)
}
the output
main.Job{Engine:1}
YAML: Engine: Docker
main.Job{Engine:1}
JSON: {"Engine":"Docker"}
main.Job{Engine:1}
See? It writes and reads strings to both YAML and JSON without any extra effort.
I am trying to create a mock DB. In the current implementation I want an insert that stores rows and a select that returns them. I decided to use a bytes.Buffer as a memory block: on insert I encode a slice of rows into it, and on select I deserialize that memory block. However, select returns only the first row instead of all the rows that were in the slice.
main.go
// main runs the interactive prompt: it reads one line at a time from stdin,
// dispatches lines starting with "." as meta commands, and prepares and
// executes everything else as a statement.
//
// The loop ends when stdin is exhausted or fails. Previously the result of
// scanner.Scan was ignored, so after EOF the loop kept printing the prompt
// and re-processing an empty line forever.
func main() {
	inputBuffer := compiler.NewInputBuffer()
	scanner := bufio.NewScanner(os.Stdin)
	for {
		PrintPrompt()
		if !scanner.Scan() {
			if err := scanner.Err(); err != nil {
				fmt.Fprintln(os.Stderr, "reading input:", err)
			}
			return
		}
		inputBuffer.Buffer = scanner.Text()

		if strings.HasPrefix(inputBuffer.Buffer, ".") {
			switch compiler.DoMetaCommand(inputBuffer) {
			case compiler.MetaCommandSuccess:
				continue
			case compiler.MetaCommandUnrecognizedCommand:
				fmt.Printf("Unrecognized command %q \n", inputBuffer.Buffer)
				continue
			}
		}

		var statement compiler.Statement
		switch compiler.PrepareStatement(inputBuffer, &statement) {
		case compiler.PrepareSuccess:
		case compiler.PrepareUnrecognizedStatement:
			fmt.Printf("Unrecognized command at start of %q \n", inputBuffer.Buffer)
			continue
		case compiler.PrepareSyntaxError:
			fmt.Println("Syntax error. Could not parse statement.")
			continue
		}

		compiler.ExecuteStatement(statement)
		fmt.Println("Executed")
	}
}
// PrintPrompt writes the interactive prompt to standard output without a
// trailing newline.
func PrintPrompt() {
	fmt.Print("db > ")
}
Above is the code responsible for collecting user input etc.
package compiler
import (
"bytes"
"log"
"os"
"strconv"
"strings"
)
// Row is one table record. Its fields are exported, which encoding/gob
// requires in order to serialize them.
type Row struct {
ID int32
Username string
Email string
}
// Statement is the parsed form of one input line.
type Statement struct {
// RowToInsert is filled in by PrepareStatement for insert statements.
RowToInsert Row
// Type selects what ExecuteStatement does with the statement.
Type StatementType
}
var (
// RowsTable is the in-memory list of rows; PrepareStatement appends to it.
RowsTable = make([]Row, 0)
// RowsTableBuffer holds the gob-encoded form of the rows.
RowsTableBuffer bytes.Buffer
)
// DoMetaCommand handles dot-prefixed commands. ".exit" terminates the process
// with status 0; any other input is reported as unrecognized.
func DoMetaCommand(buffer InputBuffer) MetaCommandResult {
	if buffer.Buffer != ".exit" {
		return MetaCommandUnrecognizedCommand
	}
	os.Exit(0)
	return MetaCommandSuccess
}
// PrepareStatement parses buffer.Buffer into statement.
//
// Recognized forms:
//   - "insert <id> <username> <email>": fills statement.RowToInsert, appends
//     the row to RowsTable and returns PrepareSuccess. Fewer than three
//     arguments, or an id that is not a valid 32-bit integer, returns
//     PrepareSyntaxError.
//   - "select" (exact match): returns PrepareSuccess.
//
// Anything else returns PrepareUnrecognizedStatement.
func PrepareStatement(buffer InputBuffer, statement *Statement) PrepareResult {
	if len(buffer.Buffer) > 6 {
		bufferArguments := strings.Fields(buffer.Buffer)
		// Fields returns an empty slice for whitespace-only input, so guard
		// the index to avoid a panic on such a line.
		if len(bufferArguments) > 0 && bufferArguments[0] == "insert" {
			statement.Type = StatementInsert
			if len(bufferArguments) < 4 {
				return PrepareSyntaxError
			}
			// Parse with a 32-bit limit so out-of-range ids are rejected
			// instead of being silently truncated by the int32 conversion.
			id, err := strconv.ParseInt(bufferArguments[1], 10, 32)
			if err != nil {
				log.Printf("%q is not a valid id\n", bufferArguments[1])
				return PrepareSyntaxError
			}
			statement.RowToInsert.ID = int32(id)
			statement.RowToInsert.Username = bufferArguments[2]
			statement.RowToInsert.Email = bufferArguments[3]
			RowsTable = append(RowsTable, statement.RowToInsert)
			return PrepareSuccess
		}
	}
	if buffer.Buffer == "select" {
		statement.Type = StatementSelect
		return PrepareSuccess
	}
	return PrepareUnrecognizedStatement
}
// ExecuteStatement runs a prepared statement: an insert serializes RowsTable,
// a select deserializes and prints the stored rows. Other types are ignored.
func ExecuteStatement(statement Statement) {
	kind := statement.Type
	if kind == StatementInsert {
		SerializeRow(RowsTable)
	} else if kind == StatementSelect {
		DeserializeRow()
	}
}
The code above is for parsing and appending the entries into statements and depending on the keywords, it's either an insert or select [Took the code for defining enums out and left core logic]
// SerializeRow gob-encodes r into RowsTableBuffer, replacing whatever the
// buffer held before.
//
// The caller in this file passes the complete RowsTable on every insert.
// Without the Reset, each call appended another independent gob stream, and a
// reader decoding once from the start of the buffer only ever saw the first,
// oldest snapshot.
func SerializeRow(r []Row) {
	RowsTableBuffer.Reset()
	encoder := gob.NewEncoder(&RowsTableBuffer)
	if err := encoder.Encode(r); err != nil {
		log.Println("encode error:", err)
	}
}
func DeserializeRow() {
var rowsBuffer = RowsTableBuffer
rowsTable := make([]Row, 0)
decoder := gob.NewDecoder(&rowsBuffer)
err := decoder.Decode(&rowsTable)
if err != nil {
log.Println("decode error:", err)
}
fmt.Println(rowsTable)
}
So the code above uses a global buffer in which the slice being appended to in PrepareStatement()will be encoded after an insert is done. A select ought to return the slice of all rows but just returns the first element for some reason.
Example (in terminal):
If I make 2 inserts:
db > insert 1 john c#mail.com
db > insert 2 collins k#mail.com
Then I make a select:
select
=> it returns [{1 john c#mail.com}] only.
Is there anything I am missing here? Thanks for your support.
So the answer was pretty simple. We were creating a new encoder in the SerializeRow function instead of creating it once. We pulled it out of the function and created a global.
var (
// encoder and decoder are created once and share RowsTableBuffer, so all
// rows travel over a single gob stream.
encoder = gob.NewEncoder(&RowsTableBuffer)
decoder = gob.NewDecoder(&RowsTableBuffer)
)
// SerializeRow appends one row to the shared gob stream through the
// package-level encoder, logging any encode failure.
func SerializeRow(r Row) {
	if encodeErr := encoder.Encode(r); encodeErr != nil {
		log.Println("encode error:", encodeErr)
	}
}
func DeserializeRow() {
var rows Row
err := decoder.Decode(&rows)
for err == nil {
if err != nil {
log.Fatal("decode error:", err)
}
fmt.Printf("%d %s %s\n", rows.ID, rows.Username, rows.Email)
err = decoder.Decode(&rows)
}
}
I query an API and the response is a custom JSON document. How do I unmarshal it? The sample JSON:
{"14AcKEr19gHJvgwQhK7sfFm6YJGmoZZoqu": {
"final_balance": 61914248289,
"n_tx": 3472,
"total_received": 3479994002972
}}
The key is a hex string. How should I handle it following Go conventions? Can anyone help me?
Below is my try test code:
// Response hook: print the raw body, decode it into a generic map, then print
// the map both with fmt and with dumpMap. A decode failure panics.
c.OnResponse(func(r *colly.Response) {
	body := r.Body
	fmt.Println(string(body))
	fmt.Println("==================")

	// parse bitcoin json
	parsed := make(map[string]interface{})
	if err := json.Unmarshal([]byte(body), &parsed); err != nil {
		panic(err)
	}
	fmt.Println(parsed)
	dumpMap("", parsed)
})
// dumpMap prints m to standard output. A value that is itself a
// map[string]interface{} is printed as a braced section and dumped
// recursively with one more tab of indentation; any other value is printed
// as "<space> <key> : <value>". Entries appear in map iteration order.
func dumpMap(space string, m map[string]interface{}) {
	for key, val := range m {
		nested, isMap := val.(map[string]interface{})
		if !isMap {
			fmt.Printf("%v %v : %v\n", space, key, val)
			continue
		}
		fmt.Printf("{ \"%v\": \n", key)
		dumpMap(space+"\t", nested)
		fmt.Printf("}\n")
	}
}
Running go run cmd/main.go prints the following to the console:
{"14AcKEr19gHJvgwQhK7sfFm6YJGmoZZoqu": {
"final_balance": 75494521080,
"n_tx": 3493,
"total_received": 3493574275763
}}
==================
map[14AcKEr19gHJvgwQhK7sfFm6YJGmoZZoqu:map[n_tx:3493 total_received:3.493574275763e+12 final_balance:7.549452108e+10]]
{ "14AcKEr19gHJvgwQhK7sfFm6YJGmoZZoqu":
final_balance : 7.549452108e+10
n_tx : 3493
total_received : 3.493574275763e+12
}
Do I need a customised unmarshal func to get the string key? If 14AcKEr19gHJvgwQhK7sfFm6YJGmoZZoqu is the key, I can't easily access the data. I just want to know how to handle it.
You can unmarshal it into a map, so the generated key becomes a key of the map.
https://play.golang.org/p/IfEjjvKakpu
package main
import (
"encoding/json"
"fmt"
"log"
)
// input is the sample response: one top-level key whose value is an object
// with three numeric fields.
var input = `{"14AcKEr19gHJvgwQhK7sfFm6YJGmoZZoqu": {
"final_balance": 61914248289,
"n_tx": 3472,
"total_received": 3479994002972
}}`
// object mirrors the value stored under each top-level key of the response.
// All three fields decode as unsigned 64-bit integers.
type object struct {
FinalBalance uint64 `json:"final_balance"`
NTX uint64 `json:"n_tx"`
TotalReceived uint64 `json:"total_received"`
}
// main decodes input into a map keyed by the dynamic top-level key and
// prints the result; a decode error terminates the program.
func main() {
	var result map[string]object
	if err := json.Unmarshal([]byte(input), &result); err != nil {
		log.Fatal(err)
	}

	fmt.Printf("result: %+v", result)
	// result: map[14AcKEr19gHJvgwQhK7sfFm6YJGmoZZoqu:{FinalBalance:61914248289 NTX:3472 TotalReceived:3479994002972}]
}
I'm parsing through the triples of the Freebase RDF compressed and streaming with the XML package in Golang. However, I'm getting an out of memory error.
Do I have to garbage-collect? How can I do that? How can I clear the memory after I'm done writing that triple to the XML file?
Here's my code: http://play.golang.org/p/dWvbtcs7wy
package main
import(
"bufio"
"flag"
"fmt"
"io"
"net/url"
"os"
"regexp"
"strings"
)
// inputFile is the -infile flag: the path of the input file, defaulting to "freebase-rdf".
var inputFile = flag.String("infile", "freebase-rdf", "Input file path")
// filter matches titles that start with one of the listed namespace prefixes
// (file:, talk:, special:, wikipedia:, wiktionary:, user:, user_talk:).
// MustCompile is used so that a broken pattern fails loudly at start-up
// instead of leaving filter nil with the error discarded.
var filter = regexp.MustCompile("^file:.*|^talk:.*|^special:.*|^wikipedia:.*|^wiktionary:.*|^user:.*|^user_talk:.*")
// Redirect maps an XML element whose "title" attribute is stored in Title.
type Redirect struct {
Title string `xml:"title,attr"`
}
// Page maps an XML element with a "title" child element.
type Page struct {
Title string `xml:"title"`
// NOTE(review): the xml tag is empty, so presumably the field name is used
// as the element name - confirm this is intended.
Abstract string `xml:""`
}
// CanonicaliseTitle lower-cases title, replaces every space with an
// underscore and query-escapes the result.
func CanonicaliseTitle(title string) string {
	underscored := strings.Replace(strings.ToLower(title), " ", "_", -1)
	return url.QueryEscape(underscored)
}
// convertFreebaseId shortens an angle-bracketed URI: the brackets are
// dropped, every occurrence of "http://rdf.freebase.com/ns" is removed and
// every "." becomes "/". Input that is not wrapped in "<" and ">" is
// returned unchanged.
func convertFreebaseId(uri string) string {
	bracketed := strings.HasPrefix(uri, "<") && strings.HasSuffix(uri, ">")
	if !bracketed {
		return uri
	}
	inner := uri[1 : len(uri)-1]
	withoutNS := strings.Replace(inner, "http://rdf.freebase.com/ns", "", -1)
	return strings.Replace(withoutNS, ".", "/", -1)
}
// parseTriple splits a tab-separated line into subject, predicate and object
// and passes each through convertFreebaseId.
//
// The line terminator is stripped first so it cannot end up inside the last
// field. A line with fewer than three fields yields empty strings for the
// missing ones instead of panicking with an index-out-of-range error.
// Anything after the third field is ignored.
func parseTriple(line string) (string, string, string) {
	fields := strings.SplitN(strings.TrimRight(line, "\r\n"), "\t", 4)
	var parts [3]string
	copy(parts[:], fields) // copies at most three fields; the rest stay ""
	subject := convertFreebaseId(parts[0])
	predicate := convertFreebaseId(parts[1])
	object := convertFreebaseId(parts[2])
	return subject, predicate, object
}
var (
// validRegexp matches strings made only of ASCII letters, digits, "_" and
// "-" whose first character is a letter or digit.
validRegexp = regexp.MustCompile("^[A-Za-z0-9][A-Za-z0-9_-]*$")
// englishRegexp matches any string containing "#en".
englishRegexp = regexp.MustCompile("#en")
)
// validTitle reports whether every entry of content is acceptable as a
// title. An entry is rejected when it does not match englishRegexp, is longer
// than one byte and contains "[]".
//
// NOTE(review): the original if-body was empty, so the function always
// returned true. Returning false mirrors validText; confirm this is the
// intended rule.
func validTitle(content []string) bool {
	for _, v := range content {
		if !englishRegexp.MatchString(v) && len(v) > 1 && strings.Contains(v, "[]") {
			return false
		}
	}
	return true
}
// validText reports whether every entry of content is acceptable as text.
// An entry is rejected when it is longer than one byte, contains "[]" and
// does not match validRegexp.
func validText(content []string) bool {
	for i := range content {
		entry := content[i]
		suspicious := len(entry) > 1 && strings.Contains(entry, "[]")
		if suspicious && !validRegexp.MatchString(entry) {
			return false
		}
	}
	return true
}
// xmlTextEscaper escapes the characters that may not appear literally in XML
// element content.
var xmlTextEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")

// xmlAttrEscaper additionally escapes the double quote for use inside a
// double-quoted attribute value.
var xmlAttrEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", "\"", "&quot;")

// processTopic writes one <card> element for the topic id to file, provided
// its names pass validTitle and its texts pass validText; otherwise nothing
// is written.
//
// The card contains the name list, an image URL built from id, the text list
// and one <fact> per property value. Facts are emitted in map iteration
// order. All values come from the input data, so they are escaped before
// being placed into element content or attribute values; previously a "<" or
// "&" in the data produced malformed XML.
func processTopic(id string, properties map[string][]string, file io.Writer) {
	names := properties["/type/object/name"]
	texts := properties["/common/document/text"]
	if !validTitle(names) || !validText(texts) {
		return
	}

	text := xmlTextEscaper.Replace
	fmt.Fprintf(file, "<card>\n")
	// fmt.Sprint renders the slices exactly as the %s verb did ("[a b]").
	fmt.Fprintf(file, "<title>\"%s\"</title>\n", text(fmt.Sprint(names)))
	fmt.Fprintf(file, "<image>\"%s/%s\"</image>\n", "https://usercontent.googleapis.com/freebase/v1/image", text(id))
	fmt.Fprintf(file, "<text>\"%s\"</text>\n", text(fmt.Sprint(texts)))
	fmt.Fprintf(file, "<facts>\n")
	for k, v := range properties {
		for _, value := range v {
			fmt.Fprintf(file, "<fact property=\"%s\">%s</fact>\n", xmlAttrEscaper.Replace(k), text(value))
		}
	}
	fmt.Fprintf(file, "</facts>\n")
	fmt.Fprintf(file, "</card>\n")
}
// main streams the input file line by line, groups consecutive triples that
// share a subject into one topic, and writes each completed topic to
// freebase.xml through processTopic.
//
// Fixes over the previous version:
//   - flag.Parse is called, so -infile actually takes effect;
//   - the triple that starts a new subject is kept instead of being dropped;
//   - a final line without a trailing newline is processed;
//   - lines with fewer than three tab-separated fields are skipped;
//   - the os.Create error is checked, both files are closed, and output goes
//     through a buffered writer that is flushed at the end.
func main() {
	flag.Parse()

	f, err := os.Open(*inputFile)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer f.Close()

	xmlFile, err := os.Create("freebase.xml")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer xmlFile.Close()

	out := bufio.NewWriter(xmlFile)
	r := bufio.NewReader(f)

	var currentMID string
	currentTopic := make(map[string][]string)
	for {
		// ReadString can return data together with io.EOF, so the line is
		// handled before the error is examined.
		line, readErr := r.ReadString('\n')
		if strings.Count(line, "\t") >= 2 {
			subject, predicate, object := parseTriple(line)
			if subject != currentMID {
				if len(currentMID) > 0 {
					processTopic(currentMID, currentTopic, out)
					// Start a fresh map so the finished topic can be collected.
					currentTopic = make(map[string][]string)
				}
				currentMID = subject
			}
			currentTopic[predicate] = append(currentTopic[predicate], object)
		}
		if readErr != nil {
			if readErr != io.EOF {
				fmt.Println(readErr)
			}
			break
		}
	}

	if len(currentMID) > 0 {
		processTopic(currentMID, currentTopic, out)
	}
	if err := out.Flush(); err != nil {
		fmt.Println(err)
	}
}
I'm not sure that this is your problem, although reading your code it seems you're not leaking anything - but you can tune GC behavior a bit with SetGCPercent() http://golang.org/pkg/runtime/debug/#SetGCPercent
According to TFM, a collection is triggered when the ratio of freshly allocated data to live data remaining after the previous collection reaches this percentage. The default rate is 100%, which means that for programs that make lots of small allocations and hold lots of RAM, the overhead can be huge. I once had an HTTP cache take up over 200% of the cache size. Try tuning the percentage to somewhere around 10% and see if it helps.
Is there a way to unmarshal XML tags with dynamic attributes (I don't know which attributes I'll get every time).
Maybe it's not supported yet. See Issue 3633: encoding/xml: support for collecting all attributes
Something like :
package main
import (
"encoding/xml"
"fmt"
)
// main unmarshals a sample element into an anonymous struct whose only field
// is tagged `xml:",any"` and prints the result; an unmarshal error panics.
func main() {
	const data = `<TAG ATTR1="VALUE1" ATTR2="VALUE2" />`

	var v struct {
		Attributes []xml.Attr `xml:",any"`
	}
	if err := xml.Unmarshal([]byte(data), &v); err != nil {
		panic(err)
	}
	fmt.Println(v)
}
As of late 2017, this is supported by using:
// v collects the element's attributes: the ",any,attr" tag tells
// encoding/xml to store attributes not matched by another field here.
var v struct {
Attributes []xml.Attr `xml:",any,attr"`
}
Please see https://github.com/golang/go/issues/3633
You need to implement your own xml.Unmarshaler:
package main
import (
"encoding/xml"
"fmt"
)
// CustomTag captures an element's local name together with all of its
// attributes, whatever they are called.
type CustomTag struct {
	Name       string
	Attributes []xml.Attr
}

// UnmarshalXML implements xml.Unmarshaler. It records the start element's
// local name and attribute list, then skips the rest of the element.
func (c *CustomTag) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	*c = CustomTag{
		Name:       start.Name.Local,
		Attributes: start.Attr,
	}
	return d.Skip()
}
// main unmarshals a sample element into a CustomTag and prints it with field
// names; an unmarshal error panics.
func main() {
	data := []byte(`<tag ATTR1="VALUE1" ATTR2="VALUE2" />`)
	v := new(CustomTag)
	if err := xml.Unmarshal(data, &v); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", v)
}
outputs
&{Name:tag Attributes:[{Name:{Space: Local:ATTR1} Value:VALUE1} {Name:{Space: Local:ATTR2} Value:VALUE2}]}
http://play.golang.org/p/9ZrpIT32o_