This may be a basic question, as I have only just started learning Go; please bear with me.
I am making a program to extract data from the homepage using the goquery package:
package main
import (
"fmt"
"log"
"net/http"
"github.com/PuerkitoBio/goquery"
)
// url is the search-results page that getPages downloads.
var url = "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=3"
// main runs getPages once and discards the value it returns.
func main() {
	_ = getPages()
}
// getPages downloads the page at url, parses it with goquery and prints the
// anchor selection found inside every ".tplPagination" element.
// It always returns 0.
func getPages() int {
	resp, err := http.Get(url)
	checkErr(err)
	checkCode(resp)
	defer resp.Body.Close()

	document, err := goquery.NewDocumentFromReader(resp.Body)
	checkErr(err)

	pagination := document.Find(".tplPagination")
	pagination.Each(func(_ int, block *goquery.Selection) {
		links := block.Find("a")
		fmt.Println(links)
	})
	return 0
}
// checkErr aborts the program when err is non-nil; a nil err is a no-op.
//
// log.Fatalln prints the error and then exits the process, so any statement
// placed after it would never run.
func checkErr(err error) {
	if err != nil {
		log.Fatalln(err)
	}
}
// checkCode terminates the program unless the response status is 200.
func checkCode(res *http.Response) {
	if res.StatusCode == http.StatusOK {
		return
	}
	log.Fatalln("Request failed with statusCode:", res.StatusCode)
}
It prints below:
&{[0x140002db0a0 0x140002db570 0x140002db810 0x140002dbd50 0x140002dc000 0x140002dc2a0 0x140002dc540 0x140002dc850] 0x140000b2438 0x14000305680}
&{[0x140002dcd90 0x140002dd810] 0x140000b2438 0x14000305710}
But I just want to print only the first array out. Like this:
[0x140002dcd90 0x140002dd810]
How can I destruct them?
The problem is that you print every result as soon as it is matched.
You can save each *goquery.Selection in a new slice and print only the last element. This example works because you want the last occurrence, but in real code you should query for something specific so that you do not depend on the order of the results.
// type Selection struct {
// Nodes []*html.Node
// document *Document
// prevSel *Selection
// }
var temp []*goquery.Selection
temp = append(temp, doc.Find(".tplPagination").Each(func(i int, s *goquery.Selection) {
s.Find("a")
}))
fmt.Printf("last: %v\n", temp[len(temp)-1])
temp[len(temp)-1]: &{[0xc0002dcd90 0xc0002e0a80] 0xc00000e3f0 0xc000309770}
The Nodes []*html.Node can be accessed with same example:
// Nodes is the exported []*html.Node field of the Selection.
fmt.Printf("last: %v\n", temp[len(temp)-1].Nodes)
As per your comment you were looking to parse the page and get the number of pages and number of posts. Here is my attempt:
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"log"
"math"
"net/http"
"strconv"
"strings"
)
// errCheck logs err and exits the process when err is non-nil.
func errCheck(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
// ExampleScrape fetches page 3 of the jobkorea "golang" search results and
// prints the total post count, the number of posts on the page, the derived
// number of pages and every post title found on the page.
func ExampleScrape() {
	// %d, not %s: page is an int, and %s would render it as "%!s(int=3)"
	// inside the query string.
	url := "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=%d"
	page := 3
	fmt.Println("Current page:", page)

	res, err := http.Get(fmt.Sprintf(url, page))
	errCheck(err)
	defer res.Body.Close()
	if res.StatusCode != 200 {
		log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	errCheck(err)

	postsDiv := doc.Find(".recruit-info div.dev_list.lists-cnt")
	if len(postsDiv.Nodes) == 0 {
		// Indexing Nodes[0] on an empty selection would panic.
		log.Fatal("posts container not found in page")
	}

	// The total number of posts is read from the "total-count" attribute of
	// the first matched container.
	var totalCount int
	for _, a := range postsDiv.Nodes[0].Attr {
		if a.Key == "total-count" {
			totalCount, err = strconv.Atoi(a.Val)
			errCheck(err)
			break
		}
	}
	fmt.Println("Total count:", totalCount)

	titles := postsDiv.Find(".list-post .title")
	perPage := len(titles.Nodes)
	fmt.Println("On this page:", perPage)
	if perPage > 0 {
		// Skipped for an empty page, where the division would yield Inf or NaN.
		fmt.Println("Pages:", math.Ceil(float64(totalCount)/float64(perPage)))
	}

	fmt.Println("\nTitles on this page:")
	titles.Each(func(i int, s *goquery.Selection) {
		fmt.Println("\t-", strings.TrimSpace(s.Text()))
	})
}
// main runs the scraping example once.
func main() { ExampleScrape() }
Related
I'm trying to develop a Terraform provider, but I have a problem with the body of the first request. Here is the code:
// Body holds the single field decoded from the Trello JSON response.
// The field is exported because encoding/json ignores unexported struct
// fields, which would leave the id empty after decoding.
type Body struct {
	ID string `json:"id"`
}

// resourceServerCreate posts to the Trello organizations endpoint using
// "workspace_name", stores the id decoded from that response under
// "board_id", then posts to the boards endpoint using that id and
// "board_name". The resource ID is set to the board name and the result of
// resourceServerRead is returned.
//
// Every failure is returned to the caller instead of terminating the process.
func resourceServerCreate(d *schema.ResourceData, m interface{}) error {
	key := d.Get("key").(string)
	token := d.Get("token").(string)
	workspaceName := d.Get("workspace_name").(string)
	boardName := d.Get("board_name").(string)

	resp, err := http.Post("https://api.trello.com/1/organizations?key="+key+"&token="+token+"&displayName="+workspaceName, "application/json", nil)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Read the response body.
	body := new(Body)
	if err := json.NewDecoder(resp.Body).Decode(body); err != nil {
		return err
	}
	log.Println("[ORCA MADONNA] il log funzia " + body.ID)
	if err := d.Set("board_id", body.ID); err != nil {
		return err
	}

	resp1, err := http.Post("https://api.trello.com/1/boards?key="+key+"&token="+token+"&idOrganization="+body.ID+"&=&name="+boardName, "application/json", nil)
	if err != nil {
		return err
	}
	defer resp1.Body.Close()

	d.SetId(boardName)
	return resourceServerRead(d, m)
}
The id is empty in the log, yet the second call receives it and works fine. How is that possible?
Go doesn't force you to check error responses, therefore it's easy to make silly mistakes. Had you checked the return value from Decode(), you would have immediately discovered a problem.
err := json.NewDecoder(resp.Body).Decode(body)
if err != nil {
log.Fatal("Decode error: ", err)
}
Decode error: json: Unmarshal(non-pointer main.Body)
So your most immediate fix is to use & to pass a pointer to Decode():
// Decode is given a pointer to the destination value.
json.NewDecoder(resp.Body).Decode(&body)
Also of note, some programming editors will highlight this mistake for you:
Here's a working demonstration, including a corrected Body structure as described at json.Marshal(struct) returns “{}”:
package main
import (
"bytes"
"encoding/json"
"fmt"
"log"
"net/http"
"time"
)
// Aliases for generic JSON-shaped values.
type (
	// JSON is a map from string keys to arbitrary values.
	JSON = map[string]interface{}
	// JSONArray is a slice of arbitrary values.
	JSONArray = []interface{}
)
// ErrFatal logs msg followed by err and exits the program when err is
// non-nil; a nil err is a no-op.
func ErrFatal(err error, msg string) {
	if err == nil {
		return
	}
	log.Fatal(msg+": ", err)
}
// handleTestRequest answers every request with a fixed JSON object that
// carries a single "id" field.
func handleTestRequest(w http.ResponseWriter, req *http.Request) {
	const payload = `{"id":"yourid"}`
	w.Write([]byte(payload))
}
// launchTestServer registers handleTestRequest for "/" on the default mux,
// starts an HTTP server on :8080 in a background goroutine and then waits one
// second before returning.
func launchTestServer() {
	http.HandleFunc("/", handleTestRequest)
	go func() {
		// ListenAndServe always returns a non-nil error. Without this check
		// a failure to bind the port would go unnoticed and the client would
		// talk to whatever else is listening on :8080.
		if err := http.ListenAndServe(":8080", nil); err != nil {
			log.Fatal("ListenAndServe: ", err)
		}
	}()
	time.Sleep(1 * time.Second) // allow server to get started
}
// restClient is the HTTP client used by DoREST. It sets a 10-second timeout,
// following the Medium article "Don’t use Go’s default HTTP client (in
// production)".
var restClient = &http.Client{Timeout: 10 * time.Second}
// DoREST marshals payload to JSON, sends it to url with the given method and
// headers through restClient, and returns the response. Any failure ends the
// program via ErrFatal.
//
// The caller is responsible for closing the returned response body.
func DoREST(method, url string, headers, payload JSON) *http.Response {
	requestPayload, err := json.Marshal(payload)
	ErrFatal(err, "json.Marshal(payload)")

	request, err := http.NewRequest(method, url, bytes.NewBuffer(requestPayload))
	ErrFatal(err, "NewRequest "+method+" "+url)

	for k, v := range headers {
		// headers is a JSON map, so values may be numbers or booleans. A bare
		// v.(string) would panic on those, so they are formatted instead.
		value, ok := v.(string)
		if !ok {
			value = fmt.Sprint(v)
		}
		request.Header.Add(k, value)
	}

	response, err := restClient.Do(request)
	ErrFatal(err, "DoRest client.Do")
	return response
}
// Body is the JSON document returned by the test server.
type Body struct {
	// Id is filled from the "id" key of the JSON object.
	Id string `json:"id"`
}
// clientDemo posts an empty JSON object to the local test server, decodes the
// reply into a Body and prints it.
func clientDemo() {
	resp := DoREST("POST", "http://localhost:8080", JSON{}, JSON{})
	defer resp.Body.Close()

	decoded := Body{}
	ErrFatal(json.NewDecoder(resp.Body).Decode(&decoded), "Decode")
	fmt.Printf("Body: %#v\n", decoded)
}
// main starts the test server and runs the client demo five times.
func main() {
	launchTestServer()
	for remaining := 5; remaining > 0; remaining-- {
		clientDemo()
	}
}
Running the below code, the stack trace is outputted with the line number of fmt.Print(...). But I want to output the line of logError(err). I think I need to call xerrors.Caller(1) to do that but I don't know how. Help me.
import (
"fmt"
"io/ioutil"
"golang.org/x/xerrors"
)
// main tries to read the file named "" and hands any error to logError.
func main() {
	if _, err := ioutil.ReadFile(""); err != nil {
		logError(err)
	}
}
// logError wraps err with xerrors.Errorf and prints the result using %+v.
func logError(err error) {
	wrapped := xerrors.Errorf(": %w", err)
	fmt.Printf("%+v", wrapped)
}
I think what you want is:
import (
"fmt"
"io/ioutil"
"golang.org/x/xerrors"
)
// doWhatever tries to read the file named "" and returns the failure wrapped
// by xerrors.Errorf, or nil when the read succeeds.
func doWhatever() error {
	if _, err := ioutil.ReadFile(""); err != nil {
		return xerrors.Errorf("failed doing whatever: %w", err)
	}
	return nil
}
// main runs doWhatever and passes any error it returns to logError.
func main() {
	if err := doWhatever(); err != nil {
		logError(err)
	}
}
// logError prints err to standard output using the %+v verb, without a
// trailing newline.
func logError(err error) {
	const layout = "%+v"
	fmt.Printf(layout, err)
}
I made this to monitor a few websites and notify me if one of them goes down. I'm testing it on just two urls. When it starts it uses about 5mb of memory (I checked with systemctl status monitor). After 40 minutes, it's using 7.4mb. After 8 hours, it uses over 50mb of memory. Why is it doing this? Is this called a memory leak?
package main
import (
"fmt"
"io/ioutil"
"net/http"
"os"
"sync"
"time"
"monitor/utils/slack"
"gopkg.in/yaml.v2"
)
// config holds the settings that init decodes from config.yaml.
var config struct {
	// Frequency is the tick interval in seconds.
	Frequency int
	// Urls lists the addresses to check on every tick.
	Urls []string
}
// statusType stores one int value per URL; set takes mux while writing.
type statusType struct {
	values map[string]int
	mux    sync.Mutex
}

// status is the package-wide store, created with an empty values map.
var status = statusType{values: make(map[string]int)}

// set stores value for url while holding the mutex.
func (s *statusType) set(url string, value int) {
	s.mux.Lock()
	defer s.mux.Unlock()
	s.values[url] = value
}
// init loads config.yaml from the working directory into config and seeds
// status with 200 for every configured URL.
//
// Any configuration problem is printed and ends the process with exit status
// 1, so that a supervisor sees the start-up as failed.
func init() {
	data, err := ioutil.ReadFile("config.yaml")
	if err != nil {
		fmt.Printf("Invalid config: %s\n", err)
		os.Exit(1)
	}
	if err = yaml.Unmarshal(data, &config); err != nil {
		fmt.Printf("Invalid config: %s\n", err)
		os.Exit(1)
	}
	if config.Frequency <= 0 {
		// time.NewTicker panics when given a non-positive interval.
		fmt.Printf("Invalid config: frequency must be positive, got %d\n", config.Frequency)
		os.Exit(1)
	}
	for _, url := range config.Urls {
		status.set(url, 200)
	}
}
// main starts one check goroutine per configured URL on every tick; the tick
// interval is config.Frequency seconds. The loop never terminates.
func main() {
	interval := time.Duration(config.Frequency) * time.Second
	ticker := time.NewTicker(interval)
	for range ticker.C {
		for _, target := range config.Urls {
			go check(target)
		}
	}
}
// check requests url once. If the status code differs from the one recorded
// in status, it stores the new code and sends a Slack alert. A failed request
// is treated as status 500.
func check(url string) {
	res, err := http.Get(url)
	if err != nil {
		res = &http.Response{StatusCode: 500}
	} else {
		// The body of every successful response must be closed, otherwise
		// the connection behind it is never released.
		defer res.Body.Close()
	}

	// main starts check in its own goroutine for every URL on every tick, so
	// the map read takes the same mutex that set uses for writes.
	status.mux.Lock()
	previous := status.values[url]
	status.mux.Unlock()

	// the memory problem occurs when this condition is never satisfied, so I didn't post the slack package.
	if res.StatusCode != previous {
		status.set(url, res.StatusCode)
		if err := slack.Alert(url, res.StatusCode); err != nil {
			fmt.Println(err)
		}
	}
}
If this belongs in Code Review then I will put it there.
Yes, this is a memory leak. One obvious source I can spot is that you're not closing the response bodies from your requests:
// check requests url once. If the status code differs from the one recorded
// in status, it stores the new code and sends a Slack alert. A failed request
// is treated as status 500.
func check(url string) {
	res, err := http.Get(url)
	if err != nil {
		res = &http.Response{StatusCode: 500}
	} else {
		defer res.Body.Close() // You need to close the response body!
	}

	// set writes the map under status.mux, so the read takes the same lock
	// to avoid an unsynchronized map access.
	status.mux.Lock()
	last := status.values[url]
	status.mux.Unlock()

	if res.StatusCode != last {
		status.set(url, res.StatusCode)
		if err := slack.Alert(url, res.StatusCode); err != nil {
			fmt.Println(err)
		}
	}
}
Better still, so that Go can use keepalive, you want to read the full body and close it:
// Read the rest of the body into ioutil.Discard, then close it.
defer func() {
io.Copy(ioutil.Discard, res.Body)
res.Body.Close()
}()
You can further analyse where memory usage is coming from by profiling your application with pprof. There's a good rundown on the Go blog and a web search will turn up many more articles on the topic.
Is there an easy way to get the permanent MAC Address using Go?
package main
import (
"fmt"
"log"
"net"
)
// getMacAddr returns the hardware address of every network interface that
// has a non-empty one, formatted as a string. It returns the error from
// net.Interfaces unchanged.
func getMacAddr() ([]string, error) {
	interfaces, err := net.Interfaces()
	if err != nil {
		return nil, err
	}

	var addrs []string
	for i := range interfaces {
		mac := interfaces[i].HardwareAddr.String()
		if mac == "" {
			continue
		}
		addrs = append(addrs, mac)
	}
	return addrs, nil
}
// main prints every address returned by getMacAddr, one per line, and exits
// through log.Fatal if getMacAddr fails.
func main() {
	addrs, err := getMacAddr()
	if err != nil {
		log.Fatal(err)
	}
	for i := range addrs {
		fmt.Println(addrs[i])
	}
}
I'm trying to get the .mp4 video source of a vine, using GoQuery. However when I run it, I get nothing, no error, or return. Just a blank line.
package main
import (
"fmt"
"log"
"github.com/PuerkitoBio/goquery"
)
// getMP4URL loads the Vine page and, for every ".vine-video-container"
// element, prints the "src" attribute of the <video> element inside it.
func getMP4URL() {
	doc, err := goquery.NewDocument("https://vine.co/v/MlWtKgwh7WY")
	if err != nil {
		log.Fatal(err)
	}

	containers := doc.Find(".vine-video-container")
	containers.Each(func(_ int, container *goquery.Selection) {
		src, _ := container.Find("video").Attr("src")
		fmt.Printf("MP4: %s", src)
	})
}
// main runs getMP4URL once.
func main() { getMP4URL() }
Is this a problem with my code, or with vine itself?
It seems that Vine adds that element with JavaScript.
if I add
html, err := doc.Html()
if err != nil {
log.Fatal(err)
}
log.Println(html)
before doc.Find there is no .vine-video-container in the html output
try this code :)
package main
import (
"fmt"
"log"
"github.com/PuerkitoBio/goquery"
)
// getMP4URL loads the Vine page and prints the "content" attribute of every
// <meta> element whose "itemprop" attribute is "contentURL".
func getMP4URL() {
	doc, err := goquery.NewDocument("https://vine.co/v/MlWtKgwh7WY")
	if err != nil {
		log.Fatal(err)
	}
	doc.Find("meta").Each(func(i int, s *goquery.Selection) {
		if op, _ := s.Attr("itemprop"); op != "contentURL" {
			return
		}
		// Attr returns (value, exists). Passing both to Println would print
		// the boolean after the URL, so only the value is printed, and only
		// when the attribute exists.
		if content, ok := s.Attr("content"); ok {
			fmt.Println(content)
		}
	})
}
// main runs getMP4URL once.
func main() { getMP4URL() }
Vine embeds the video's metadata as JSON inside the <script type="application/ld+json"> tag, so you need to extract the JSON blob from that tag and decode it to get the src of the video.
The following is complete working code to get the src URL of Vine video:
package main
import (
"encoding/json"
"github.com/PuerkitoBio/goquery"
)
// Types mirroring the parts of the Vine JSON blob that are decoded.
type (
	// SharedContent carries the value of the "contentUrl" key.
	SharedContent struct {
		ContentUrl string `json:"contentUrl"`
	}

	// VineVideoMetadata carries the object found under "sharedContent".
	VineVideoMetadata struct {
		SC SharedContent `json:"sharedContent"`
	}
)
// DecodeVineJsonBlob unmarshals blob into a VineVideoMetadata value and
// panics if the JSON cannot be decoded.
func DecodeVineJsonBlob(blob string) VineVideoMetadata {
	var decoded VineVideoMetadata
	if err := json.Unmarshal([]byte(blob), &decoded); err != nil {
		panic(err)
	}
	return decoded
}
// GetVineVideoJsonBlob loads the page at url and returns the text of its
// <script type="application/ld+json"> element(s). It panics if the page
// cannot be loaded.
func GetVineVideoJsonBlob(url string) string {
	const selector = `script[type="application/ld+json"]`

	page, err := goquery.NewDocument(url)
	if err != nil {
		panic(err)
	}
	script := page.Find(selector)
	return script.Text()
}
func GetVineVideoSrc(url string) string {
jsonBlob := GetVineVideoJsonBlob(url)
meta := DecodeVineJsonBlob(jsonBlob)
return meta.SC.ContentUrl
}
// main prints the video source URL of one Vine page using the built-in
// println.
func main() {
	src := GetVineVideoSrc("https://vine.co/v/MlWtKgwh7WY")
	println(src)
}
output:
https://mtc.cdn.vine.co/r/videos/67FAC9DFA21115619347885645824_22a564aec15.5.0.17428816123715427422.mp4?versionId=4zcm5ySoFhqUQBXU7Ehm3YOuOSjFbkg3