I have created a simple scraper that takes the top 10 news from a website and returns a JSON with the title and the score. I want to pass the title and the score as HTML template so I can generate a webpage. I'm not familiar with the templating Go language and I don't know how to pass the values for each of the links. Here is the HTML code that I should use and my implementation for now:
<!DOCTYPE html>
<html>
<head><linkrel="stylesheet" href="https://unpkg.com/mvp.css"
/>
</head>
<body>
<h1>{{.PageTitle}}</h1>
<ul>
{{range .Links}}
<li>{{.Title}}: {{.Score}}</li>
{{end}}
</ul>
</body>
</html>
My code:
package main
import (
"encoding/json"
"html/template"
"log"
"net/http"
"strconv"
)
type TopStories struct {
Title string `json:"title"`
Score int `json:"score"`
}
type TopStoriesPayload struct {
TopStories []TopStories
}
type NewsScraper struct {
url string
Data []TopStories
}
type templateData struct {
PageTitle string
Data []TopStories
}
func NewNewsScraper(url string) *NewsScraper {
return &NewsScraper{url: url}
}
func Top10Stories() []string {
req, err := http.NewRequest("GET", "https://hacker-news.firebaseio.com/v0/topstories.json", nil)
if err != nil {
log.Fatal(err)
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
log.Fatal(err)
}
var IDs []int
json.NewDecoder(resp.Body).Decode(&IDs)
IDs = IDs[:10]
var IDsString []string
for _, id := range IDs {
IDsString = append(IDsString, strconv.Itoa(id))
}
return IDsString
}
func (n *NewsScraper) GetTopStories() {
req, err := http.NewRequest("GET", n.url, nil)
if err != nil {
log.Fatal(err)
}
for _, id := range Top10Stories() {
req.URL.Path = "/v0/item/" + id + ".json"
resp, err := http.DefaultClient.Do(req)
if err != nil {
log.Fatal(err)
}
var topStory TopStories
json.NewDecoder(resp.Body).Decode(&topStory)
n.Data = append(n.Data, topStory)
}
}
//create html template handler for top stories
func HTMLHandler(w http.ResponseWriter, r *http.Request) {
scraper := NewNewsScraper("https://hacker-news.firebaseio.com")
scraper.GetTopStories()
tmpl:= template.Must(template.ParseFiles("template.html"))
data := templateData{
PageTitle: "Top Stories",
Data :[]TopStories{
//what should I put here?
},
}
tmpl.Execute(w, data)
}
func main() {
mux := http.NewServeMux()
mux.HandleFunc("/top", HTMLHandler)
http.ListenAndServe(":8080", mux)
}
I see three issues with your code:
a) The template.html file should have space between link & rel
<linkrel="stylesheet" href="https://unpkg.com/mvp.css"/>
to
<link rel="stylesheet" href="https://unpkg.com/mvp.css"/>
b) The template.html file should contain .Data instead of .Links.
c) The go code should be replaced from the below
Data :[]TopStories{
//what should I put here?
},
to
Data : scraper.Data,
Related
Trying to move my golang html templates from files to using embed
Works fine:
func loadTemplates() multitemplate.Render {
r := multitemplate.New()
layouts, err := filepath.Glob("templates/layouts/*.tmpl")
if err != nil {
panic(err.Error())
}
includes, err := filepath.Glob("templates/includes/*.tmpl")
if err != nil {
panic(err.Error())
}
// Generate our templates map from our layouts/ and includes/ directories
for _, layout := range layouts {
files := append(includes, layout)
r.Add(filepath.Base(layout), template.Must(template.ParseFiles(files...)))
log.Println(filepath.Base(layout) + ": " + files[0])
}
return r
}
Very similar code returns blank page, no errors:
//go:embed templates/*
var f embed.FS
func loadTemplates() multitemplate.Render {
r := multitemplate.New()
// Generate our templates map from our layouts/ and includes/ directories
layouts, err := embed.FS.ReadDir(f, "templates/layouts")
if err != nil {
panic(err.Error())
}
for _, layout := range layouts {
embeddedTemplate, err := template.ParseFS(f, "templates/layouts/"+layout.Name(), "templates/includes/base.tmpl")
if err != nil {
log.Println(err)
}
r.Add(layout.Name(), embeddedTemplate)
log.Println(layout.Name() + " loaded")
}
return r
}
I confirmed in the debugger that all templates contain no errors and their respective content. Other embedded files such as static assets work fine and get served ok. Even other templates loaded from a database work fine. Just those from embed end up blank.
Any hints what's happening here?
Thanks!
Edit: Full example:
main.go
package main
import (
"embed"
"html/template"
"log"
"path/filepath"
"github.com/gin-contrib/multitemplate"
"github.com/gin-gonic/gin"
)
//go:embed templates/*
var f embed.FS
func main() {
router := gin.Default()
router.HTMLRender = loadTemplates()
router.GET("/embed", HomeHandlerEmbed(router))
router.GET("/file", HomeHandlerFile(router))
router.Run(":8080")
}
func loadTemplates() multitemplate.Render {
r := multitemplate.New()
//load same template from embed FS
embeddedTemplate, err := template.ParseFS(f, "templates/layouts/home.tmpl", "templates/includes/base.tmpl")
if err != nil {
log.Println(err)
}
r.Add("homeEmbed.tmpl", embeddedTemplate)
log.Println("homeEmbed.tmpl" + " loaded from embed FS")
// load same template from real file system
layoutsFile, err := filepath.Glob("templates/layouts/*.tmpl")
if err != nil {
panic(err.Error())
}
includes, err := filepath.Glob("templates/includes/*.tmpl")
if err != nil {
panic(err.Error())
}
for _, layout := range layoutsFile {
files := append(includes, layout)
r.Add(filepath.Base(layout), template.Must(template.ParseFiles(files...)))
log.Println(filepath.Base(layout) + ": " + files[0])
}
return r
}
func HomeHandlerEmbed(r *gin.Engine) gin.HandlerFunc {
return gin.HandlerFunc(func(c *gin.Context) {
c.HTML(200, "homeEmbed.tmpl", nil)
})
}
func HomeHandlerFile(r *gin.Engine) gin.HandlerFunc {
return gin.HandlerFunc(func(c *gin.Context) {
c.HTML(200, "home.tmpl", nil)
})
}
templates/includes/base.tmpl
<!DOCTYPE html>
<html>
<head>
{{template "head" .}}
</head>
<body>
{{template "body" .}}
</body>
</html>
templates/layouts/home.tmpl
{{define "head"}}<title>Test</title>{{end}}
{{define "body"}}
Body
{{end}}
/file works fine, /embed comes up blank
In function loadTemplates() just fix this line:
embeddedTemplate, err := template.ParseFS(f, "templates/includes/base.tmpl", "templates/layouts/home.tmpl")
In your example patterns will be presented in this sequence:
first: "templates/layouts/home.tmpl"
second: "templates/includes/base.tmpl"
But if I understood correctly, the sequence of patterns is important for the function template.ParseFS, because base.tmpl will be included in all you templates.
The function template.ParseFS reads the templates in the process and tries to generate them.
It may be a stupid question because I just learned Golang. I hope you understand.
I am making a program to extract data from the homepage using the goquery package:
package main
import (
"fmt"
"log"
"net/http"
"github.com/PuerkitoBio/goquery"
)
var url string = "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=3"
func main() {
getPages()
}
func getPages() int {
res, err := http.Get(url)
checkErr(err)
checkCode(res)
defer res.Body.Close()
doc, err := goquery.NewDocumentFromReader(res.Body)
checkErr(err)
doc.Find(".tplPagination").Each(func(i int, s *goquery.Selection) {
fmt.Println(s.Find("a"))
})
return 0
}
func checkErr(err error) {
if err != nil {
log.Fatalln(err)
fmt.Println(err)
}
}
func checkCode(res *http.Response) {
if res.StatusCode != 200 {
log.Fatalln("Request failed with statusCode:", res.StatusCode)
}
}
It prints below:
&{[0x140002db0a0 0x140002db570 0x140002db810 0x140002dbd50 0x140002dc000 0x140002dc2a0 0x140002dc540 0x140002dc850] 0x140000b2438 0x14000305680}
&{[0x140002dcd90 0x140002dd810] 0x140000b2438 0x14000305710}
But I just want to print only the first array out. Like this:
[0x140002dcd90 0x140002dd810]
How can I destruct them?
The problem is that you are printing as result is matched.
You can save the *goquery.Selection in a new slice and print only the last element. This example is working because you want the last occurrence, but in real life you must parse the query result for something in specific to not depend about result order.
// type Selection struct {
// Nodes []*html.Node
// document *Document
// prevSel *Selection
// }
var temp []*goquery.Selection
temp = append(temp, doc.Find(".tplPagination").Each(func(i int, s *goquery.Selection) {
s.Find("a")
}))
fmt.Printf("last: %v\n", temp[len(temp)-1])
temp[len(temp)-1]: &{[0xc0002dcd90 0xc0002e0a80] 0xc00000e3f0 0xc000309770}
The Nodes []*html.Node can be accessed with same example:
fmt.Printf("last: %v\n", temp[len(temp)-1].Nodes)
As per your comment you were looking to parse the page and get the number of pages and number of posts. Here is my attempt:
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"log"
"math"
"net/http"
"strconv"
"strings"
)
func errCheck(err error) {
if err != nil {
log.Fatal(err)
}
}
func ExampleScrape() {
url := "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=%s"
page := 3
fmt.Println("Current page:", page)
res, err := http.Get(fmt.Sprintf(url, page))
errCheck(err)
defer res.Body.Close()
if res.StatusCode != 200 {
log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
}
doc, err := goquery.NewDocumentFromReader(res.Body)
errCheck(err)
posts_div := doc.Find(".recruit-info div.dev_list.lists-cnt")
total_count_div := posts_div.Nodes[0]
var total_count int
for _, a := range total_count_div.Attr {
if a.Key == "total-count" {
total_count, err = strconv.Atoi(a.Val)
errCheck(err)
break
}
}
fmt.Println("Total count:", total_count)
titles := posts_div.Find(".list-post .title")
fmt.Println("On this page:", len(titles.Nodes))
fmt.Println("Pages:", math.Ceil(float64(total_count)/float64(len(titles.Nodes))))
fmt.Println("\nTitles on this page:")
titles.Each(func(i int, s *goquery.Selection) {
fmt.Println("\t-", strings.TrimSpace(s.Text()))
})
}
func main() {
ExampleScrape()
}
I'm trying to develop a Terraform provider but I have a problem of the first request body. Here is the code:
type Body struct {
id string
}
func resourceServerCreate(d *schema.ResourceData, m interface{}) error {
key := d.Get("key").(string)
token := d.Get("token").(string)
workspace_name := d.Get("workspace_name").(string)
board_name := d.Get("board_name").(string)
resp, err := http.Post("https://api.trello.com/1/organizations?key="+key+"&token="+token+"&displayName="+workspace_name,"application/json",nil)
if err != nil {
log.Fatalln(err)
}
defer resp.Body.Close()
//lettura body.
body := new(Body)
json.NewDecoder(resp.Body).Decode(body)
log.Println("[ORCA MADONNA] il log funzia "+body.id)
d.Set("board_id",body.id)
resp1, err1 := http.Post("https://api.trello.com/1/boards?key="+key+"&token="+token+"&idOrganization="+body.id+"&=&name="+board_name,"application/json",nil)
if err1 != nil {
log.Fatalln(resp1)
}
defer resp1.Body.Close()
d.SetId(board_name)
return resourceServerRead(d, m)
}
In the log is empty, but the second call have it and work fine. How is it possible?
Go doesn't force you to check error responses, therefore it's easy to make silly mistakes. Had you checked the return value from Decode(), you would have immediately discovered a problem.
err := json.NewDecoder(resp.Body).Decode(body)
if err != nil {
log.Fatal("Decode error: ", err)
}
Decode error: json: Unmarshal(non-pointer main.Body)
So your most immediate fix is to use & to pass a pointer to Decode():
json.NewDecoder(resp.Body).Decode(&body)
Also of note, some programming editors will highlight this mistake for you:
Here's a working demonstration, including a corrected Body structure as described at json.Marshal(struct) returns “{}”:
package main
import (
"bytes"
"encoding/json"
"fmt"
"log"
"net/http"
"time"
)
type JSON = map[string]interface{}
type JSONArray = []interface{}
func ErrFatal(err error, msg string) {
if err != nil {
log.Fatal(msg+": ", err)
}
}
func handleTestRequest(w http.ResponseWriter, req *http.Request) {
w.Write(([]byte)("{\"id\":\"yourid\"}"))
}
func launchTestServer() {
http.HandleFunc("/", handleTestRequest)
go http.ListenAndServe(":8080", nil)
time.Sleep(1 * time.Second) // allow server to get started
}
// Medium: "Don’t use Go’s default HTTP client (in production)"
var restClient = &http.Client{
Timeout: time.Second * 10,
}
func DoREST(method, url string, headers, payload JSON) *http.Response {
requestPayload, err := json.Marshal(payload)
ErrFatal(err, "json.Marshal(payload")
request, err := http.NewRequest(method, url, bytes.NewBuffer(requestPayload))
ErrFatal(err, "NewRequest "+method+" "+url)
for k, v := range headers {
request.Header.Add(k, v.(string))
}
response, err := restClient.Do(request)
ErrFatal(err, "DoRest client.Do")
return response
}
type Body struct {
Id string `json:"id"`
}
func clientDemo() {
response := DoREST("POST", "http://localhost:8080", JSON{}, JSON{})
defer response.Body.Close()
var body Body
err := json.NewDecoder(response.Body).Decode(&body)
ErrFatal(err, "Decode")
fmt.Printf("Body: %#v\n", body)
}
func main() {
launchTestServer()
for i := 0; i < 5; i++ {
clientDemo()
}
}
I'm writing a parser HTML in Go. I need to get HTML and pass it to another function.
I did it so:
Can`t pass "doc" to another function
receivedURL, err := http.Get("http://lavillitacafe.com/")
doc, err := goquery.NewDocumentFromReader(receivedURL.Body)
//"linkScrape" this is another function
contactURL := linkScrape(doc)
and
HTML is transferred in parts to another function.
resp, err := http.Get("http://lavillitacafe.com/")
if err != nil {
fmt.Println(err)
return
}
defer resp.Body.Close()
for true {
bs := make([]byte, 1014)
n, err := resp.Body.Read(bs)
contactURL := linkScrape(bs[:n])
if n == 0 || err != nil{
break
}
}
How do I do it right?
Here's the basic goquery example adjusted to your use case:
package main
import (
"fmt"
"log"
"strings"
"github.com/PuerkitoBio/goquery"
)
func findHeader(d *goquery.Document) string {
header := d.Find("h1").Text()
return header
}
func main() {
// create from a string
data := `
<html>
<head>
<title>My document</title>
</head>
<body>
<h1>Header</h1>
</body>
</html>`
doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
if err != nil {
log.Fatal(err)
}
fmt.Println(findHeader(doc))
}
I'm rather new to Stackoverflow. I'm stuck at this problem. I'm trying to make a map.
package main
import (
"encoding/json"
"fmt"
"html/template"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
)
func main() {
http.HandleFunc("/", handler)
http.HandleFunc("/showimage", showimage)
fmt.Println("listening...")
err := http.ListenAndServe(GetPort(), nil)
if err != nil {
log.Fatal("ListenAndServe: ", err)
}
}
func GetPort() string {
var port = os.Getenv("PORT")
if port == "" {
port = "4747"
fmt.Println("INFO: No PORT environment variable detected, defaulting to " + port)
}
return ":" + port
}
func handler (w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, rootForm)
}
const rootForm =
`<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Flickr photos</title>
</head>
<body>
<h1>Flickr photos</h1>
<p>Find photos by tags!</p>
<form action="/showimage" method="post" accept-charset="utf-8">
<input type="text" name="str" value="Type Tags..." id="str">
<input type="submit" value=".. and see the images!">
</form>
</body>
</html>`
var upperTemplate = template.Must(template.New("showimage").Parse(upperTemplateHTML)) //irrelevant to issue here
func showimage(w http.ResponseWriter, r *http.Request) {
tag := r.FormValue("str")
safeTag := url.QueryEscape(tag)
fullUrl := fmt.Sprintf("https://api.instagram.com/v1/users/search?q=%s&access_token=ACCESS-TOKEN&count=1", safeTag)
client := &http.Client{}
req, err := http.NewRequest("GET", fullUrl, nil)
if err != nil {
log.Fatal("NewRequest: ", err)
return
}
resp, requestErr := client.Do(req)
if requestErr != nil {
log.Fatal("Do: ", requestErr)
return
}
defer resp.Body.Close()
body, dataReadErr := ioutil.ReadAll(resp.Body)
if dataReadErr != nil {
log.Fatal("ReadAll: ", dataReadErr)
return
}
res := make(map[string][]map[string]interface{})
However, when I try to put data into the interface
json.Unmarshal(body, &res)
userid, _ := res["data"][0]["username"]
queryUrl := fmt.Sprintf("http://instagram.com/%s", userid)
I get the error
http: panic serving [::1]:63089: runtime error: index out of range
goroutine 28 [running]:
any idea why? This error is resolved if I remove the [] in res:= and userid :=, but I won't be able to access the data I want.
It is wrong with "res"'s using. "res" is a map(key is string, value is a slice),so res["data"] may be a nil in your code, and it will panic when you use res["data"][0] .You should do like this:
json.Unmarshal(body, &res)
s, ok := res["data"]
if ok {
if len(s)>0{
userid , ok := s[0]["username"]
if ok{
queryUrl := fmt.Sprintf("http://instagram.com/%s", userid)
}
}
}