I have the following code which I can't run on play because I use the gin framework and filewalk.
package main
import (
"fmt"
"os"
"path/filepath"
"time"
"regexp"
"github.com/gin-gonic/gin"
"sort"
"strings"
"github.com/davidscholberg/go-durationfmt"
)
var filext = regexp.MustCompile(`\.[mM][pP]4|\.[mM]4[vV]|\.jpg|\.[hH]264|\.go`)
var m map[int]string
var keys []int
func main() {
if gin.IsDebugging() {
fmt.Print("This progamm shows only the path and the age of a file in 'STARTDIR'\n")
fmt.Print("only the following files will be handled '.[mM][pP]4|.[mM]4[vV$|.[hH]264|.go'\n")
fmt.Print("The git repository: https://gitlab.com/aleks001/show-files-age \n")
}
if len(os.Getenv("LISTEN_IP_PORT")) == 0 {
fmt.Print("I need a ip and port on which I should listen.\n")
os.Exit(1)
}
router := gin.Default()
gin.DisableConsoleColor()
router.GET("/videoinfo",getInfo)
router.Run(os.Getenv("LISTEN_IP_PORT"))
}
func getInfo(c *gin.Context) {
loc, _ := time.LoadLocation("Europe/Vienna")
var startdir = ""
if os.Getenv("STARTDIR") != "" {
startdir = os.Getenv("STARTDIR")
} else if c.GetHeader("STARTDIR") != "" {
startdir = c.GetHeader("STARTDIR")
} else {
c.String(404,"Startdir not found <br>\n")
return
}
m = make(map[int]string)
keys = nil
filepath.Walk(startdir,walkpath)
for k := range m {
keys = append(keys, k)
}
sort.Ints(keys)
for _, k := range keys {
t := time.Date(time.Now().Year(),time.Now().Month(),time.Now().Day(),time.Now().Hour(),k,time.Now().Second(),time.Now().Nanosecond(),loc)
durStr, err := durationfmt.Format(time.Since(t), "%h:%m")
if err != nil {
fmt.Println(err)
} else {
//fmt.Println(durStr)
fmt.Printf("Key: %s Value: %s\n", durStr , m[k])
c.String(200,"Minutes: %s File: %s\n", durStr, m[k])
}
}
}
func walkpath(path string, f os.FileInfo, err error) error {
if err != nil {
fmt.Println(err)
} else {
if filext.MatchString(path) {
age := time.Now().Sub(f.ModTime())
path_new := strings.Replace(path,"/videos/","",1)
// path_new := strings.Replace(path,"..\\","",1)
/*
fmt.Printf("Path: %s, ModTime: %s, Age: %s <br>\n", walker.Path(), walker.Stat().ModTime(), age)
c.String(200,"Path: %s, ModTime: %s, Age: %s <br>\n", walker.Path(), walker.Stat().ModTime(), age)
*/
fmt.Printf("Path: %s, Age: %d age minutes %0.2f <br>\n", path_new, age, age.Minutes())
m[int(age.Minutes())]=path_new
//c.String(200,"Path: %s, Age: %0.2f <br>\n", path, age.Minutes())
}
//fmt.Printf("%s with %d bytes at motime %s\n", path,f.Size(), f.ModTime())
}
return nil
}
What I want to do is a sorted output of files based on filext als filter and the modtime as sort criteria.
I was able to fulfil the most part of the request but the output looks ugly as you can see below.
I have used https://github.com/davidscholberg/go-durationfmt to format the duration but the output looks ugly or I missus the library.
Minutes: 0:6 File: upload/dir003/file1.m4v
Minutes: 0:5 File: transfer/dir5/file2.jpg
Minutes: -5:-48 File: transfer/dir001/file.mp4
Minutes: -6:-21 File: transfer/03.jpg
Minutes: -6:-22 File: transfer/02.mp4
FYI, if you just want to quickly display a duration, the built-in formatting works well:
fmt.Sprintf("duration: %s", d)
will display something like this:
duration: 7h3m45s
For example, to provide a custom format for a duration,
package main
import (
"fmt"
"time"
)
func fmtDuration(d time.Duration) string {
d = d.Round(time.Minute)
h := d / time.Hour
d -= h * time.Hour
m := d / time.Minute
return fmt.Sprintf("%02d:%02d", h, m)
}
func main() {
modTime := time.Now().Round(0).Add(-(3600 + 60 + 45) * time.Second)
since := time.Since(modTime)
fmt.Println(since)
durStr := fmtDuration(since)
fmt.Println(durStr)
}
Playground: https://play.golang.org/p/HT4bFfoA5r
Output:
1h1m45s
01:02
If you want to sort on a duration then use the Go sort package. I would sort on ModTime to defer the calculation of the duration, Since(ModTime), to be accurate at the time it is printed. For example,
package main
import (
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
func isVideo(path string) bool {
videos := []string{".mp4", ".m4v", ".h264"}
ext := strings.ToLower(filepath.Ext(path))
for _, video := range videos {
if ext == video {
return true
}
}
return false
}
type modTimeInfo struct {
path string
modTime time.Time
}
func walkModTime(root string) ([]modTimeInfo, error) {
var infos []modTimeInfo
err := filepath.Walk(
root,
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.Mode().IsRegular() {
path = filepath.Clean(path)
if !isVideo(path) {
return nil
}
sep := string(filepath.Separator)
dir := sep + `Videos` + sep
path = strings.Replace(path, dir, sep, 1)
infos = append(infos, modTimeInfo{
path: path,
modTime: info.ModTime()},
)
}
return nil
},
)
if err != nil {
return nil, err
}
return infos, nil
}
func sortModTime(infos []modTimeInfo) {
sort.SliceStable(
infos,
func(i, j int) bool {
return infos[i].modTime.Before(infos[j].modTime)
},
)
}
func fmtAge(d time.Duration) string {
d = d.Round(time.Minute)
h := d / time.Hour
d -= h * time.Hour
m := d / time.Minute
return fmt.Sprintf("%02d:%02d", h, m)
}
func main() {
root := `/home/peter/Videos` // Testing ...
infos, err := walkModTime(root)
if err != nil {
fmt.Println(err)
return
}
sortModTime(infos)
now := time.Now()
for _, info := range infos {
age := fmtAge(now.Sub(info.modTime))
fmt.Println("Age (H:M):", age, "File:", info.path)
}
}
Playground: https://play.golang.org/p/j2TUmJdAi4
Another way to format the duration if you don't care about the day, month or year
package main
import (
"fmt"
"time"
)
type Timespan time.Duration
func (t Timespan) Format(format string) string {
z := time.Unix(0, 0).UTC()
return z.Add(time.Duration(t)).Format(format)
}
func main() {
dur := 7777 * time.Second
fmt.Println(Timespan(dur).Format("15:04:05")) // 02:09:37
}
https://play.golang.org/p/XM-884oYMvE
Another easy way is to use the built in time.Duration String function.
duration.String()
Output: 28m26.550805438s
Or to round the seconds first
duration.Round(time.Second).String()
Output: 28m27s
Format yourself.
package main
import (
"fmt"
"time"
)
func main() {
d := 7777 * time.Second
hour := int(d.Seconds() / 3600)
minute := int(d.Seconds()/60) % 60
second := int(d.Seconds()) % 60
fmt.Printf("%d:%02d:%02d\n", hour, minute, second) // 02:09:37
}
https://go.dev/play/p/AFay62Qg2GB
Related
Why is this program so slow? I thought the code was fairly optimized, but it takes significantly long than the find command when use on my root filesystem.
It takes about 4 minutes, as opposed to the find command which takes about 40 seconds.
I tried removing the sorting algorithm, but doesn't speed up the program.
package main
import (
"fmt"
"io"
"io/fs"
"log"
"os"
"sort"
"sync"
"github.com/google/fscrypt/filesystem"
"github.com/sirupsen/logrus"
"gopkg.in/alecthomas/kingpin.v2"
)
var (
mountpoint = kingpin.Flag("mount", "The mount to find the largest file usages. Can be a subath of mount").Required().String()
limit = kingpin.Flag("limit", "The maximum number of files return to the display").Default("10").Short('l').Int()
)
var device string
type fileDisplay struct {
Size int64
Path string
}
type bySize []fileDisplay
func (a bySize) Len() int { return len(a) }
func (a bySize) Less(i, j int) bool { return a[i].Size < a[j].Size }
func (a bySize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
var fileChan = make(chan fileDisplay)
var files []fileDisplay
func main() {
log.SetOutput(io.Discard)
kingpin.Version("0.0.1")
kingpin.Parse()
//Define limit after parsing
logrus.SetLevel(logrus.FatalLevel)
if (*mountpoint)[len(*mountpoint)-1:] != "/" {
*mountpoint = *mountpoint + "/"
}
fmt.Println("Finding the top", *limit, "largest files on filesystem", *mountpoint, "\n================================================")
mount, err := filesystem.FindMount(*mountpoint)
if err != nil {
logrus.Fatal(err)
}
device = mount.Device
entries, err := os.ReadDir(*mountpoint)
if err != nil {
logrus.Fatal(err)
}
var wg sync.WaitGroup
getFiles(*mountpoint, entries, &wg)
go func() {
defer close(fileChan)
wg.Wait()
}()
var last int64
for file := range fileChan {
if file.Size > last {
files = append(files, file)
} else {
files = append([]fileDisplay{file}, files...)
}
}
sort.Sort(bySize(files))
var shortFiles []fileDisplay
if len(files) > *limit {
shortFiles = files[len(files)-*limit:]
} else {
shortFiles = files
}
for _, file := range shortFiles {
fmt.Println(file.Path, file.DisplaySizeIEC())
}
}
func getFiles(start string, entries []fs.DirEntry, wg *sync.WaitGroup) {
for _, entry := range entries {
wg.Add(1)
go handleEntry(start, entry, wg)
}
}
func handleEntry(start string, entry fs.DirEntry, wg *sync.WaitGroup) {
defer wg.Done()
var file fileDisplay
mount, err := filesystem.FindMount(start + entry.Name())
if err != nil {
logrus.Fatalln(err, start+entry.Name())
return
}
if mount.Device == device {
if entry.Type().IsRegular() {
fileInfo, err := os.Stat(start + entry.Name())
if err != nil {
logrus.Fatalln(err, start+entry.Name())
return
}
file.Path = start + entry.Name()
file.Size = fileInfo.Size()
fileChan <- file
} else if entry.IsDir() {
entries, err := os.ReadDir(start + entry.Name())
if err != nil {
logrus.Fatalln(err, start+entry.Name())
return
}
logrus.Info("Searching ", start+entry.Name())
getFiles(start+entry.Name()+"/", entries, wg)
}
}
}
func (f *fileDisplay) DisplaySizeIEC() string {
const unit = 1024
b := f.Size
if b < unit {
return fmt.Sprintf("%dB", b)
}
div, exp := int64(unit), 0
for n := b / unit; n >= unit; n /= unit {
div *= unit
exp++
}
return fmt.Sprintf("%.2f%ciB",
float64(b)/float64(div), "KMGTPE"[exp])
}
Edit: I tried removing the channel and just appending to the slice. This sped it up, but it's not safe because multiple routines could be accessing it.
My final draft involved dropping the channel and using sync.RWMutex to lock the list and a custom append function to append with the lock. This allowed me to drop the channel and use append without risking multiple routines editing the same slice.
I dropped the channel because this was causing routines to stay open until the for loop over the open channel could reach their message. My channek operations were blocking. So the routines caused it to slow to the speed of the for loop iterating over the channel.
You can see the differences here:
package main
import (
"fmt"
"io"
"io/fs"
"log"
"os"
"sort"
"sync"
"github.com/google/fscrypt/filesystem"
"github.com/sirupsen/logrus"
"gopkg.in/alecthomas/kingpin.v2"
)
var (
mountpoint = kingpin.Flag("mount", "The mount to find the largest file usages. Can be a subath of mount").Required().String()
limit = kingpin.Flag("limit", "The maximum number of files return to the display").Default("10").Short('l').Int()
)
var device string
type fileDisplays struct {
sync.RWMutex
Files []fileDisplay
}
var files fileDisplays
type fileDisplay struct {
Size int64
Path string
}
type bySize []fileDisplay
func (a bySize) Len() int { return len(a) }
func (a bySize) Less(i, j int) bool { return a[i].Size < a[j].Size }
func (a bySize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func main() {
log.SetOutput(io.Discard)
kingpin.Version("0.0.1")
kingpin.Parse()
//Define limit after parsing
logrus.SetLevel(logrus.FatalLevel)
if (*mountpoint)[len(*mountpoint)-1:] != "/" {
*mountpoint = *mountpoint + "/"
}
fmt.Println("Finding the top", *limit, "largest files on filesystem", *mountpoint, "\n================================================")
mount, err := filesystem.FindMount(*mountpoint)
if err != nil {
logrus.Fatal(err)
}
device = mount.Device
entries, err := os.ReadDir(*mountpoint)
if err != nil {
logrus.Fatal(err)
}
var wg sync.WaitGroup
getFiles(*mountpoint, entries, &wg)
wg.Wait()
sort.Sort(bySize(files.Files))
var shortFiles []fileDisplay
if len(files.Files) > *limit {
shortFiles = files.Files[len(files.Files)-*limit:]
} else {
shortFiles = files.Files
}
for _, file := range shortFiles {
fmt.Println(file.Path, file.DisplaySizeIEC())
}
}
func getFiles(start string, entries []fs.DirEntry, wg *sync.WaitGroup) {
for _, entry := range entries {
wg.Add(1)
go handleEntry(start, entry, wg)
}
}
func handleEntry(start string, entry fs.DirEntry, wg *sync.WaitGroup) {
defer wg.Done()
var file fileDisplay
mount, err := filesystem.FindMount(start + entry.Name())
if err != nil {
logrus.Errorln(err, start+entry.Name())
return
}
if mount.Device == device {
if entry.Type().IsRegular() {
fileInfo, err := os.Stat(start + entry.Name())
if err != nil {
logrus.Errorln(err, start+entry.Name())
return
}
file.Path = start + entry.Name()
file.Size = fileInfo.Size()
files.Append(file)
} else if entry.IsDir() {
entries, err := os.ReadDir(start + entry.Name())
if err != nil {
logrus.Errorln(err, start+entry.Name())
return
}
logrus.Info("Searching ", start+entry.Name())
getFiles(start+entry.Name()+"/", entries, wg)
}
}
}
func (f *fileDisplay) DisplaySizeIEC() string {
const unit = 1024
b := f.Size
if b < unit {
return fmt.Sprintf("%dB", b)
}
div, exp := int64(unit), 0
for n := b / unit; n >= unit; n /= unit {
div *= unit
exp++
}
return fmt.Sprintf("%.2f%ciB",
float64(b)/float64(div), "KMGTPE"[exp])
}
func (fd *fileDisplays) Append(item fileDisplay) {
fd.Lock()
defer fd.Unlock()
fd.Files = append(fd.Files, item)
}
It may be a stupid question because I just learned Golang. I hope you understand.
I am making a program to extract data from the homepage using the goquery package:
package main
import (
"fmt"
"log"
"net/http"
"github.com/PuerkitoBio/goquery"
)
var url string = "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=3"
func main() {
getPages()
}
func getPages() int {
res, err := http.Get(url)
checkErr(err)
checkCode(res)
defer res.Body.Close()
doc, err := goquery.NewDocumentFromReader(res.Body)
checkErr(err)
doc.Find(".tplPagination").Each(func(i int, s *goquery.Selection) {
fmt.Println(s.Find("a"))
})
return 0
}
func checkErr(err error) {
if err != nil {
log.Fatalln(err)
fmt.Println(err)
}
}
func checkCode(res *http.Response) {
if res.StatusCode != 200 {
log.Fatalln("Request failed with statusCode:", res.StatusCode)
}
}
It prints below:
&{[0x140002db0a0 0x140002db570 0x140002db810 0x140002dbd50 0x140002dc000 0x140002dc2a0 0x140002dc540 0x140002dc850] 0x140000b2438 0x14000305680}
&{[0x140002dcd90 0x140002dd810] 0x140000b2438 0x14000305710}
But I just want to print only the first array out. Like this:
[0x140002dcd90 0x140002dd810]
How can I destruct them?
The problem is that you are printing as result is matched.
You can save the *goquery.Selection in a new slice and print only the last element. This example is working because you want the last occurrence, but in real life you must parse the query result for something in specific to not depend about result order.
// type Selection struct {
// Nodes []*html.Node
// document *Document
// prevSel *Selection
// }
var temp []*goquery.Selection
temp = append(temp, doc.Find(".tplPagination").Each(func(i int, s *goquery.Selection) {
s.Find("a")
}))
fmt.Printf("last: %v\n", temp[len(temp)-1])
temp[len(temp)-1]: &{[0xc0002dcd90 0xc0002e0a80] 0xc00000e3f0 0xc000309770}
The Nodes []*html.Node can be accessed with same example:
fmt.Printf("last: %v\n", temp[len(temp)-1].Nodes)
As per your comment you were looking to parse the page and get the number of pages and number of posts. Here is my attempt:
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"log"
"math"
"net/http"
"strconv"
"strings"
)
func errCheck(err error) {
if err != nil {
log.Fatal(err)
}
}
func ExampleScrape() {
url := "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=%s"
page := 3
fmt.Println("Current page:", page)
res, err := http.Get(fmt.Sprintf(url, page))
errCheck(err)
defer res.Body.Close()
if res.StatusCode != 200 {
log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
}
doc, err := goquery.NewDocumentFromReader(res.Body)
errCheck(err)
posts_div := doc.Find(".recruit-info div.dev_list.lists-cnt")
total_count_div := posts_div.Nodes[0]
var total_count int
for _, a := range total_count_div.Attr {
if a.Key == "total-count" {
total_count, err = strconv.Atoi(a.Val)
errCheck(err)
break
}
}
fmt.Println("Total count:", total_count)
titles := posts_div.Find(".list-post .title")
fmt.Println("On this page:", len(titles.Nodes))
fmt.Println("Pages:", math.Ceil(float64(total_count)/float64(len(titles.Nodes))))
fmt.Println("\nTitles on this page:")
titles.Each(func(i int, s *goquery.Selection) {
fmt.Println("\t-", strings.TrimSpace(s.Text()))
})
}
func main() {
ExampleScrape()
}
Try to write a directory traversing program by goroutine and channel, but unable to get the needed results. Expect to get the number of total sub-directory, files count. But when I run the code below, it will stuck in "dirCount <-1". PS: is that possible to write such a program with infinite depth traversing
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"github.com/gorilla/mux"
)
type DirectoryItem struct {
Name string `json:"name,omitemty"`
IsDir bool `json:"isDir,omitempty"`
Size int64 `json:"size,omitempty"`
}
type DirectoryInfo struct {
Path string `json:"path,omitemty"`
Dirs []DirectoryItem `json:"dirs,omitempty"`
}
var dirItems []DirectoryItem
var dirInfo DirectoryInfo
func GetOneDirItems(w http.ResponseWriter, req *http.Request) {
fpath := "E:\\"
query := req.URL.Query()
path := query["path"][0]
fpath = fpath + path
dirInfo, _ := CheckEachItem(fpath)
json.NewEncoder(w).Encode(dirInfo)
}
func CheckEachItem(dirPath string) (directory DirectoryInfo, err error) {
var items []DirectoryItem
dir, err := ioutil.ReadDir(dirPath)
if err != nil {
return directory, err
}
for _, fi := range dir {
if fi.IsDir() {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: true, Size: 0})
} else {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: false, Size: fi.Size()})
}
}
directory = DirectoryInfo{Path: dirPath, Dirs: items}
return directory, nil
}
func CalcDirInfo(w http.ResponseWriter, req *http.Request) {
query := req.URL.Query()
path := query["path"][0]
url := "http://localhost:8090/api/GetOneDirItems?path="
url += path
dirCount := make(chan int)
fileCount := make(chan int)
go Recr(url, dirCount, fileCount)
//
dirTotalCount := 0
for i := range dirCount {
dirTotalCount += i
}
fmt.Println(dirTotalCount)
}
func Recr(url string, dirCount chan int, fileCount chan int) {
fmt.Println(url)
resp, _ := http.Get(url)
dirInfo = DirectoryInfo{}
body, _ := ioutil.ReadAll(resp.Body)
defer resp.Body.Close()
json.Unmarshal([]byte(body), &dirInfo)
for _, itm := range dirInfo.Dirs {
fmt.Println("--")
if itm.IsDir {
newUrl := url + "/" + itm.Name
//// looks like stuck in here
dirCount <- 1
go Recr(newUrl, dirCount, fileCount)
} else {
fileCount <- 1
}
}
}
func main() {
router := mux.NewRouter()
//#1 func one:
//result sample:
//{"path":"E:\\code","dirs":[{"name":"A","isDir":true},{"name":"B","isDir":false}]}
router.HandleFunc("/api/GetOneDirItems", GetOneDirItems).Methods("GET")
//#2 2nd api to invoke 1st api recursively
//expected result
//{"path":"E:\\code","dirCount":2, "fileCount":3]}
router.HandleFunc("/api/GetDirInfo", CalcDirInfo).Methods("GET")
log.Fatal(http.ListenAndServe(":8090", router))
}
find some code example but not feedback the right number...
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"path/filepath"
"sync"
"github.com/gorilla/mux"
)
//!+1
var done = make(chan struct{})
func cancelled() bool {
select {
case <-done:
return true
default:
return false
}
}
//!-1
type DirectoryItem struct {
Name string `json:"name,omitemty"`
IsDir bool `json:"isDir,omitempty"`
Size int64 `json:"size,omitempty"`
}
type DirectoryInfo struct {
Path string `json:"path,omitemty"`
Dirs []DirectoryItem `json:"dirs,omitempty"`
}
var dirItems []DirectoryItem
var dirInfo DirectoryInfo
func GetOneDirItems(w http.ResponseWriter, req *http.Request) {
fpath := "E:\\"
query := req.URL.Query()
path := query["path"][0]
fpath = fpath + path
dirInfo, _ := CheckEachItem(fpath)
json.NewEncoder(w).Encode(dirInfo)
}
func CheckEachItem(dirPath string) (directory DirectoryInfo, err error) {
var items []DirectoryItem
dir, err := ioutil.ReadDir(dirPath)
if err != nil {
return directory, err
}
for _, fi := range dir {
if fi.IsDir() {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: true, Size: 0})
} else {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: false, Size: fi.Size()})
}
}
directory = DirectoryInfo{Path: dirPath, Dirs: items}
return directory, nil
}
func CalcDirInfo(w http.ResponseWriter, req *http.Request) {
query := req.URL.Query()
path := query["path"][0]
url := "http://localhost:8090/api/GetOneDirItems?path="
url += path
fpath := "E:\\"
fpath = fpath + path
dirInfo, _ := CheckEachItem(fpath)
fileSizes := make(chan int64)
dirCount := make(chan int, 100)
var n sync.WaitGroup
for _, item := range dirInfo.Dirs {
n.Add(1)
url = url + "/" + item.Name
go Recr(url, &n, dirCount, fileSizes)
}
go func() {
n.Wait()
close(fileSizes)
close(dirCount)
}()
// Print the results periodically.
// tick := time.Tick(500 * time.Millisecond)
var nfiles, ndirs, nbytes int64
loop:
//!+3
for {
select {
case <-done:
// Drain fileSizes to allow existing goroutines to finish.
for range fileSizes {
// Do nothing.
}
return
case size, ok := <-fileSizes:
// ...
//!-3
if !ok {
break loop // fileSizes was closed
}
nfiles++
nbytes += size
case _, ok := <-dirCount:
// ...
//!-3
if !ok {
break loop // dirCount was closed
}
ndirs++
// case <-tick:
// printDiskUsage(nfiles, ndirs, nbytes)
}
}
printDiskUsage(nfiles, ndirs, nbytes) // final totals
}
func Recr(url string, n *sync.WaitGroup, dirCount chan<- int, fileSizes chan<- int64) {
defer n.Done()
resp, _ := http.Get(url)
dirInfo = DirectoryInfo{}
body, _ := ioutil.ReadAll(resp.Body)
defer resp.Body.Close()
json.Unmarshal([]byte(body), &dirInfo)
for _, itm := range dirInfo.Dirs {
if itm.IsDir {
dirCount <- 1
n.Add(1)
newUrl := url + "/" + itm.Name
go Recr(newUrl, n, dirCount, fileSizes)
} else {
fileSizes <- itm.Size
}
}
}
func main() {
// Determine the initial directories.
roots := os.Args[1:]
if len(roots) == 0 {
roots = []string{"."}
}
// API Services
router := mux.NewRouter()
router.HandleFunc("/api/GetOneDirItems", GetOneDirItems).Methods("GET")
router.HandleFunc("/api/GetDirInfo", CalcDirInfo).Methods("GET")
log.Fatal(http.ListenAndServe(":8090", router))
}
func printDiskUsage(nfiles, ndirs, nbytes int64) {
fmt.Printf("%d files %.1f GB %d dirs\n", nfiles, float64(nbytes)/1e9, ndirs)
}
// walkDir recursively walks the file tree rooted at dir
// and sends the size of each found file on fileSizes.
//!+4
func walkDir(dir string, n *sync.WaitGroup, fileSizes chan<- int64, dirCount chan<- int) {
defer n.Done()
if cancelled() {
return
}
for _, entry := range dirents(dir) {
// ...
//!-4
if entry.IsDir() {
dirCount <- 1
n.Add(1)
subdir := filepath.Join(dir, entry.Name())
go walkDir(subdir, n, fileSizes, dirCount)
} else {
fileSizes <- entry.Size()
}
//!+4
}
}
//!-4
var sema = make(chan struct{}, 20) // concurrency-limiting counting semaphore
// dirents returns the entries of directory dir.
//!+5
func dirents(dir string) []os.FileInfo {
select {
case sema <- struct{}{}: // acquire token
case <-done:
return nil // cancelled
}
defer func() { <-sema }() // release token
// ...read directory...
//!-5
f, err := os.Open(dir)
if err != nil {
fmt.Fprintf(os.Stderr, "du: %v\n", err)
return nil
}
defer f.Close()
entries, err := f.Readdir(0) // 0 => no limit; read all entries
if err != nil {
fmt.Fprintf(os.Stderr, "du: %v\n", err)
// Don't return: Readdir may return partial results.
}
return entries
}
The problem here is the your program has no way of ending. Basically whenever the code recurses into another directory, you need to count that, and then when it finishes processing the directory, you need to push 1 to a done channel. When the count of directories recursed into == the number done, you can exit the channel select (that's the other missing part):
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"github.com/gorilla/mux"
)
type DirectoryItem struct {
Name string `json:"name,omitemty"`
IsDir bool `json:"isDir,omitempty"`
Size int64 `json:"size,omitempty"`
}
type DirectoryInfo struct {
Path string `json:"path,omitemty"`
Dirs []DirectoryItem `json:"dirs,omitempty"`
}
var dirItems []DirectoryItem
var dirInfo DirectoryInfo
func GetOneDirItems(w http.ResponseWriter, req *http.Request) {
fpath := "E:\\"
query := req.URL.Query()
path := query["path"][0]
fpath = fpath + path
dirInfo, err := CheckEachItem(fpath)
if err != nil {
panic(err)
}
json.NewEncoder(w).Encode(dirInfo)
}
func CheckEachItem(dirPath string) (directory DirectoryInfo, err error) {
var items []DirectoryItem
dir, err := ioutil.ReadDir(dirPath)
if err != nil {
return directory, err
}
for _, fi := range dir {
if fi.IsDir() {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: true, Size: 0})
} else {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: false, Size: fi.Size()})
}
}
directory = DirectoryInfo{Path: dirPath, Dirs: items}
return directory, nil
}
func CalcDirInfo(w http.ResponseWriter, req *http.Request) {
query := req.URL.Query()
path := query["path"][0]
url := "http://localhost:8090/api/GetOneDirItems?path="
url += path
dirCount := make(chan int, 10)
fileCount := make(chan int, 10)
doneCount := make(chan int, 10)
go Recr(url, dirCount, fileCount, doneCount)
//
dirTotalCount := 0
doneTotalCount := 0
out:
for {
select {
case dir := <-dirCount:
dirTotalCount += dir
fmt.Printf("dirTotalCount=%d\n", dirTotalCount)
case <-fileCount:
case done := <-doneCount:
doneTotalCount += done
fmt.Printf("doneTotalCount=%d dirTotalCount=%d\n", doneTotalCount, dirTotalCount)
if doneTotalCount == dirTotalCount+1 { // need -1 because of the root dir, which is not counted as a subdirectory
break out
}
}
}
fmt.Println("ALL DONE")
fmt.Printf("TOTAL=%d\n", dirTotalCount)
}
func Recr(url string, dirCount chan int, fileCount chan int, doneCount chan int) {
// fmt.Printf("url=%s\n", url)
resp, _ := http.Get(url)
dirInfo = DirectoryInfo{}
body, _ := ioutil.ReadAll(resp.Body)
defer resp.Body.Close()
json.Unmarshal([]byte(body), &dirInfo)
// fmt.Printf("dirInfo=%+v body=%s", dirInfo, string(body))
for _, itm := range dirInfo.Dirs {
if itm.IsDir {
newUrl := url + "/" + itm.Name
//// looks like stuck in here
// fmt.Printf("pushing one dir from %s\n", url)
dirCount <- 1
go Recr(newUrl, dirCount, fileCount, doneCount)
} else {
// fmt.Println("pushing one file")
fileCount <- 1
}
}
doneCount <- 1
}
func main() {
router := mux.NewRouter()
//#1 func one:
//result sample:
//{"path":"E:\\code","dirs":[{"name":"A","isDir":true},{"name":"B","isDir":false}]}
router.HandleFunc("/api/GetOneDirItems", GetOneDirItems).Methods("GET")
//#2 2nd api to invoke 1st api recursively
//expected result
//{"path":"E:\\code","dirCount":2, "fileCount":3]}
router.HandleFunc("/api/GetDirInfo", CalcDirInfo).Methods("GET")
log.Fatal(http.ListenAndServe(":8090", router))
}
I'm trying to parse a large image dataset. I'm using filepath.Walk ]and processing each file I find there. I'd like the filepath.
package main
import (
"fmt"
"image/color"
"image/png"
"math/rand"
"os"
)
var (
Black = color.Gray{0}
)
func getRandFloatNumber(min, max float32) float32 {
return (rand.Float32()*2 - min) * max
}
func openImage(path string, info os.FileInfo, err error) error {
infile, _ := os.Open(path)
defer infile.Close()
img, err := png.Decode(infile)
if err != nil {
return nil
}
array := make([]float32, 128*128)
for y := 0; y < 128; y++ {
for x := 0; x < 128; x++ {
c := color.GrayModel.Convert(img.At(x, y)).(color.Gray)
if c == Black {
array[x*y] = getRandFloatNumber(0.7, 0.95)
} else {
array[x*y] = getRandFloatNumber(0.1, 0.25)
}
}
}
fmt.Println(info.Name())
return nil
}
How to run openImage as a gorutine?
Or how to optimize this code?
You can't get filepath.Walk to call your function in a goroutine, but you can simply start a goroutine in your WalkFunc.
package main
import (
"os"
"path/filepath"
)
func main() {
filepath.Walk("/my/dir", func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
return nil
}
// Check more criteria if necessary. Also consider limiting the number
// of concurrent goroutines.
go openImage(path, info)
return nil
})
}
func openImage(path string, info os.FileInfo) {
}
I a task written in Go to get a unique list from a bunch of text files. I put in some parallelization using channels and am having inconsistent results now - a variance of 5 records output/not output each time with the same input files.
The am testing it with go run process.go | wc -l on Fedora x86_64, go1.1.2, 8 core amd.
The code is:
package main
import (
"fmt"
"os"
"io"
"encoding/csv"
"regexp"
"log"
)
var (
cleanRe *regexp.Regexp = regexp.MustCompile("[^0-9]+")
comma rune ='\t'
fieldsPerRecord=-1
)
func clean(s string) string {
clean:=cleanRe.ReplaceAllLiteralString(s,"")
if len(clean)<6 {return ""}
return clean
}
func uniqueChannel(inputChan chan []string, controlChan chan string) {
defer func(){controlChan<-"Input digester."}()
uniq:=make(map[string]map[string]bool)
i:=0
for record:= range inputChan {
i++
id,v:=record[0],record[1]
if uniq[id]==nil {
uniq[id]=make(map[string]bool)
} else if !uniq[id][v] {
uniq[id][v]=true
fmt.Println(id,string(comma),v)
}
}
log.Println("digest ", i)
}
func processFile(fileName string, outputChan chan []string, controlChan chan string) {
defer func(){controlChan<-fileName}()
f,err:=os.Open(fileName)
if err!=nil{log.Fatal(err)}
r:=csv.NewReader(f)
r.FieldsPerRecord = fieldsPerRecord
r.Comma = comma
// Process the records
i:=0
for record,err:=r.Read();err!=io.EOF;record,err=r.Read() {
if err!=nil{continue}
id:=record[0]
for _,v:=range record[1:] {
if cleanV:=clean(v);cleanV!=""{
i++
outputChan<-[]string{id,cleanV}
}
}
}
log.Println(fileName,i)
}
func main() {
inputs:=[]string{}
recordChan:=make(chan []string,100)
processesLeft:=len(inputs)+1
controlChan:=make(chan string,processesLeft)
// Ingest the inputs
for _,fName:=range inputs {
go processFile(fName,recordChan,controlChan)
}
// This is the loop to ensure it's all unique
go uniqueChannel(recordChan,controlChan)
// Make sure all the channels close up
for processesLeft>0 {
if processesLeft==1{
close(recordChan)
}
c:=<-controlChan
log.Println(c)
processesLeft--
}
close(controlChan)
}
It seems like it closes the channel before it's empty and quite. Without the closing mechanism I was getting deadlocks - I'm out of ideas.
You could ditch the control channel and use a sync.WaitGroup:
package main
import (
"encoding/csv"
"fmt"
"io"
"log"
"os"
"regexp"
"sync"
)
var (
cleanRe *regexp.Regexp = regexp.MustCompile("[^0-9]+")
comma rune = '\t'
fieldsPerRecord = -1
)
func clean(s string) string {
clean := cleanRe.ReplaceAllLiteralString(s, "")
if len(clean) < 6 {
return ""
}
return clean
}
func uniqueChannel(inputChan chan []string) {
uniq := make(map[string]map[string]bool)
i := 0
for record := range inputChan {
i++
id, v := record[0], record[1]
if uniq[id] == nil {
uniq[id] = make(map[string]bool)
} else if !uniq[id][v] {
uniq[id][v] = true
fmt.Println(id, string(comma), v)
}
}
log.Println("digest ", i)
}
func processFile(fileName string, outputChan chan []string) {
f, err := os.Open(fileName)
if err != nil {
log.Fatal(err)
}
r := csv.NewReader(f)
r.FieldsPerRecord = fieldsPerRecord
r.Comma = comma
// Process the records
for record, err := r.Read(); err != io.EOF; record, err = r.Read() {
if err != nil {
continue
}
id := record[0]
for _, v := range record[1:] {
if cleanV := clean(v); cleanV != "" {
outputChan <- []string{id, cleanV}
}
}
}
}
func main() {
inputs := []string{"ex.tsv"}
recordChan := make(chan []string)
var wg sync.WaitGroup
// Ingest the inputs
for _, fName := range inputs {
wg.Add(1)
go func() {
processFile(fName, recordChan)
wg.Done()
}()
}
go func() {
wg.Wait()
close(recordChan)
}()
// This is the loop to ensure it's all unique
uniqueChannel(recordChan)
}