RGBA to Grayscale in parallel Golang - image

I have written a program which converts RGBA image to Grayscale sequentially. I'm now trying to convert it so it runs in parallel.
I kind of understand how I need to be doing this but I'm struggling to get started.
Here is what I have so far.
package main
import (
"image"
"image/color"
"image/jpeg"
"log"
"os"
)
var lum float64
type ImageSet interface {
Set(x, y int, c color.Color)
}
func rgbtogray(r uint32, g uint32, b uint32) float64{
lum = 0.299*float64(r) + 0.587*float64(g) + 0.114*float64(b)
return lum
}
func main() {
file, err := os.Open("flower.jpg")
if err != nil {
log.Fatal(err)
}
defer file.Close()
img, err := jpeg.Decode(file)
if err != nil {
log.Fatal(os.Stderr, "%s: %v\n", "flower.jpg", err)
}
channel1 := make(chan float64)
channel2 := make(chan float64)
b := img.Bounds()
imgSet := image.NewRGBA(b)
halfImage := b.Max.X/2
fullImage := b.Max.X
for y := 0; y < b.Max.Y; y++ {
go func() {
for x := 0; x < halfImage; x++ {
oldPixel := img.At(x, y)
r, g, b, _ := oldPixel.RGBA()
channel1 <- rgbtogray(r, g, b)
pixel := color.Gray{uint8(lum / 256)}
imgSet.Set(x, y, pixel)
}
}()
go func() {
for x := halfImage; x< fullImage; x++ {
oldPixel := img.At(x, y)
r, g, b, _ := oldPixel.RGBA()
channel2 <- rgbtogray(r, g, b)
pixel := color.Gray{uint8(lum / 256)}
imgSet.Set(x, y, pixel)
}
}()
}
outFile, err := os.Create("changed.jpg")
if err != nil {
log.Fatal(err)
}
defer outFile.Close()
jpeg.Encode(outFile, imgSet, nil)
}
This runs but just returns a black image. I know the way I'm going about it is wrong but I'm not 100% what route I need to be taking.
My idea is to split the image down the middle, have one channel work on the pixels on the left and one channel work on the pixels on the right. Before moving down to the next y coordinate and so on.
I've tried moving all of the code in my go functions into my rgbatogray function but I was getting multiple errors to do with passing through variables etc. Would it be best to create another function which deals with the splitting etc as I think I calling my go functions should just look something like:
go func() {
channel1 <- rgbtogray(r, g, b)
}()
go func() {
channel2 <- rgbtogray(r, g, b)
}()
I'm unsure what steps I should be taking next on this so any tips and help greatly appreciated.

Here's an implementation of #JimB's suggestion in comments. It exploits the fact of JPEG images being in YCbCr to process the image setting inplace Cb and Cr components to 128 using one goroutine for each of them.
func set(wg *sync.WaitGroup, a []uint8, v uint8) {
for i := 0; i < len(a); i++ {
a[i] = v
}
wg.Done()
}
func gray(i *image.YCbCr) {
var wg sync.WaitGroup
wg.Add(2)
go set(&wg, i.Cb, 128)
go set(&wg, i.Cr, 128)
wg.Wait()
}
func main() {
i, err := jpeg.Decode(os.Stdin)
if err != nil {
log.Fatalf("decoding image: %v", err)
}
gray(i.(*image.YCbCr))
err = jpeg.Encode(os.Stdout, i, nil)
if err != nil {
log.Fatalf("encoding image: %v", err)
}
}
It turned out pretty simple.
Of course it could be extended to create more goroutines (possibly one per available core) assigning slices of Cb & Cr arrays to each for processing.

How about creating a pipeline of pixels read from the image, converted to gray and finally set to a new image, where all steps run concurrently and each step could be internally parallelized?
Then Go's runtime will parallelize the tasks using all available cores.
This implementation works:
package main
import (
"image"
"image/color"
"image/jpeg"
"log"
"os"
"sync"
)
type Setter interface {
Set(x, y int, c color.Color)
}
type Pixel struct {
X, Y int
C color.Color
}
func sendPixels(in image.Image, out chan Pixel) {
b := in.Bounds()
for x := 0; x < b.Max.X; x++ {
for y := 0; y < b.Max.Y; y++ {
out <- Pixel{x, y, in.At(x, y)}
}
}
close(out)
}
func convertToGray(in chan Pixel, out chan Pixel) {
var wg sync.WaitGroup
for p := range in {
wg.Add(1)
go func(p Pixel) {
r, g, b, _ := p.C.RGBA()
l := 0.299*float64(r) + 0.587*float64(g) + 0.114*float64(b)
out <- Pixel{p.X, p.Y, color.Gray{uint8(l / 256)}}
wg.Done()
}(p)
}
wg.Wait()
close(out)
}
func buildImage(in chan Pixel, out Setter, done chan int) {
for p := range in {
out.Set(p.X, p.Y, p.C)
}
close(done)
}
func main() {
i, err := jpeg.Decode(os.Stdin)
if err != nil {
log.Fatalf("decoding image: %v", err)
}
a := make(chan Pixel, 1000)
go sendPixels(i, a)
b := make(chan Pixel, 1000)
go convertToGray(a, b)
c := image.NewRGBA(i.Bounds())
d := make(chan int)
go buildImage(b, c, d)
<-d
err = jpeg.Encode(os.Stdout, c, nil)
if err != nil {
log.Fatalf("encoding image: %v", err)
}
}
Which can be tested running:
go run main.go <in.jpg >out.jpg

Related

How can I avoid the wrong situation that happens when creating a jsonl type file while handling concurrency?

Crawling is in progress using golang, and at this time, channels are also used to exchange data. However, when I run this code, The speed has definitely increased, but when I try to create a jsonl-type file while processing it with concurrency, a situation such as two line breaks or two consecutive writes occurs. What should I do to prevent this situation?
package next
import (
"encoding/json"
"fmt"
"io/fs"
"net/http"
"net/url"
"os"
"strconv"
)
type IApi struct {
Tree []ITree `json:"tree"`
}
type ITree struct {
CodeTitle string `json:"code_title"`
Degree string `json:"degree"`
EngText string `json:"eng_text"`
GubunCode string `json:"gubun_code"`
KorText string `json:"kor_text"`
LastNodeAt string `json:"last_node_at"`
LevelNo int `json:"level_no"`
LevelTitle string `json:"level_title"`
}
func Main() {
KCD_numbers := []int{5, 6, 7, 8}
c := make(chan IApi)
c1 := make(chan IApi)
c2 := make(chan IApi)
c3 := make(chan IApi)
f, _ := os.OpenFile("./koicd_code_list.json", os.O_CREATE|os.O_RDWR|os.O_APPEND, fs.FileMode(0644))
for _, KCD_NO := range KCD_numbers {
go kcdLoop(KCD_NO, c, f)
}
for _, KCD_NO := range KCD_numbers {
api_url := fmt.Sprintf("https://www.koicd.kr/kcd/0%s/kcd.tree.json", strconv.Itoa(KCD_NO))
k1_list := (<-c).Tree
for _, k1 := range k1_list {
go kcdLoop1(k1, c1, api_url, f)
}
for i := 0; i < len(k1_list); i++ {
k2_list := (<-c1).Tree
for _, k2 := range k2_list {
go kcdLoop2(k2, c2, api_url, f)
}
for j := 0; j < len(k2_list); j++ {
k3_list := (<-c2).Tree
for _, k3 := range k3_list {
go kcdLoop3(k3, c3, api_url, f)
}
for k := 0; k < len(k3_list); k++ {
k4_list := (<-c3).Tree
for _, k4 := range k4_list {
w1, _ := json.Marshal(k4)
f.Write([]byte("\n"))
f.Write(w1)
}
}
}
}
}
f.Close()
}
func kcdLoop(KCD_NO int, c chan IApi, f *os.File) {
api_url := fmt.Sprintf("https://www.koicd.kr/kcd/0%s/kcd.tree.json", strconv.Itoa(KCD_NO))
resp, herr := http.PostForm(api_url, url.Values{})
if herr != nil {
fmt.Printf("error making http request: %s\n", herr)
os.Exit(1)
}
var res IApi
json.NewDecoder(resp.Body).Decode(&res)
c <- res
}
func kcdLoop1(k1 ITree, c1 chan IApi, api_url string, f *os.File) {
k1_resp, k1_herr := http.PostForm(api_url, url.Values{"level_no": {strconv.Itoa(k1.LevelNo)}, "level_title": {k1.LevelTitle}})
if k1_herr != nil {
fmt.Printf("error making http request: %s\n", k1_herr)
os.Exit(1)
}
var k1_res IApi
json.NewDecoder(k1_resp.Body).Decode(&k1_res)
c1 <- k1_res
}
func kcdLoop2(k2 ITree, c2 chan IApi, api_url string, f *os.File) {
k2_resp, k2_herr := http.PostForm(api_url, url.Values{"level_no": {strconv.Itoa(k2.LevelNo)}, "level_title": {k2.LevelTitle}})
if k2_herr != nil {
fmt.Printf("error making http request: %s\n", k2_herr)
os.Exit(1)
}
var k2_res IApi
json.NewDecoder(k2_resp.Body).Decode(&k2_res)
c2 <- k2_res
}
func kcdLoop3(k3 ITree, c3 chan IApi, api_url string, f *os.File) {
w, _ := json.Marshal(k3)
f.Write([]byte("\n"))
f.Write(w)
k3_resp, k3_herr := http.PostForm(api_url, url.Values{"level_no": {strconv.Itoa(k3.LevelNo)}, "level_title": {k3.LevelTitle}})
if k3_herr != nil {
fmt.Printf("error making http request: %s\n", k3_herr)
os.Exit(1)
}
var k3_res IApi
json.NewDecoder(k3_resp.Body).Decode(&k3_res)
c3 <- k3_res
}
The speed has definitely increased, but when I try to create a jsonl-type file while processing it with concurrency, a situation such as two line breaks or two consecutive writes occurs. What should I do to prevent this situation?

How to collect values from a channel into a slice in Go?

Suppose I have a helper function helper(n int) which returns a slice of integers of variable length. I would like to run helper(n) in parallel for various values of n and collect the output in one big slice. My first attempt at this is the following:
package main
import (
"fmt"
"golang.org/x/sync/errgroup"
)
func main() {
out := make([]int, 0)
ch := make(chan int)
go func() {
for i := range ch {
out = append(out, i)
}
}()
g := new(errgroup.Group)
for n := 2; n <= 3; n++ {
n := n
g.Go(func() error {
for _, i := range helper(n) {
ch <- i
}
return nil
})
}
if err := g.Wait(); err != nil {
panic(err)
}
close(ch)
// time.Sleep(time.Second)
fmt.Println(out) // should have the same elements as [0 1 0 1 2]
}
func helper(n int) []int {
out := make([]int, 0)
for i := 0; i < n; i++ {
out = append(out, i)
}
return out
}
However, if I run this example I do not get all 5 expected values, instead I get
[0 1 0 1]
(If I uncomment the time.Sleep I do get all five values, [0 1 2 0 1], but this is not an acceptable solution).
It seems that the problem with this is that out is being updated in a goroutine, but the main function returns before it is done updating.
One thing that would work is using a buffered channel of size 5:
func main() {
ch := make(chan int, 5)
g := new(errgroup.Group)
for n := 2; n <= 3; n++ {
n := n
g.Go(func() error {
for _, i := range helper(n) {
ch <- i
}
return nil
})
}
if err := g.Wait(); err != nil {
panic(err)
}
close(ch)
out := make([]int, 0)
for i := range ch {
out = append(out, i)
}
fmt.Println(out) // should have the same elements as [0 1 0 1 2]
}
However, although in this simplified example I know what the size of the output should be, in my actual application this is not known a priori. Essentially what I would like is an 'infinite' buffer such that sending to the channel never blocks, or a more idiomatic way to achieve the same thing; I've read https://blog.golang.org/pipelines but wasn't able to find a close match to my use case. Any ideas?
In this version of the code, the execution is blocked until ch is closed.
ch is always closed at the end of a routine that is responsible to push into ch. Because the program pushes to ch in a routine, it is not needed to use a buffered channel.
package main
import (
"fmt"
"golang.org/x/sync/errgroup"
)
func main() {
ch := make(chan int)
go func() {
g := new(errgroup.Group)
for n := 2; n <= 3; n++ {
n := n
g.Go(func() error {
for _, i := range helper(n) {
ch <- i
}
return nil
})
}
if err := g.Wait(); err != nil {
panic(err)
}
close(ch)
}()
out := make([]int, 0)
for i := range ch {
out = append(out, i)
}
fmt.Println(out) // should have the same elements as [0 1 0 1 2]
}
func helper(n int) []int {
out := make([]int, 0)
for i := 0; i < n; i++ {
out = append(out, i)
}
return out
}
Here is the fixed version of the first code, it is convoluted but demonstrates the usage of sync.WaitGroup.
package main
import (
"fmt"
"sync"
"golang.org/x/sync/errgroup"
)
func main() {
out := make([]int, 0)
ch := make(chan int)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
for i := range ch {
out = append(out, i)
}
}()
g := new(errgroup.Group)
for n := 2; n <= 3; n++ {
n := n
g.Go(func() error {
for _, i := range helper(n) {
ch <- i
}
return nil
})
}
if err := g.Wait(); err != nil {
panic(err)
}
close(ch)
wg.Wait()
// time.Sleep(time.Second)
fmt.Println(out) // should have the same elements as [0 1 0 1 2]
}
func helper(n int) []int {
out := make([]int, 0)
for i := 0; i < n; i++ {
out = append(out, i)
}
return out
}

Slice automatically be sorted?

While I want to create my own pipeline to practice with goroutines, there's something particularly weird.
I use the random perm function to generate some int numbers, randomly obviously, I write them to IO writer and then read them form IO reader, cuz its binary source so I print them and they are sorted!!
Here's the code:
func RandomSource(tally int) chan int {
out := make(chan int)
sli := rand.Perm(tally)
fmt.Println(sli)
go func() {
for num := range sli {
out <- num
}
close(out)
}()
return out
}
func ReaderSource(reader io.Reader) chan int {
out := make(chan int)
go func() {
buffer := make([]byte, 8)
for ; ; {
n, err := reader.Read(buffer)
if n > 0 {
v := int(binary.BigEndian.Uint64(buffer))
out <- v
}
if err != nil {
break
}
}
close(out)
}()
return out
}
func WriterSink(writer io.Writer, in chan int) {
for v := range in {
buffer := make([]byte, 8)
binary.BigEndian.PutUint64(
buffer, uint64(v))
writer.Write(buffer)
}
}
func main() {
fileName := "small.in"
file, err := os.Create(fileName)
if err != nil {
log.Fatal(err)
}
defer file.Close()
p := RandomSource(500)
WriterSink(file, p)
file, err = os.Open(fileName)
if err != nil {
log.Fatal(err)
}
defer file.Close()
p = ReaderSource(file)
for v := range p {
fmt.Println(v)
}
}
range returns an index as the first value for an array or slice, which always goes from 0 up to len - 1. Use for _, num := range sli { if you want to iterate over the values themselves rather than the set of indices.

Why does this for loop never exit?

There is the following code, the use of which leads to an infinite loop. The values ​​from the channel are correct, the value of the variable sum is also correct. All the goroutines end up without errors.
func responseHandler(w http.ResponseWriter, r *http.Request) {
var c = make(chan string)
for i := 0; i < 100; i++ {
url := fmt.Sprintf("someurl/page%v/etc", i)
go parse(url, i, c)
if i%5 == 0 {
time.Sleep(1000 * time.Millisecond)
}
}
for range c {
sum = append(sum, <-c)
}
fmt.Println("Exit from channel wait")
fmt.Fprintln(w, sum)
}
func parse(url string, num int, c chan string) {
response, err1 := http.Get(url)
if err1 != nil {
log.Fatal(err1)
}
defer response.Body.Close()
if response.StatusCode != 200 {
log.Fatalf("status code error: %d %s", response.StatusCode,
response.Status)
}
res, err := DecodeHTMLBody(response.Body, "windows-1251")
doc, err := goquery.NewDocumentFromReader(res)
if err != nil {
log.Fatal(err)
}
doc.Find(".b-advItem__content").Each(func(i int, s *goquery.Selection) {
title := strings.TrimSpace(s.Find(".someclass").Text())
price := strings.TrimSpace(s.Find(".someclass").Text())
formatPrice := parsePrice(price)
c <- fmt.Sprintf("output %d: %s:%s\n", i, title, formatPrice)
})
fmt.Printf("Channel %d - exit\n", num)
Sum - global []string.
The range statement over a channel exits only when the channel is closed (well, think about it: how the range would otherwise detect there's no more data to fetch?), and nothing closes the channel in your code.
func responseHandler(w http.ResponseWriter, r *http.Request) {
...
for range c {
sum = append(sum, <-c)
if len(aa) == 100 {
close(c)
}
fmt.Fprintln(w, sum)
}
func parse(...){
...
aa = append(aa, num)
}
Adding such a check allows you to exit the loop correctly

How to make fmt.Scanln() read into a slice of integers

I have a line containing 3 numbers that I want to read from stdin with fmt.Scanln() but this code won't work:
X := make([]int, 3)
fmt.Scanln(X...)
fmt.Printf("%v\n", X)
I get this error message:
cannot use X (type []int) as type []interface {} in function argument
I don't get it.
Idiomatic Go would be:
func read(n int) ([]int, error) {
in := make([]int, n)
for i := range in {
_, err := fmt.Scan(&in[i])
if err != nil {
return in[:i], err
}
}
return in, nil
}
interface{} means nothing. Please don't use it if you don't have to.
For example,
package main
import "fmt"
func intScanln(n int) ([]int, error) {
x := make([]int, n)
y := make([]interface{}, len(x))
for i := range x {
y[i] = &x[i]
}
n, err := fmt.Scanln(y...)
x = x[:n]
return x, err
}
func main() {
x, err := intScanln(3)
if err != nil {
fmt.Println(err)
return
}
fmt.Printf("%v\n", x)
}
Input:
1 2 3
Output:
[1 2 3]
I think the the correct version should be
X := make([]int, 3)
fmt.Scanln(&X[0], &X[1], &X[2])
fmt.Printf("%v\n", X)
This error message occurs b/c there's no reasonable way to convert []int to []interface{}. Note, this is in reference to a slice. So the syntax your using is correct, but fmt.Scanln expects []interface{}. This has implications outside of pkg fmt.
The reason I've seen given for this is due to Go giving you control over memory layout so it currently has no reasonable way to do the slice conversion. This means you'll need to do the conversion manually before passing it to a function expecting the slice of a given type. For example:
package main
import (
"fmt"
)
func main() {
x := make([]int, 3)
y := make([]interface{}, 3)
y[0] = x[0]
y[1] = x[1]
y[2] = x[2]
fmt.Println(y...)
}
Or something a little more general:
x := make([]int, 3)
y := make([]interface{}, len(x))
for i, v := range x {
y[i] = v
}
fmt.Println(y...)
Regarding your specific issue, see the following:
x := make([]*int, 3)
for i := range x {
x[i] = new(int)
}
y := make([]interface{}, 3)
for i, v := range x {
y[i] = v
}
if _, err := fmt.Scanln(y...); err != nil {
fmt.Println("Scanln err: ", err)
}
for _, v := range y {
val := v.(*int)
fmt.Println(*val)
}
I saw in a comment you said the lines can have different lengths. In that case
you can implement your own fmt.Scanner:
package main
import (
"bytes"
"fmt"
)
type slice struct {
tok []int
}
func (s *slice) Scan(state fmt.ScanState, verb rune) error {
tok, err := state.Token(false, func(r rune) bool { return r != '\n' })
if err != nil { return err }
if _, _, err := state.ReadRune(); err != nil {
if len(tok) == 0 {
panic(err)
}
}
b := bytes.NewReader(tok)
for {
var d int
_, err := fmt.Fscan(b, &d)
if err != nil { break }
s.tok = append(s.tok, d)
}
return nil
}
func main() {
var s slice
fmt.Scan(&s)
fmt.Println(s.tok)
}
https://golang.org/pkg/fmt#Scanner

Resources