Goroutine Kafka Consumers - go

I currently have a program that creates a worker group of size 1, which then calls StartWorker:
package main

import (
    "db_write_consumer/db"
    "db_write_consumer/worker"
    "os"
    "os/signal"
    "syscall"
)

func main() {
    sigchan := make(chan os.Signal, 1)
    signal.Notify(sigchan, syscall.SIGINT, syscall.SIGTERM)
    mySQLClient, _ := db.NewMySQLDBClient("root", "", "localhost", 3306, "testbase")
    workers := worker.CreateGroup("localhost:9092", "testgroup", 1)
    for _, w := range workers {
        w_ := w
        worker.StartWorker(w_, []string{"test-topic"}, sigchan, mySQLClient)
    }
}
where CreateGroup is written:
func CreateGroup(bootstrapServers string, groupId string, numWorkers int) []*kafka.Consumer {
    consumers := []*kafka.Consumer{}
    for i := 0; i < numWorkers; i++ {
        c := NewWorker(bootstrapServers, groupId)
        consumers = append(consumers, c)
    }
    return consumers
}
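(NewWorker isn't shown here; presumably it wraps kafka.NewConsumer from confluent-kafka-go, roughly like the following sketch, where the config values are my assumptions:)
func NewWorker(bootstrapServers string, groupId string) *kafka.Consumer {
    c, err := kafka.NewConsumer(&kafka.ConfigMap{
        "bootstrap.servers": bootstrapServers, // assumed settings; adjust to the real helper
        "group.id":          groupId,
        "auto.offset.reset": "earliest",
    })
    if err != nil {
        panic(err)
    }
    return c
}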
and StartWorker is written:
func StartWorker(c *kafka.Consumer, topics []string, sigchan chan os.Signal, mySQLClient *sql.DB) {
    _ = c.SubscribeTopics(topics, nil)
    fmt.Println(c)
    run := true
    for run {
        select {
        case sig := <-sigchan:
            fmt.Printf("Caught signal %v: terminating\n", sig)
            run = false
        default:
            ev, _ := c.ReadMessage(100)
            if ev == nil {
                continue
            }
            msg := &pb.Person{}
            proto.Unmarshal(ev.Value, msg)
            WriteStuff(mySQLClient, msg.Id, msg.Lastname, msg.Firstname, msg.Address, msg.City)
            if ev.Headers != nil {
                fmt.Printf("%% Headers: %v\n", ev.Headers)
            }
            _, err := c.StoreMessage(ev)
            if err != nil {
                fmt.Fprintf(os.Stderr, "%% Error storing offset after message %s:\n",
                    ev.TopicPartition)
            }
        }
    }
    fmt.Printf("Closing consumer\n")
    c.Close()
}
This works fine for a worker group of size 1, but every attempt to make it work for a larger group fails. All I've learned so far is that I'll want context.WithCancel(context.Background()) passed down into the worker funcs from main, but I'm lost on how to set up a WaitGroup or goroutines to actually do this work.

I understand that your question is how to manage the lifetime of the workers using a context (instead of sigchan). The easiest way is to use signal.NotifyContext, which gives you a context that gets cancelled when one of the given signals is sent. So main would become:
func main() {
    ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
    defer stop()
    mySQLClient, _ := db.NewMySQLDBClient("root", "", "localhost", 3306, "testbase")
    workers := worker.CreateGroup("localhost:9092", "testgroup", 1)
    var wg sync.WaitGroup
    for _, w := range workers {
        w_ := w
        wg.Add(1)
        go func() {
            defer wg.Done()
            worker.StartWorker(ctx, w_, []string{"test-topic"}, mySQLClient)
        }()
    }
    wg.Wait()
}
Note also the use of the WaitGroup to keep main from exiting before all the workers finish. And StartWorker would look like:
func StartWorker(ctx context.Context, c *kafka.Consumer, topics []string, mySQLClient *sql.DB) {
    _ = c.SubscribeTopics(topics, nil)
    fmt.Println(c)
    for {
        select {
        case <-ctx.Done():
            fmt.Println("Closing consumer")
            c.Close()
            return
        default:
            // ... same message-handling code (ReadMessage, Unmarshal, WriteStuff, StoreMessage) as in the original loop ...
        }
    }
}
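As an aside (my suggestion, not part of the answer above), golang.org/x/sync/errgroup can replace the manual WaitGroup bookkeeping, assuming StartWorker is changed to return an error:
g, gctx := errgroup.WithContext(ctx)
for _, w := range workers {
    w_ := w
    g.Go(func() error {
        return worker.StartWorker(gctx, w_, []string{"test-topic"}, mySQLClient)
    })
}
if err := g.Wait(); err != nil {
    log.Printf("worker error: %v", err)
}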

Related

Execute multiple independent jobs continuously

I have a set of jobs which are independent of each other, so each of them can be run concurrently in a goroutine. Note that once a single job completes, it should wait a few seconds and start again (this applies to all the jobs), and this goes on in a loop until the Go API service stops. Also note that all these jobs execute the same goroutine (it makes a REST call). What would be the best pattern to implement this in Go? Please note that I would also want to wait for currently executing jobs to complete before my service shuts down.
If I got you right, you are looking for something like this.
This is a service with a consumer pool to execute jobs concurrently. When a job is done, it is repeated again after an interval, until you stop the service.
type job struct {
    id     int
    result chan error
}

func newJob(id int) job {
    return job{
        id:     id,
        result: make(chan error, 1),
    }
}

type service struct {
    pending        chan job
    consumerLimit  int
    repeatInterval time.Duration
    isClosed       chan struct{}
    shutdown       chan chan error
}

func newService(repeatInterval time.Duration, consumerLimit int, pendingChannelSize int) *service {
    s := &service{
        pending:        make(chan job, pendingChannelSize),
        consumerLimit:  consumerLimit,
        repeatInterval: repeatInterval,
        isClosed:       make(chan struct{}, consumerLimit),
        shutdown:       make(chan chan error),
    }
    for i := 0; i < s.consumerLimit; i++ {
        go s.consumer()
    }
    return s
}

func (s *service) do(ctx context.Context, job job) error {
    select {
    case <-ctx.Done():
        return ctx.Err()
    case s.pending <- job:
        return <-job.result
    case <-s.isClosed:
        return errors.New("service has been shut down")
    }
}

func (s *service) consumer() {
    for {
        select {
        case j := <-s.pending:
            // Simulate the working process (200-400ms; note the explicit
            // time.Millisecond unit, which the original snippet was missing)
            time.Sleep(time.Duration(rand.Intn(200)+200) * time.Millisecond)
            j.result <- nil
            fmt.Printf("job %v is done\n", j.id)
            go func() {
                // Repeat after an interval
                time.Sleep(s.repeatInterval)
                ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
                defer cancel()
                if err := s.do(ctx, newJob(j.id)); err != nil {
                    fmt.Printf("failed to send job to repeat: %v\n", err)
                }
            }()
        case result := <-s.shutdown:
            result <- nil
            return
        }
    }
}

func (s *service) close() error {
    result := make(chan error, 1)
    for i := 0; i < s.consumerLimit; i++ {
        s.shutdown <- result
    }
    close(s.isClosed)
    return <-result
}
func main() {
    interrupt := make(chan os.Signal, 1)
    signal.Notify(interrupt, os.Interrupt)
    service := newService(time.Second, 5, 1000)
    // Assign jobs
    for i := 1; i < 10; i++ {
        go func(i int) {
            if err := service.do(context.Background(), newJob(i)); err != nil {
                fmt.Printf("failed to send job: %v\n", err)
            }
        }(i)
    }
    <-interrupt
    switch err := service.close(); err {
    case nil:
        fmt.Println("service has been shut down successfully")
    default:
        fmt.Printf("failed to gracefully shut down service: %v\n", err)
    }
}
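Two details make the shutdown graceful here: do blocks on <-job.result until a consumer has actually processed the job, and close(s.isClosed) unblocks every do call that is still waiting to enqueue, so no caller hangs once the service is shut down.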
If I understand correctly, you are looking for something like this.
This code runs the workers in a loop; the workers run in parallel as a group until you exit the program by sending an end signal, but the program waits for the current loop to finish before exiting.
func main() {
    srv := server{
        workers: 5,
    }
    srv.Run()
}

// inspired by: https://goinbigdata.com/golang-wait-for-all-goroutines-to-finish/#:~:text=A%20WaitGroup%20allows%20to%20wait,until%20all%20goroutines%20have%20finished.
func work(wg *sync.WaitGroup, i int) {
    defer wg.Done()
    rand.Seed(time.Now().UnixNano())
    n := rand.Intn(10)
    fmt.Printf("Worker %v: Started\n", i)
    time.Sleep(time.Duration(n) * time.Second)
    fmt.Printf("Worker %v: Finished\n", i)
}

type server struct {
    running bool
    workers int
}

func (srv *server) Run() {
    done := make(chan bool, 1)          // this channel signals that the end signal has been handled
    signalCh := make(chan os.Signal, 1) // this channel will get a signal on system call
    signal.Notify(signalCh, syscall.SIGINT, syscall.SIGTERM)
    go func() {
        <-signalCh
        srv.running = false
        done <- true
    }()
    srv.running = true
    for srv.running {
        var wg sync.WaitGroup
        for i := 0; i < srv.workers; i++ {
            wg.Add(1)
            go work(&wg, i)
        }
        wg.Wait()
    }
    <-done
}
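One caveat (my note, not the answer's): srv.running is written by the signal goroutine and read by the loop without any synchronization, which is a data race. A race-free sketch of the same idea using sync/atomic, which also makes the done channel unnecessary because the loop itself observes the flag:
type server struct {
    running atomic.Bool
    workers int
}

func (srv *server) Run() {
    signalCh := make(chan os.Signal, 1)
    signal.Notify(signalCh, syscall.SIGINT, syscall.SIGTERM)
    go func() {
        <-signalCh
        srv.running.Store(false) // ask the loop to stop after the current round
    }()
    srv.running.Store(true)
    for srv.running.Load() {
        var wg sync.WaitGroup
        for i := 0; i < srv.workers; i++ {
            wg.Add(1)
            go work(&wg, i)
        }
        wg.Wait() // the current group always finishes before the flag is rechecked
    }
}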
You want to implement a worker pool. Here is a simple way to create one; you can customize the worker method and the Jobs type according to your requirements.
package main

import (
    "fmt"
    "sync"
)

type Jobs struct {
    ID string
    // or anything you want to add
}

func main() {
    jobs := make(chan Jobs) // note: a channel of Jobs, not make(Jobs)
    var wg sync.WaitGroup
    numWorker := 16
    for i := 0; i < numWorker; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            worker(jobs)
        }()
    }
    tasks := []Jobs{}
    // insert your tasks here
    for _, i := range tasks {
        jobs <- i
    }
    close(jobs)
    wg.Wait()
}

func worker(jobs chan Jobs) {
    for job := range jobs {
        // do whatever you want to do
        doSomething(job)
    }
}

func doSomething(job Jobs) {
    fmt.Println(job)
}
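Because jobs is unbuffered, each send blocks until a worker is free, so the loop over tasks naturally throttles to the pool size; close(jobs) then ends each worker's range loop, and wg.Wait() lets main return only after every in-flight job has finished.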

Sarama Kafka library: how to unit test a consumer group's session.MarkMessage()?

I'm trying to adapt code from the consumer group example for github.com/Shopify/sarama, and am struggling to add a unit test which tests the functionality of session.MarkMessage() in the ConsumeClaim method (https://github.com/Shopify/sarama/blob/5466b37850a38f4ed6d04b94c6f058bd75032c2a/examples/consumergroup/main.go#L160).
Here is my adapted code with a consume() function:
package main

import (
    "context"
    "fmt"
    "log"
    "os"
    "os/signal"
    "sync"
    "syscall"

    "github.com/Shopify/sarama"
)

var (
    addrs = []string{"localhost:9092"}
    topic = "my-topic"
)

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()
    var wg sync.WaitGroup
    defer wg.Wait()
    consumer := &Consumer{ready: make(chan bool)}
    close := consume(ctx, &wg, consumer)
    defer close()
    <-consumer.ready
    log.Println("Sarama consumer up and running!")
    sigterm := make(chan os.Signal, 1)
    signal.Notify(sigterm, syscall.SIGINT, syscall.SIGTERM)
    select {
    case <-ctx.Done():
        log.Println("terminating: context cancelled")
    case <-sigterm:
        log.Println("terminating: via signal")
    }
}

func consume(ctx context.Context, wg *sync.WaitGroup, consumer *Consumer) (close func()) {
    config := sarama.NewConfig()
    config.Version = sarama.V0_11_0_2 // The version has to be at least V0_10_2_0 to support consumer groups
    config.Consumer.Offsets.Initial = sarama.OffsetOldest
    consumerGroup, err := sarama.NewConsumerGroup(addrs, "my-group", config)
    if err != nil {
        log.Fatalf("NewConsumerGroup: %v", err)
    }
    wg.Add(1)
    go func() {
        defer wg.Done()
        for {
            if err := consumerGroup.Consume(ctx, []string{topic}, consumer); err != nil {
                log.Panicf("Consume: %v", err)
            }
            if ctx.Err() != nil {
                return
            }
            consumer.ready = make(chan bool)
        }
    }()
    close = func() {
        if err := consumerGroup.Close(); err != nil {
            log.Panicf("Close: %v", err)
        }
    }
    return
}
// Consumer represents a Sarama consumer group consumer
type Consumer struct {
    ready  chan bool
    handle func([]byte) error
}

// Setup is run at the beginning of a new session, before ConsumeClaim
func (consumer *Consumer) Setup(sarama.ConsumerGroupSession) error {
    // Mark the consumer as ready
    close(consumer.ready)
    return nil
}

// Cleanup is run at the end of a session, once all ConsumeClaim goroutines have exited
func (consumer *Consumer) Cleanup(sarama.ConsumerGroupSession) error {
    return nil
}

// ConsumeClaim must start a consumer loop of ConsumerGroupClaim's Messages().
func (consumer *Consumer) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
    for message := range claim.Messages() {
        log.Printf("Message claimed: value = %s, timestamp = %v, topic = %s", message.Value, message.Timestamp, message.Topic)
        if consumer.handle != nil {
            if err := consumer.handle(message.Value); err != nil {
                return fmt.Errorf("handle message %s: %v", message.Value, err)
            }
        }
        session.MarkMessage(message, "")
    }
    return nil
}
Here are a couple of unit tests I've written for it:
package main

import (
    "context"
    "fmt"
    "log"
    "sync"
    "testing"
    "time"

    "github.com/Shopify/sarama"
    "github.com/stretchr/testify/require"
    "gotest.tools/assert"
)

func TestConsume(t *testing.T) {
    config := sarama.NewConfig()
    config.Producer.Return.Successes = true
    producer, err := sarama.NewSyncProducer(addrs, config)
    require.NoError(t, err)
    partition, offset, err := producer.SendMessage(&sarama.ProducerMessage{
        Topic: topic,
        Value: sarama.ByteEncoder([]byte("foobar")),
    })
    require.NoError(t, err)
    t.Logf("Sent message to partition %d with offset %d", partition, offset)
    ctx, cancel := context.WithCancel(context.Background())
    var wg sync.WaitGroup
    consumer := &Consumer{ready: make(chan bool)}
    close := consume(ctx, &wg, consumer)
    <-consumer.ready
    log.Println("Sarama consumer up and running!")
    time.Sleep(1 * time.Second)
    cancel()
    wg.Wait()
    close()
}

func TestConsumeTwice(t *testing.T) {
    config := sarama.NewConfig()
    config.Producer.Return.Successes = true
    producer, err := sarama.NewSyncProducer(addrs, config)
    require.NoError(t, err)
    data1, data2 := "foobar1", "foobar2"
    for _, data := range []string{data1, data2} {
        partition, offset, err := producer.SendMessage(&sarama.ProducerMessage{
            Topic: topic,
            Key:   sarama.StringEncoder("foobar"),
            Value: sarama.StringEncoder(data),
        })
        require.NoError(t, err)
        t.Logf("Sent message to partition %d with offset %d", partition, offset)
    }
    ctx, cancel := context.WithCancel(context.Background())
    var wg sync.WaitGroup
    messageReceived := make(chan []byte)
    consumer := &Consumer{
        ready: make(chan bool),
        handle: func(data []byte) error {
            messageReceived <- data
            fmt.Printf("Received message: %s\n", data)
            return nil
        },
    }
    close := consume(ctx, &wg, consumer)
    <-consumer.ready
    log.Println("Sarama consumer up and running!")
    for i := 0; i < 2; i++ {
        data := <-messageReceived
        switch i {
        case 0:
            assert.Equal(t, data1, string(data))
        case 1:
            assert.Equal(t, data2, string(data))
        }
    }
    cancel()
    wg.Wait()
    close()
}
The tests can be run after running Kafka and Zookeeper in a Docker container such as johnnypark/kafka-zookeeper like so:
docker run -p 2181:2181 -p 9092:9092 -e ADVERTISED_HOST=127.0.0.1 -e NUM_PARTITIONS=10 johnnypark/kafka-zookeeper
What I'm struggling with is the following: if I comment out the line
session.MarkMessage(message, "")
the tests still pass. According to https://godoc.org/github.com/Shopify/sarama#ConsumerGroupSession, MarkMessage marks a message as consumed, but how would I test this in a unit test?
sarama.ConsumerGroupSession.MarkMessage calls sarama.PartitionOffsetManager.MarkOffset, and in the method comment they said: "Note: calling MarkOffset does not necessarily commit the offset to the backend store immediately for efficiency reasons, and it may never be committed if your application crashes. This means that you may end up processing the same message twice."
So in unit tests, MarkMessage does not commit the offset fast enough. I faced the same problem, and Google brought me here. Sleeping for a second at the end of the test functions can be a workaround.
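If you want the test to actually fail when session.MarkMessage is removed, one option is to read the committed offset back through sarama's offset manager and compare it with the offset returned by SendMessage. A sketch (the helper name is mine, and it assumes the commit has had time to flush, e.g. after the sleep):
func assertOffsetCommitted(t *testing.T, group string, partition int32, produced int64) {
    client, err := sarama.NewClient(addrs, sarama.NewConfig())
    require.NoError(t, err)
    defer client.Close()
    om, err := sarama.NewOffsetManagerFromClient(group, client)
    require.NoError(t, err)
    defer om.Close()
    pom, err := om.ManagePartition(topic, partition)
    require.NoError(t, err)
    defer pom.Close()
    next, _ := pom.NextOffset() // next offset to consume, i.e. last committed + 1
    assert.Equal(t, produced+1, next)
}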

Concurrency in Go

How do I go about implementing the aggregation pattern in Go? I have to send a bunch of HTTP requests concurrently, where each goroutine calls the endpoint and sends the response status on a channel. Then, in the main calling function, I range over the channel and display all the responses.
The problem is how to unblock the channel: I cannot close it from the goroutines, as it would be closed before all the work is done.
package main

import (
    "fmt"
    "net/http"
    "sync"
    "time"

    "golang.org/x/net/context"
)

func main() {
    var wg sync.WaitGroup
    wg.Add(10)
    c := make(chan string, 100)
    ctx := context.Background()
    ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
    defer cancel()
    for i := 1; i <= 10; i++ {
        go SendHttpRequest(ctx, c, &wg)
    }
    for v := range c {
        fmt.Println(v)
    }
    wg.Wait()
}

func SendHttpRequest(ctx context.Context, c chan string, wg *sync.WaitGroup) {
    //defer wg.Done()
    client := http.Client{}
    req, err := http.NewRequest("POST", "https://jsonplaceholder.typicode.com/posts/1", nil)
    if err != nil {
        panic(err)
    }
    req.WithContext(ctx)
    res, _ := client.Do(req)
    select {
    case <-time.After(1 * time.Microsecond):
        c <- res.Status
    case <-ctx.Done():
        c <- "599 ToLong"
    }
    if res != nil {
        defer res.Body.Close()
    }
    //close(c)
    defer wg.Done()
}
Use the WaitGroup:
go func() {
    wg.Wait()
    close(c)
}()
for v := range c {
    fmt.Println(v)
}
// Don't bother with wg.Wait() here
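This works because close(c) runs only after every goroutine has called wg.Done(), and the range loop in main then drains any remaining buffered values and terminates on its own.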
In this kind of situation, use a generator and the idiomatic early-defer patterns:
import (
    "context"
    "errors"
    "fmt"
    "net/http"
    "sync"
    "time"
)

func main() {
    ctx := context.Background()
    ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
    defer cancel() // defer early context cancel
    for v := range requests(ctx) {
        fmt.Println(v)
    }
}

// requests generator (handling the synchronization)
func requests(ctx context.Context) <-chan string {
    c := make(chan string /*, 100*/) // No need for a buffer, do it on the fly
    go func() {
        defer close(c) // defer early chan close, will also check goroutine ending
        var wg sync.WaitGroup
        defer wg.Wait() // defer early wait
        wg.Add(10)
        for i := 1; i <= 10; i++ {
            go func() {
                defer wg.Done() // defer early goroutine waitgroup done
                if status, err := SendHttpRequest(ctx); err == nil || status != "" {
                    c <- status // send real statuses and the "599 ToLong" marker, skip other errors
                }
            }()
        }
    }()
    return c
}

// SendHttpRequest looks more conventional: no goroutines, no synchronization (the waitgroup is not spread around)
func SendHttpRequest(ctx context.Context) (status string, err error) {
    client := http.Client{}
    req, err := http.NewRequest("POST", "https://jsonplaceholder.typicode.com/posts/1", nil)
    if err != nil {
        return
    }
    req = req.WithContext(ctx) // WithContext returns a copy; the result must be kept
    res, err := client.Do(req)
    if err != nil {
        if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
            // the request was cut short by the context
            status = "599 ToLong"
        }
        return
    }
    defer res.Body.Close() // defer early response body close (in case of no error)
    status = res.Status
    return
}
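Note the defer ordering in the generator goroutine: deferred calls run last-in-first-out, so wg.Wait() executes before close(c), which guarantees the channel is closed only after all ten request goroutines have finished sending.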

Confusion regarding channel directions and blocking in Go

In a function definition, if a channel is an argument without a direction, does it have to send or receive something?
func makeRequest(url string, ch chan<- string, results chan<- string) {
    start := time.Now()
    resp, err := http.Get(url)
    defer resp.Body.Close()
    if err != nil {
        fmt.Printf("%v", err)
    }
    resp, err = http.Post(url, "text/plain", bytes.NewBuffer([]byte("Hey")))
    defer resp.Body.Close()
    secs := time.Since(start).Seconds()
    if err != nil {
        fmt.Printf("%v", err)
    }
    // Cannot move past this.
    ch <- fmt.Sprintf("%f", secs)
    results <- <-ch
}

func MakeRequestHelper(url string, ch chan string, results chan string, iterations int) {
    for i := 0; i < iterations; i++ {
        makeRequest(url, ch, results)
    }
    for i := 0; i < iterations; i++ {
        fmt.Println(<-ch)
    }
}

func main() {
    args := os.Args[1:]
    threadString := args[0]
    iterationString := args[1]
    url := args[2]
    threads, err := strconv.Atoi(threadString)
    if err != nil {
        fmt.Printf("%v", err)
    }
    iterations, err := strconv.Atoi(iterationString)
    if err != nil {
        fmt.Printf("%v", err)
    }
    channels := make([]chan string, 100)
    for i := range channels {
        channels[i] = make(chan string)
    }
    // results aggregate all the things received by channels in all goroutines
    results := make(chan string, iterations*threads)
    for i := 0; i < threads; i++ {
        go MakeRequestHelper(url, channels[i], results, iterations)
    }
    resultSlice := make([]string, threads*iterations)
    for i := 0; i < threads*iterations; i++ {
        resultSlice[i] = <-results
    }
}
In the above code, the send and receive operations (ch <- ... and <-results) seem to be blocking every goroutine that executes makeRequest. I am new to the concurrency model of Go. I understand that sending to and receiving from a channel blocks, but I find it difficult to tell what is blocking what in this code.
I'm not really sure what you are doing... It seems really convoluted. I suggest you read up on how to use channels:
https://tour.golang.org/concurrency/2
That being said, you have so much going on in your code that it was much easier to just gut it down to something a bit simpler (it can be simplified further). I left comments to help you understand the code.
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "sync"
    "time"
)

// using structs is a nice way to organize your code
type Worker struct {
    wg        sync.WaitGroup
    semaphore chan struct{}
    result    chan Result
    client    http.Client
}

// grouping the returns means you don't have to send on many channels
type Result struct {
    duration float64
    results  string
}

// closing your channels will stop the for loop in main
func (w *Worker) Close() {
    close(w.semaphore)
    close(w.result)
}

func (w *Worker) MakeRequest(url string) {
    // a semaphore is a simple way to rate-limit the number of goroutines running at any single point in time
    // google them, Go uses them often
    w.semaphore <- struct{}{}
    defer func() {
        w.wg.Done()
        <-w.semaphore
    }()
    start := time.Now()
    resp, err := w.client.Get(url)
    if err != nil {
        log.Println("error", err)
        return
    }
    defer resp.Body.Close()
    // don't have any examples where I need to also POST anything but the point should be made
    // resp, err = http.Post(url, "text/plain", bytes.NewBuffer([]byte("Hey")))
    // if err != nil {
    //     log.Println("error", err)
    //     return
    // }
    // defer resp.Body.Close()
    secs := time.Since(start).Seconds()
    b, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        log.Println("error", err)
        return
    }
    w.result <- Result{duration: secs, results: string(b)}
}

func main() {
    urls := []string{"https://facebook.com/", "https://twitter.com/", "https://google.com/", "https://youtube.com/", "https://linkedin.com/", "https://wordpress.org/",
        "https://instagram.com/", "https://pinterest.com/", "https://wikipedia.org/", "https://wordpress.com/", "https://blogspot.com/", "https://apple.com/",
    }
    workerNumber := 5
    worker := Worker{
        semaphore: make(chan struct{}, workerNumber),
        result:    make(chan Result),
        client:    http.Client{Timeout: 5 * time.Second},
    }
    // use wait groups to allow your code to wait for
    // all your goroutines to finish
    for _, url := range urls {
        worker.wg.Add(1)
        go worker.MakeRequest(url)
    }
    // by declaring wait and close in a separate goroutine
    // I can get to the for loop below and iterate on the results
    // in a non-blocking fashion
    go func() {
        worker.wg.Wait()
        worker.Close()
    }()
    // do something with the results channel
    for res := range worker.result {
        fmt.Printf("Request took %2.f seconds.\nResults: %s\n\n", res.duration, res.results)
    }
}
The channels in channels are nil (no make is executed; you make the slice but not the channels), so any send or receive will block. I'm not sure exactly what you're trying to do here, but that's the basic problem.
See https://golang.org/doc/effective_go.html#channels for an explanation of how channels work.
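For illustration, here is a minimal program showing the failure mode described above; any send (or receive) on a nil channel blocks forever:
package main

func main() {
    var ch chan string // nil: declared but never initialized with make(chan string)
    ch <- "hello"      // blocks forever: fatal error: all goroutines are asleep - deadlock!
}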

How to close a channel

I'm trying to adapt this example:
https://gobyexample.com/worker-pools
But I don't know how to stop the channel, because the program doesn't exit at the end of the channel loop.
Can you explain how to exit the program?
package main

import (
    "bufio"
    "fmt"
    "log"
    "os"

    "github.com/SlyMarbo/rss"
)

func readLines(path string) ([]string, error) {
    file, err := os.Open(path)
    if err != nil {
        return nil, err
    }
    defer file.Close()
    var lines []string
    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        lines = append(lines, scanner.Text())
    }
    return lines, scanner.Err()
}

func worker(id int, jobs <-chan string, results chan<- string) {
    for url := range jobs {
        fmt.Println("worker", id, "processing job", url)
        feed, err := rss.Fetch(url)
        if err != nil {
            fmt.Println("Error on: ", url)
            continue
        }
        borne := 0
        for _, value := range feed.Items {
            if borne < 5 {
                results <- value.Link
                borne = borne + 1
            } else {
                continue
            }
        }
    }
}

func main() {
    jobs := make(chan string)
    results := make(chan string)
    for w := 1; w <= 16; w++ {
        go worker(w, jobs, results)
    }
    urls, err := readLines("flux.txt")
    if err != nil {
        log.Fatalf("readLines: %s", err)
    }
    for _, url := range urls {
        jobs <- url
    }
    close(jobs)
    // it seems the program runs over...
    for msg := range results {
        fmt.Println(msg)
    }
}
flux.txt is a flat text file like:
http://blog.case.edu/news/feed.atom
...
The problem is that, in the example you are referring to, the worker pool reads from results exactly 9 times:
for a := 1; a <= 9; a++ {
    <-results
}
Your program, on the other hand, does a range loop over results, which has different semantics in Go: the range operator does not stop until the channel is closed.
for msg := range results {
    fmt.Println(msg)
}
To fix your problem, you need to close the results channel. However, if you just call close(results) before the for loop, you will most probably get a panic, because the workers might still be writing to results.
To fix this, you need a way to be notified when all the workers are done. You can do this either using a sync.WaitGroup:
const (
    workers = 16
)

func main() {
    jobs := make(chan string, 100)
    results := make(chan string, 100)
    var wg sync.WaitGroup
    for w := 0; w < workers; w++ {
        wg.Add(1) // Add must happen before the goroutine starts, or wg.Wait() may return too early
        go func(w int) {
            defer wg.Done()
            worker(w, jobs, results)
        }(w) // pass w as a parameter to avoid capturing the loop variable
    }
    urls, err := readLines("flux.txt")
    if err != nil {
        log.Fatalf("readLines: %s", err)
    }
    for _, url := range urls {
        jobs <- url
    }
    close(jobs)
    wg.Wait()
    close(results)
    // note: results is buffered at 100; with many more links than that,
    // the workers would block on results before wg.Wait() returns
    for msg := range results {
        fmt.Println(msg)
    }
}
Or a done channel:
package main

import (
    "bufio"
    "fmt"
    "log"
    "os"

    "github.com/SlyMarbo/rss"
)

func readLines(path string) ([]string, error) {
    file, err := os.Open(path)
    if err != nil {
        return nil, err
    }
    defer file.Close()
    var lines []string
    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        lines = append(lines, scanner.Text())
    }
    return lines, scanner.Err()
}

func worker(id int, jobs <-chan string, results chan<- string, done chan struct{}) {
    for url := range jobs {
        fmt.Println("worker", id, "processing job", url)
        feed, err := rss.Fetch(url)
        if err != nil {
            fmt.Println("Error on: ", url)
            continue
        }
        borne := 0
        for _, value := range feed.Items {
            if borne < 5 {
                results <- value.Link
                borne = borne + 1
            } else {
                continue
            }
        }
    }
    close(done)
}

const (
    workers = 16
)

func main() {
    jobs := make(chan string, 100)
    results := make(chan string, 100)
    dones := make([]chan struct{}, workers)
    for w := 0; w < workers; w++ {
        dones[w] = make(chan struct{})
        go worker(w, jobs, results, dones[w])
    }
    urls, err := readLines("flux.txt")
    if err != nil {
        log.Fatalf("readLines: %s", err)
    }
    for _, url := range urls {
        jobs <- url
    }
    close(jobs)
    for _, done := range dones {
        <-done
    }
    close(results)
    for msg := range results {
        fmt.Println(msg)
    }
}
