message does not start with magic byte - go

I am trying to produce Avro-encoded data into a Kafka topic using the linkedin/goavro package in Go. The goal is to be able to consume the topic using different clients.
First I register the schema as follows:
curl -X POST -H "Content-Type: application/vnd.schemaregistry.v1+json" --data '{"schema": "{\"name\":\"test_topic2\",\"type\":\"record\", \"fields\":[{\"name\":\"user\",\"type\":\"string\"},{\"name\":\"password\",\"size\":10,\"type\":\"string\"}]}"}' http://localhost:8081/subjects/test_topic2-value/versions
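If registration succeeds, the registry responds with the ID it assigned to the schema, for example:
{"id":1}
That ID matters later: Confluent-framed messages embed it right after the magic byte.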
Then I create the Avro data, and produce and consume it with Go:
package main

import (
    "fmt"

    "github.com/Shopify/sarama"
    "github.com/linkedin/goavro"
)

const topic = "test_topic2"

// sarama expects a slice of broker addresses
var brokers = []string{"localhost:9092"}

const loginEventAvroSchema = `{"name":"test_topic2","type":"record", "fields":[{"name":"user","type":"string"},{"name":"password","size":10,"type":"string"}]}`

func main() {
    // Create Message
    codec, err := goavro.NewCodec(loginEventAvroSchema)
    if err != nil {
        panic(err)
    }
    m := map[string]interface{}{
        "user": "pikachu", "password": 231231,
    }
    single, err := codec.SingleFromNative(nil, m)
    if err != nil {
        panic(err)
    }

    // Producer
    config := sarama.NewConfig()
    config.Consumer.Return.Errors = true
    config.Producer.Return.Successes = true
    config.Version = sarama.V2_4_0_0

    // get broker
    cluster, err := sarama.NewSyncProducer(brokers, config)
    if err != nil {
        panic(err)
    }
    defer func() {
        if err := cluster.Close(); err != nil {
            panic(err)
        }
    }()

    msg := &sarama.ProducerMessage{
        Topic: topic,
        Value: sarama.StringEncoder(single),
    }
    cluster.SendMessage(msg)

    // Consumer
    clusterConsumer, err := sarama.NewConsumer(brokers, config)
    if err != nil {
        panic(err)
    }
    defer func() {
        if err := clusterConsumer.Close(); err != nil {
            panic(err)
        }
    }()

    msgK, _ := clusterConsumer.ConsumePartition(topic, 0, sarama.OffsetOldest)
    for {
        q := <-msgK.Messages()
        native, _, err := codec.NativeFromSingle([]byte(q.Value))
        if err != nil {
            fmt.Println(err)
        }
        fmt.Println(native)
    }
}
This code works fine, and I can successfully produce to and consume from the Kafka topic.
Now I try to consume the topic from a Python AvroConsumer:
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError
c = AvroConsumer({
    'bootstrap.servers': 'localhost',
    'group.id': 'groupid',
    'schema.registry.url': 'http://localhost:8081',
    'auto.offset.reset': 'earliest'})

c.subscribe(['test_topic2'])

while True:
    try:
        msg = c.poll(10)
    except SerializerError as e:
        print("Message deserialization failed for {}: {}".format(msg, e))
        break

    if msg is None:
        continue

    if msg.error():
        print("AvroConsumer error: {}".format(msg.error()))
        continue

    print(msg.value(), msg.key())

c.close()
But I get the following error:
confluent_kafka.avro.serializer.SerializerError: Message deserialization failed for message at test_topic2 [0] offset 1: message does not start with magic byte
I think I have missed something in the Go producer part. I would much appreciate it if someone could share their experience on how to fix this issue.

goavro doesn't use the Schema Registry.
Plus, you're using StringEncoder, which I assume outputs only a Go string and not raw Avro bytes:
StringEncoder implements the Encoder interface for Go strings so that they can be used as the Key or Value in a ProducerMessage.
FWIW, I would suggest testing a consumer with kafka-avro-console-consumer, if you have it
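For completeness, here is a minimal sketch (not from the original answer) of producing what the Python AvroConsumer expects: the Confluent wire format, i.e. a zero "magic" byte, the 4-byte big-endian schema ID from the registry, and then the plain Avro binary encoding, rather than goavro's single-object encoding. The schema ID of 1 is an assumption; use whatever the registry returned. This needs the "bytes" and "encoding/binary" imports.
// Assumption: schemaID is the {"id":...} value the registry returned.
schemaID := uint32(1)

// Plain Avro binary, no single-object header.
avroBytes, err := codec.BinaryFromNative(nil, m)
if err != nil {
    panic(err)
}

var buf bytes.Buffer
buf.WriteByte(0)                                   // magic byte
_ = binary.Write(&buf, binary.BigEndian, schemaID) // 4-byte schema ID, big-endian
buf.Write(avroBytes)                               // Avro-encoded record

msg := &sarama.ProducerMessage{
    Topic: topic,
    Value: sarama.ByteEncoder(buf.Bytes()),
}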

Related

XACK is not deleting the message, even if it is processed successfully?

I am trying to implement a Redis stream where we have a producer.
package producer

import (
    "RedisStream/models"
    "encoding/json"
    "fmt"

    "github.com/garyburd/redigo/redis"
)

type Producer struct {
    streamName string
}

func NewProducer(streamName string) *Producer {
    return &Producer{streamName: streamName}
}

func (p *Producer) WriteEvents(conn redis.Conn, key string) {
    // Create a new struct
    employee := models.Employee{
        Name:     "ashutosh",
        Employer: "self-employee",
    }
    // Convert struct to JSON
    e, _ := json.Marshal(employee)
    // Send key and value to the Redis stream
    _, err := conn.Do("XADD", p.streamName, "*", key, e)
    if err != nil {
        fmt.Println(err)
    }
    fmt.Println("Successfully sent data to Redis stream")
}
Then I implemented a consumer:
func (c *Consumer) ReadEventsCons1() {
    // Connect to Redis
    conn, err := redis.Dial("tcp", ":6379")
    if err != nil {
        fmt.Println(err)
        return
    }
    defer conn.Close()

    for {
        // Read key and value from the Redis stream
        reply, err := conn.Do("XREADGROUP", "GROUP", c.groupName[0], "ashu", "COUNT", "1", "STREAMS", c.streamName, ">")
        vs, err := redis.Values(reply, err)
        if err != nil {
            if errors.Is(err, redis.ErrNil) {
                continue
            }
            fmt.Printf("Error: %+v", err)
        }

        // Get the first and only value in the array since we're only
        // reading from one stream "some-stream-name" here.
        vs, err = redis.Values(vs[0], nil)
        if err != nil {
            fmt.Printf("Error: %+v", err)
        }

        // Ignore the stream name as the first value as we already have
        // that in hand! Just get the second value which is guaranteed to
        // exist per the docs, and parse it as some stream entries.
        res, err := entries(vs[1], nil)
        if err != nil {
            fmt.Errorf("error parsing entries: %w", err)
        }

        for _, val := range res {
            for k, v := range val.Fields {
                empl := &models.Employee{}
                _ = json.Unmarshal(v, empl)
                fmt.Printf("From Consumer Ashu: Key: %s and val: %+v \n", k, empl)
            }
            reply, err := redis.Int(conn.Do("XACK", c.streamName, c.groupName[0], val.ID))
            if reply != 1 {
                fmt.Printf("failed to ack: err: %+v", err)
            }
        }
    }
}
Once a consumer from a consumer group has successfully processed a message, I send an acknowledgement to Redis. But the messages still reside in the Redis stream, because after running
XLEN streamName
I can see the length is growing. This may create a memory problem, since the messages reside there in perpetuity. Is there an intelligent way to handle this issue?
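A note on the behaviour in question: XACK only removes an entry from the consumer group's pending entries list; it never deletes the entry from the stream itself, so XLEN keeps growing until the stream is trimmed. A minimal redigo sketch of the two usual options, reusing the conn from above and a hypothetical entryID of an already-processed entry:
// Cap the stream at roughly 1000 entries; "MAXLEN ~" lets Redis trim lazily.
if _, err := conn.Do("XTRIM", c.streamName, "MAXLEN", "~", 1000); err != nil {
    fmt.Println(err)
}

// Or delete one specific entry after it has been processed and acked:
if _, err := conn.Do("XDEL", c.streamName, entryID); err != nil {
    fmt.Println(err)
}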

kafka retries many times when I download a large file

I am a newbie with Kafka, and I am trying to build a service that sends mail with attached files.
Execution flow:
Kafka receives a message telling the service to send a mail
a download function fetches the files from their URLs, scales the images, and saves the files
when sending the mail, I read the files from the folder and attach them to the form
Issues:
when I send mail with large files, Kafka retries many times and I receive many duplicate mails
Kafka error: "kafka server: The provided member is not known in the current generation"
I looked at MaxProcessingTime, but when I tested a mail with a large file on its own, it still worked fine
Kafka setup: 1 broker, 3 consumers
func (s *customerMailService) SendPODMail() error {
    filePaths, err := DownloadFiles(podURLs, orderInfo.OrderCode)
    if err != nil {
        countRetry := 0
        for countRetry <= NUM_OF_RETRY {
            filePaths, err = DownloadFiles(podURLs, orderInfo.OrderCode)
            if err == nil {
                break
            }
            countRetry++
        }
    }
    err = s.sendMailService.Send(ctx, orderInfo.CustomerEmail, tmsPod, content, filePaths)
}
The download-file function:
func DownloadFiles(files []string, orderCode string) ([]string, error) {
    var filePaths []string

    err := os.Mkdir(tempDir, 0750)
    if err != nil && !os.IsExist(err) {
        return nil, err
    }

    tempDirPath := tempDir + "/" + orderCode
    err = os.Mkdir(tempDirPath, 0750)
    if err != nil && !os.IsExist(err) {
        return nil, err
    }

    for _, fileUrl := range files {
        fileUrlParsed, err := url.ParseRequestURI(fileUrl)
        if err != nil {
            logrus.WithError(err).Infof("Pod url is invalid %s", orderCode)
            return nil, err
        }

        extFile := filepath.Ext(fileUrlParsed.Path)
        dir, err := os.MkdirTemp(tempDirPath, "tempDir")
        if err != nil {
            return nil, err
        }
        f, err := os.CreateTemp(dir, "tmpfile-*"+extFile)
        if err != nil {
            return nil, err
        }
        defer f.Close()

        response, err := http.Get(fileUrl)
        if err != nil {
            return nil, err
        }
        defer response.Body.Close()

        contentTypes := response.Header["Content-Type"]
        isTypeAllow := false
        for _, contentType := range contentTypes {
            if contentType == "image/png" || contentType == "image/jpeg" {
                isTypeAllow = true
            }
        }
        if !isTypeAllow {
            logrus.WithError(err).Infof("Pod image type is invalid %s", orderCode)
            return nil, errors.New("Pod image type is invalid")
        }

        decodedImg, err := imaging.Decode(response.Body)
        if err != nil {
            return nil, err
        }
        resizedImg := imaging.Resize(decodedImg, 1024, 0, imaging.Lanczos)
        imaging.Save(resizedImg, f.Name())

        filePaths = append(filePaths, f.Name())
    }
    return filePaths, nil
}
The send-mail function:
func (s *tikiMailService) SendFile(ctx context.Context, receiver string, templateCode string, data interface{}, filePaths []string) error {
    path := "/v1/emails"
    fullPath := fmt.Sprintf("%s%s", s.host, path)

    formValue := &bytes.Buffer{}
    writer := multipart.NewWriter(formValue)
    _ = writer.WriteField("template", templateCode)
    _ = writer.WriteField("to", receiver)
    if data != nil {
        b, err := json.Marshal(data)
        if err != nil {
            return errors.Wrapf(err, "Cannot marshal mail data to json with object %+v", data)
        }
        _ = writer.WriteField("params", string(b))
    }

    for _, filePath := range filePaths {
        part, err := writer.CreateFormFile(filePath, filepath.Base(filePath))
        if err != nil {
            return err
        }
        pipeReader, pipeWriter := io.Pipe()
        go func() {
            defer pipeWriter.Close()
            file, err := os.Open(filePath)
            if err != nil {
                return
            }
            defer file.Close()
            io.Copy(pipeWriter, file)
        }()
        io.Copy(part, pipeReader)
    }

    err := writer.Close()
    if err != nil {
        return err
    }

    request, err := http.NewRequest("POST", fullPath, formValue)
    if err != nil {
        return err
    }
    request.Header.Set("Content-Type", writer.FormDataContentType())

    resp, err := s.doer.Do(request)
    if err != nil {
        return errors.Wrap(err, "Cannot send request to send email")
    }
    defer resp.Body.Close()

    b, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return err
    }
    if resp.StatusCode != http.StatusOK {
        return errors.New(fmt.Sprintf("Send email with code %s error: status code %d, response %s",
            templateCode, resp.StatusCode, string(b)))
    } else {
        logrus.Infof("Send email with attachment, code %s success with response %s, box-code %v", templateCode, string(b), filePaths)
    }
    return nil
}
Thanks!
My team found the problem: when I redeploy the k8s pods, it leads to a conflicting leader partition, which causes a rebalance. The consumers then try to process the messages remaining in the pods' buffers again.
Solution: I don't fetch many messages into the buffer; I get one message at a time and process it, via this config:
ChannelBufferSize = 0
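As a sketch of where that setting lives (assuming the Shopify/sarama client used elsewhere in this thread):
config := sarama.NewConfig()
// Deliver messages one at a time instead of prefetching into channel buffers.
config.ChannelBufferSize = 0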
Example of a conflicting leader partition:
consumer A and consumer B start up at the same time
consumer A registers itself as leader, and owns the topic with all partitions
consumer B registers itself as leader, then begins to rebalance and takes ownership of all partitions
consumer A rebalances and obtains all partitions, but cannot consume because its memberId is old and it needs a new one
consumer B rebalances again and owns the topic with all partitions, but they are already held by consumer A
My two cents: in the case of very big attachments, the consumer takes quite a lot of time to read the file and send it as an attachment.
This increases the amount of time between two poll() calls. If that time is greater than max.poll.interval.ms, the consumer is considered failed and the partition offset is not committed. As a result, the message is processed again, and eventually, if by chance the execution time stays below the poll interval, the offset is committed. The effect is multiple email sends.
Try increasing max.poll.interval.ms on the consumer side.
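If the consumer here is sarama rather than the Java client, there is no setting literally named max.poll.interval.ms; a hedged sketch of the closest sarama knobs (the values are illustrative):
config := sarama.NewConfig()
// Upper bound on how long handling a single message may take before
// sarama considers the consumer stuck.
config.Consumer.MaxProcessingTime = 5 * time.Minute
// How long members may take to rejoin the group during a rebalance.
config.Consumer.Group.Rebalance.Timeout = 5 * time.Minute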

Golang bufio from websocket breaking after first read

I am trying to stream JSON text from a websocket. However, after an initial read the stream seems to break/disconnect. This is from a Pleroma server (think: Mastodon). I am using the golang.org/x/net/websocket library.
package main

import (
    "bufio"
    "fmt"
    "log"

    "golang.org/x/net/websocket"
)

func main() {
    origin := "https://poa.st/"
    url := "wss://poa.st/api/v1/streaming/?stream=public"
    ws, err := websocket.Dial(url, "", origin)
    if err != nil {
        log.Fatal(err)
    }
    s := bufio.NewScanner(ws)
    for s.Scan() {
        line := s.Text()
        fmt.Println(line)
    }
}
After the initial JSON text response, the for-loop breaks. I would expect it to send a new message every few seconds.
What might be causing this? I am willing to switch to the Gorilla websocket library if I can use it with bufio.
Thanks!
Although the x/net/websocket connection has a Read method with the same signature as the Read method in io.Reader, the connection does not work like an io.Reader, and it will not work as you expect when wrapped with a bufio.Scanner.
The poa.st endpoint sends a stream of messages where each message is a JSON document. Use the following code to read the messages using the Gorilla package:
url := "wss://poa.st/api/v1/streaming/?stream=public"
ws, _, err := websocket.DefaultDialer.Dial(url, nil)
if err != nil {
    log.Fatal(err)
}
defer ws.Close()

for {
    _, p, err := ws.ReadMessage()
    if err != nil {
        log.Fatal(err)
    }
    // p is a []byte containing the JSON document.
    fmt.Printf("%s\n", p)
}
The Gorilla package has a helper method for decoding JSON messages. Here's an example of how to use that method.
url := "wss://poa.st/api/v1/streaming/?stream=public"
ws, _, err := websocket.DefaultDialer.Dial(url, nil)
if err != nil {
    log.Fatal(err)
}
defer ws.Close()

for {
    // The JSON documents are objects containing two fields,
    // the event type and the payload. The payload is a JSON
    // document itself.
    var e struct {
        Event   string
        Payload string
    }
    err := ws.ReadJSON(&e)
    if err != nil {
        log.Fatal(err)
    }
    // TODO: decode e.Payload based on e.Event
}

How to use self-describing messages for protobuf

One of the use cases I'm working on with protocol buffers is deserializing the protobuf Kafka messages I receive at the consumer end (using the sarama library and Go).
The way I'm currently doing it: I defined a sample pixel.proto file as shown below.
syntax = "proto3";

package saramaprotobuf;

message Pixel {
    // Session identifier stuff
    string session_id = 2;
}
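For reference, a sketch of compiling this to Go code, assuming protoc and the protoc-gen-go plugin are installed (the output path is illustrative):
protoc --go_out=. pixel.proto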
I'm sending the message through a sarama producer (by marshalling it) and receiving it with a sarama consumer (unmarshalling the message by referencing the compiled pixel.pb.go). The code is below.
import (
    "log"
    "os"
    "os/signal"
    "protobuftest/example"
    "syscall"
    "time"

    "github.com/Shopify/sarama"
    "github.com/golang/protobuf/proto"
)

func main() {
    topic := "test_topic"
    brokerList := []string{"localhost:9092"}

    producer, err := newSyncProducer(brokerList)
    if err != nil {
        log.Fatalln("Failed to start Sarama producer:", err)
    }

    go func() {
        ticker := time.NewTicker(time.Second)
        for {
            select {
            case t := <-ticker.C:
                elliot := &example.Pixel{
                    SessionId: t.String(),
                }
                pixelToSend := elliot
                pixelToSendBytes, err := proto.Marshal(pixelToSend)
                if err != nil {
                    log.Fatalln("Failed to marshal example:", err)
                }

                msg := &sarama.ProducerMessage{
                    Topic: topic,
                    Value: sarama.ByteEncoder(pixelToSendBytes),
                }

                producer.SendMessage(msg)
                log.Printf("Pixel sent: %s", pixelToSend)
            }
        }
    }()

    signals := make(chan os.Signal, 1)
    signal.Notify(signals, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)

    partitionConsumer, err := newPartitionConsumer(brokerList, topic)
    if err != nil {
        log.Fatalln("Failed to create Sarama partition consumer:", err)
    }

    log.Println("Waiting for messages...")

    for {
        select {
        case msg := <-partitionConsumer.Messages():
            receivedPixel := &example.Pixel{}
            err := proto.Unmarshal(msg.Value, receivedPixel)
            if err != nil {
                log.Fatalln("Failed to unmarshal example:", err)
            }
            log.Printf("Pixel received: %s", receivedPixel)
        case <-signals:
            log.Print("Received termination signal. Exiting.")
            return
        }
    }
}

func newSyncProducer(brokerList []string) (sarama.SyncProducer, error) {
    config := sarama.NewConfig()
    config.Producer.RequiredAcks = sarama.WaitForAll
    config.Producer.Retry.Max = 5
    config.Producer.Return.Successes = true
    // TODO configure producer

    producer, err := sarama.NewSyncProducer(brokerList, config)
    if err != nil {
        return nil, err
    }
    return producer, nil
}

func newPartitionConsumer(brokerList []string, topic string) (sarama.PartitionConsumer, error) {
    conf := sarama.NewConfig()
    // TODO configure consumer

    consumer, err := sarama.NewConsumer(brokerList, conf)
    if err != nil {
        return nil, err
    }
    partitionConsumer, err := consumer.ConsumePartition(topic, 0, sarama.OffsetOldest)
    if err != nil {
        return nil, err
    }
    return partitionConsumer, err
}
As you can see in the code, I have imported the compiled .proto file and reference it in the main function in order to send and receive the message. The problem here is that the solution is not generic: I will receive messages of different .proto types at the consumer end.
How can I make it generic? I know there is something called a self-describing message (dynamic message) as part of protobuf. I referred to this link https://developers.google.com/protocol-buffers/docs/techniques?csw=1#self-description , but it doesn't have any explanation of how to embed this as part of pixel.proto (the example I have used) so that at the consumer end I can directly deserialize it to the required type.
You would define a generic container message type that includes a DescriptorSet field and an Any field.
When sending, you build an instance of that generic message type, setting the Any field with an instance of your Pixel message and setting the DescriptorSet field with the DescriptorSet of the Pixel type.
That allows the receiver of such a message to parse the Any contents using the DescriptorSet you attach. In practical terms, this sends a piece of the proto definition together with the message, so receivers don't need pre-shared proto definitions or generated code.
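For reference, the protobuf techniques page linked in the question sketches that container type roughly like this:
syntax = "proto3";

import "google/protobuf/any.proto";
import "google/protobuf/descriptor.proto";

message SelfDescribingMessage {
    // Set of FileDescriptorProtos which describe the type and its dependencies.
    google.protobuf.FileDescriptorSet descriptor_set = 1;

    // The message and its type, encoded as an Any message.
    google.protobuf.Any message = 2;
}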
Having said that, I'm not sure this is what you really want, because if you are planning to share proto definitions or generated code with clients anyway, then simply using a oneof field in a container type would be much simpler.

Unable to consume messages from locally running Kafka server, using Golang Sarama Package

I am making a simple Telegram bot that reads messages from a local Kafka server and prints them out to a chat.
Both the ZooKeeper and Kafka server config files are at their defaults. The console consumer works. The problem arises when I try to consume messages from code using the Golang Sarama package. Before I added these lines:
case err := <-pc.Errors():
    log.Panic(err)
the program only printed the messages once, after which it would stall.
Now it panics, printing this to the log:
kafka: error while consuming test1/0: kafka: broker not connected
Here's the code:
type kafkaResponse struct {
    telega  *tgbotapi.Message
    message []byte
}

type kafkaRequest struct {
    telega *tgbotapi.Message
    topic  string
}

var kafkaBrokers = []string{"localhost:9092"}

func main() {
    // channels for request/response
    var reqChan = make(chan kafkaRequest)
    var respChan = make(chan kafkaResponse)

    // start kafka client routine to listen to the topic channel
    go consumer(reqChan, respChan, kafkaBrokers)

    // bot thingy here
    bot, err := tgbotapi.NewBotAPI(token)
    if err != nil {
        log.Panic(err)
    }

    bot.Debug = true
    log.Printf("Authorized on account %s", bot.Self.UserName)

    u := tgbotapi.NewUpdate(0)
    u.Timeout = 60
    updates, err := bot.GetUpdatesChan(u)

    for {
        select {
        case update := <-updates:
            if update.Message == nil {
                continue
            }
            switch update.Message.Text {
            case "Topic: test1":
                topic := "test1"
                reqChan <- kafkaRequest{update.Message, topic}
            }
        case response := <-respChan:
            bot.Send(tgbotapi.NewMessage(response.telega.Chat.ID, string(response.message)))
        }
    }
}
Here's the consumer.go:
func consumer(reqChan chan kafkaRequest, respChan chan kafkaResponse, brokers []string) {
    config := sarama.NewConfig()
    config.Consumer.Return.Errors = true

    // Create new consumer
    consumer, err := sarama.NewConsumer(brokers, config)
    if err != nil {
        panic(err)
    }

    defer func() {
        if err := consumer.Close(); err != nil {
            panic(err)
        }
    }()

    select {
    case request := <-reqChan:
        // get all partitions on the given topic
        partitionList, err := consumer.Partitions(request.topic)
        if err != nil {
            fmt.Println("Error retrieving partitionList ", err)
        }

        initialOffset := sarama.OffsetOldest

        for _, partition := range partitionList {
            pc, _ := consumer.ConsumePartition(request.topic, partition, initialOffset)

            go func(pc sarama.PartitionConsumer) {
                for {
                    select {
                    case message := <-pc.Messages():
                        respChan <- kafkaResponse{request.telega, message.Value}
                    case err := <-pc.Errors():
                        log.Panic(err)
                    }
                }
            }(pc)
        }
    }
}
You are closing your consumer right after setting up all the PartitionConsumers, in this code:
defer func() {
    if err := consumer.Close(); err != nil {
        panic(err)
    }
}()
However, the documentation specifies that you should only close the consumer after all the PartitionConsumers have been closed.
// Close shuts down the consumer. It must be called after all child
// PartitionConsumers have already been closed.
Close() error
I would recommend adding a sync.WaitGroup around the go func(pc sarama.PartitionConsumer) goroutines, and only closing the consumer once they have all finished.
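A minimal sketch of that suggestion, reusing the variables from the question (not tested against the poster's setup):
var wg sync.WaitGroup
for _, partition := range partitionList {
    pc, err := consumer.ConsumePartition(request.topic, partition, initialOffset)
    if err != nil {
        fmt.Println("Error creating partition consumer ", err)
        continue
    }

    wg.Add(1)
    go func(pc sarama.PartitionConsumer) {
        defer wg.Done()
        defer pc.Close()
        for {
            select {
            case message := <-pc.Messages():
                respChan <- kafkaResponse{request.telega, message.Value}
            case err := <-pc.Errors():
                log.Println(err)
                return
            }
        }
    }(pc)
}

// Only close the parent consumer once every PartitionConsumer is done;
// this replaces the defer consumer.Close() pattern above.
wg.Wait()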
