My AMQP system seems to be losing messages, so I'd like a way to see whether messages are actually queued before being consumed.
I have several microservices communicating via AMQP messages on Node.js, using CloudAMQP. One of these microservices, MS[B], generates .pdf files; the process is pretty heavy and takes about a minute per request. So I deliver the .pdf asynchronously, triggering a webhook once it is finished, and generate one at a time using prefetch = 1.
So one MS[A] collects all the requests from the user, answers back to them saying "ok, request received, listen on the webhook" and in parallel it asks to the MS[B] to generate pdfs. MS[B] has prefetch=1, so consumes just one request per time. Once finished, sends the response to the callback queue of MS[A], which triggers the user webhook saying "the pdf, it's ready".
The problem is that MS[B] misses all the messages while busy:
it consumes one request from MS[A]
starts generating the .pdf
while generating, it discards all the other messages that MS[A] sends, as if there would be not any queue
it finishes the .pdf, sending ACK to MS[A]
then it starts again accepting messages, taking the last one received after being idle, losing all the previous ones.
Why? How can I find the problem, what could I monitor?
Communications between other MSs works well, with messages correctly ordered in queues. Just this one, with prefetch=1, loses messages.
I am NOT using the no-ack option. I don't know what to try, what to test, or what to monitor to find the problem.
How can I see whether messages are correctly queued before being consumed, or just lost?
Below, the implementation of the messaging system
Channel Creation
/*
Starting Point of a connection to the CLOUDAMQP_URL server, then exec the callback
*/
start(callback) {
var self = this;
// if the connection is closed or fails to be established at all, we will reconnect
amqp.connect(process.env.CLOUDAMQP_URL + "?heartbeat=60")
.then(
function (conn) {
// create queues and consume mechanism
self.amqpConn = conn;
setTimeout(() => {
startPublisher();
}, 200);
setTimeout(() => {
createCallbackQueue();
}, 1000);
setTimeout(() => {
callback();
}, 2000);
});
// create publisher channels
function startPublisher() {
self.amqpConn.createConfirmChannel()
.then(function (ch) {
self.pubChannel = ch;
logger.debug("🗣️ pubChannel ready");
while (true) {
var m = self.offlinePubQueue.shift();
if (!m) break;
self.publish(m[0], // exchange
m[1], // routingKey
m[2], // content,
undefined // correlationId
);
}
});
}
// create callback channel
function createCallbackQueue() {
self.amqpConn.createChannel()
.then(function (channel) {
channel.assertQueue(self.CALLBACK_QUEUE_NAME, {
durable: true,
exclusive: true, // callback are exclusive
})
.then(function (q) {
logger.debug(" 👂 Waiting for RPC RESP in " + self.CALLBACK_QUEUE_NAME);
channel.consume(q.queue,
processCallback, {
noAck: false
}
);
});
// process messages of the callback
function processCallback(msg) {
var correlationId = msg.properties.correlationId;
}
//callback received
if (self.correlationIds_map[correlationId]) {
delete self.correlationIds_map[correlationId];
var content = JSON.parse(msg.content.toString());
self.eventEmitter.emit(correlationId, content);
}
}
});
}
return deferred.promise;
}
Consuming Messages
/*
#worker_queue - the name of the queue
*/
// Consume message from 'worker_queue', A worker that acks messages only if processed succesfully
startWorker(worker_queue, routes) {
var self = this;
logger.debug("startWorker " + self.CALLBACK_QUEUE_NAME);
var channel;
worker_queue = self.MICROSERVICE_NAME + worker_queue;
self.amqpConn.createChannel()
.then(
function (ch) {
channel = ch;
ch.prefetch(self.opt.prefetch); // = 1 for MS[B] generating pdf
channel.assertQueue(worker_queue, {
durable: true,
exclusive: true
})
.then(function (q) {
channel.consume(worker_queue, processMsg, {
noAck: false
});
});
});
// call the 'function from interface' passing params, and send the ACK
function processMsg(msg) {
work(msg)
.then(function (data) {
channel.ack(msg, false); // allUpTo = false
})
.catch(function (err) {
channel.ack(msg, false);
// channel.reject(msg, false); // requeue = false
// this.closeOnErr(e);
});
}
// execute the command, and queue back a response, checking if it's an error or not
function work(msg) {
var deferred = Q.defer();
var correlationId;
try {
correlationId = msg.properties.correlationId;
} catch (err) {}
work_function(msg.content, correlationId)
.then(function (resp) {
var content = {
data: resp
};
content = Buffer.from(JSON.stringify(content));
channel.sendToQueue(msg.properties.replyTo,
content, {
correlationId: correlationId,
content_type: 'application/json'
}
);
deferred.resolve(resp);
});
return deferred.promise;
}
}
Publish Messages
publish(exchange, routingKey, content, correlationId) {
var self = this;
var deferred = Q.defer();
self.correlationIds_map[correlationId] = true;
self.pubChannel.publish(exchange, routingKey, content,
{
replyTo: self.CALLBACK_QUEUE_NAME,
content_type : 'application/json',
correlationId: correlationId,
persistent : true
},
function(err, ok) {
if (err)
{
self.offlinePubQueue.push([exchange, routingKey, content]); // try again
self.pubChannel.connection.close();
deferred.resolve('requeued');
}
else
{
deferred.resolve(ok);
}
});
return deferred.promise;
}
Related
This is on client side
// On every 'connect' event, log the current socket id and send this
// client's token to the server in a 'token' event.
socket.on('connect', () => {
console.log('client connect', socket.id);
const token = getToken();
socket.emit('token', token);
});
// Receives 'message' events from the server; the body is elided in the original.
socket.on('message', data => {
....
//handle message
});
This is on server side
// For every new socket, wait for its 'token' event, resolve the token to a
// user id and register the socket in `connections[userId]` (a Map keyed by
// socket). The socket is unregistered again on 'disconnect'.
io.on('connection', (client) => {
  client.on('token', (token) => {
    verifyToken(token)
      .then(({ _id: clientId }) => {
        if (!clientId) return;
        if (!connections[clientId]) {
          connections[clientId] = new Map();
        }
        connections[clientId].set(client, 1);
        client.on('disconnect', () => {
          const sockets = connections[clientId];
          if (!sockets) return;
          sockets.delete(client);
          // drop the per-user Map once its last socket is gone
          if (sockets.size === 0) {
            delete connections[clientId];
          }
        });
      })
      .catch((err) => {
        // a rejected verification must not surface as an unhandled rejection
        console.error('token verification failed', client.id, err);
      });
  });
});
}
async sendMessageToClients (workspaceId, message) {
const workspace = await getWorkspaceQuery(workspaceId);
if (!workspace) return;
const workspaceMembers = workspace.members.map(({ user }) => user);
for (const memberId of workspaceMembers) {
if (connections[memberId]) {
for (const clientConnection of connections[memberId].keys()) {
console.log('send to client', memberId, message.content, clientConnection.connected, clientConnection.id);
clientConnection.emit('message', message);
}
}
}
};
}
I purposely take a client offline by disconnecting the Wi-Fi connection (putting it in offline mode). What happens is that:
a. if the disconnection is short, socket.id stays the same and I can get the buffered messages sent by other clients when it comes back online;
b. but if the disconnection is longer, the socket.id changes, and I can't get the buffered messages sent by other clients when it comes back online.
How should I address that?
Since according to here the messages should be ideally buffered after reconnection.
I have a websocket in api gateway connected to a lambda that looks like this:
const AWS = require('aws-sdk');
const amqp = require('amqplib');
// API Gateway Management API client; used below to post data to a websocket
// connection. 'MY_ENDPOINT' is presumably a placeholder for the real endpoint.
const api = new AWS.ApiGatewayManagementApi({
endpoint: 'MY_ENDPOINT',
});
/**
 * Post `response` to the websocket connection identified by `connectionId`.
 *
 * @param {string} response - payload; converted to a Buffer before sending.
 * @param {string} connectionId - target websocket connection id.
 * @returns {Promise} whatever `postToConnection(...).promise()` settles with.
 */
async function sendMsgToApp(response, connectionId) {
  console.log('=========== posting reply');
  const request = api.postToConnection({
    ConnectionId: connectionId,
    Data: Buffer.from(response),
  });
  return request.promise();
}
// Host part of the RabbitMQ URL ('MY_RMQ_SERVER_URL' is presumably a placeholder).
let rmqServerUrl =
'MY_RMQ_SERVER_URL';
// Module-level handle to the AMQP connection; assigned by the handler on
// '$connect' and closed on '$disconnect'.
let rmqServerConn = null;
exports.handler = async event => {
console.log('websocket event:', event);
const { routeKey: route, connectionId } = event.requestContext;
switch (route) {
case '$connect':
console.log('user connected');
const creds = event.queryStringParameters.x;
console.log('============ x.length:', creds.length);
const decodedCreds = Buffer.from(creds, 'base64').toString('utf-8');
try {
const conn = await amqp.connect(
`amqps://${decodedCreds}#${rmqServerUrl}`
);
const channel = await conn.createChannel();
console.log('============ created channel successfully:');
rmqServerConn = conn;
const [userId] = decodedCreds.split(':');
const { queue } = await channel.assertQueue(userId, {
durable: true,
autoDelete: false,
});
console.log('============ userId:', userId, 'queue:', queue);
channel.consume(queue, msg => {
console.log('========== msg:', msg);
const { content } = msg;
const msgString = content.toString('utf-8');
console.log('========== msgString:', msgString);
sendMsgToApp(msgString, connectionId)
.then(res => {
console.log(
'================= sent queued message to the app, will ack, outcome:',
res
);
try {
channel.ack(msg);
} catch (e) {
console.log(
'================= error acking message:',
e
);
}
})
.catch(e => {
console.log(
'================= error sending queued message to the app, will not ack, error:',
e
);
});
});
} catch (e) {
console.log(
'=========== error initializing amqp connection',
e
);
if (rmqServerConn) {
await rmqServerConn.close();
}
const response = {
statusCode: 401,
body: JSON.stringify('failed auth!'),
};
return response;
}
break;
case '$disconnect':
console.log('user disconnected');
if (rmqServerConn) {
await rmqServerConn.close();
}
break;
case 'message':
console.log('message route');
await sendMsgToApp('test', connectionId);
break;
default:
console.log('unknown route', route);
break;
}
const response = {
statusCode: 200,
body: JSON.stringify('Hello from websocket Lambda!'),
};
return response;
};
The amqp connection is for a rabbitmq server that's provisioned by amazonmq. The problem I have is that messages published to the queue either do not show up at all in the .consume callback, or they only show up after the websocket is disconnected and reconnected. Essentially they're missing until a point much later after which they show up unexpectedly. That's within the websocket. Even when they do show up, they don't get sent to the client (app in this case) that's connected to the websocket. What could be the problem here?
The problem here is that I had the wrong idea about how API Gateway's websockets work. API gateway maintains the websocket connection but not the lambda itself. I put my .consume subscription logic inside the lambda, which doesn't work because the lambda runs and terminates instead of being kept alive. A better method would be to make the queue an event source for the lambda. However this also didn't work for me because it requires you to know your queues when setting up the lambda. My queues are dynamically created so that violated the requirement. I ended up standing up a rmq server on a vps.
I have successfully implemented this mechanism in my application:
https://vividcode.io/Spring-5-WebFlux-with-Server-Sent-Events/
I can receive events with curl every second, as shown in the example.
My problem is: I cannot receive these events in Angular 5. I have tried many things. Currently my service code looks like this:
/**
 * Stream server-sent events for a migration process as an Observable.
 *
 * Emits the 'open' event first, then every event whose name equals
 * `eventName`. Events that the server sends with an explicit name are not
 * delivered to the default 'message' listener, so pass that name here.
 * Unsubscribing closes the underlying EventSource.
 *
 * NOTE(review): `headers` is an Angular HttpHeaders instance; confirm that
 * EventSourcePolyfill accepts it rather than a plain name/value object.
 *
 * @param processName name of the process, appended to the resource URL.
 * @param eventName SSE event name to listen for; defaults to 'message'
 *   (unnamed events), which matches the previous `onmessage` behaviour.
 */
public getMigrationProgress(processName: string, eventName: string = 'message'): Observable<any> {
  let headers: HttpHeaders = new HttpHeaders();
  headers = headers.append('X-Authorization', this._sessionService.getAuthToken());
  headers = headers.append('accept', 'text/event-stream');
  const url = config.restApi.url + this.getResource() + '/' + processName;
  return Observable.create(observer => {
    const eventSource = new EventSourcePolyfill(url, { headers: headers });
    eventSource.addEventListener(eventName, (event) => {
      // emit inside the Angular zone so change detection sees the update
      this.zone.run(() => observer.next(event));
    });
    eventSource.onopen = (event) => {
      observer.next(event);
    };
    eventSource.onerror = (error) => {
      if (eventSource.readyState === 0) {
        console.log('The stream has been closed by the server.');
        eventSource.close();
        observer.complete();
      } else {
        observer.error('EventSource error: ' + error);
      }
    };
    // teardown: without it the connection stays open after unsubscribe
    return () => eventSource.close();
  });
}
It only opens connection, does not receive events (Method onopen works once, onmessage - never). Server sends them though.
Any ideas how to fix this?
Turned out that if you set event name on server, you cannot receive it by onmessage method.
In the example the event name was set to "random". In order to receive it you have to do it like this:
// Subscribe by event name: events the server names "random" are delivered
// to this listener.
eventSource.addEventListener('random', (event) => {
  console.log(event);
});
So I have implemented an email system like the one here: https://cloudonaut.io/integrate-sqs-and-lambda-serverless-architecture-for-asynchronous-workloads/
Flow is as follows
HTTP request to send an email -> API Gateway -> HttpRequestLambda -> SQS <-> SQSMessageConsumerLambda (scheduled) -> MessageWorkerLambda (sends email via email service provider)
My SQSMessageConsumerLambda is scheduled to run every minute
I changed the SQS consumer to recursively call itself when the timeout is getting near rather than just ending. Doing this means that SQS queue has a better chance of not piling up with too many messages.
This seems to work great so far, but I have a couple of questions:
1. If the function times out, the messages that were read from the queue are probably still within their visibility timeout period, so invoking the lambda recursively means that they can't actually be re-read from the queue until their visibility timeout expires, which is unlikely to be the case immediately after the recursive call. So would it be an idea to pass these messages into the recursive call itself, and then somehow check for these 'passed-in messages' at the beginning of the consumer lambda and send them directly to workers in that case?
2.SQSMessageConsumerLambda is still a bit of a bottleneck isn't it? as it takes about 40-50 ms to invoke the MessageWorkerLambda for each message it wants to delegate. Or, does the 'async.parallel' mitigate this ?
3.Would it be better if we could somehow elastically increase the number of SQSMessageConsumerLambda based on some CloudWatch alarms , i.e. alarms that check if there are more than X amount of messages on the queue for X minutes ?
var AWS = require('aws-sdk');
var sqs = new AWS.SQS();
var async = require("async");
var lambda = new AWS.Lambda();
// URL of the "<STAGE>-emailtaskqueue" SQS queue, built from the REGION,
// ACCOUNT_ID and STAGE environment variables.
var QUEUE_URL = `https://sqs.${process.env.REGION}.amazonaws.com/${process.env.ACCOUNT_ID}/${process.env.STAGE}-emailtaskqueue`;
// Name of the worker Lambda each queue message is handed to.
var EMAIL_WORKER = `${process.env.SERVICE}-${process.env.STAGE}-emailWorker`
// Name of the consumer Lambda itself, used when it re-invokes itself.
var THIS_LAMBDA = `${process.env.SERVICE}-${process.env.STAGE}-emailTaskConsumer`
/**
 * Read up to 10 messages from the email task queue, waiting up to 20 seconds
 * for one to arrive.
 *
 * @param {Function} callback - called with `(err)` on failure, otherwise with
 *   `(null, messages)`; `messages` is whatever `data.Messages` holds and may
 *   be undefined.
 */
function receiveMessages(callback) {
  const request = {
    QueueUrl: QUEUE_URL,
    MaxNumberOfMessages: 10,
    // WaitTimeSeconds: how long (in seconds) the call waits for a message to
    // arrive in the queue before returning
    WaitTimeSeconds: 20,
  };

  sqs.receiveMessage(request, (err, data) => {
    if (err) {
      console.error(err, err.stack);
      callback(err);
      return;
    }

    const received = data.Messages;
    const hasMessages = Boolean(received && received.length > 0);
    if (hasMessages) {
      console.log('Got ', received.length, ' messages off the queue');
    } else {
      console.log('Got no messages from queue');
    }
    callback(null, received);
  });
}
/**
 * Asynchronously invoke the email worker Lambda for one SQS message.
 *
 * The worker receives `{ ReceiptHandle, body }`, where `body` is the parsed
 * JSON of `task.Body`. A body that is not valid JSON is reported through
 * `callback` instead of being thrown, so one malformed message cannot crash
 * the caller from inside its parallel fan-out.
 *
 * @param {{ReceiptHandle: string, Body: string}} task - SQS message.
 * @param {Function} callback - `(err)` on failure, `(null, data)` on success.
 */
function invokeWorkerLambda(task, callback) {
  console.log('Need to invoke worker for this task..', task);

  // task.Body is a json string
  var body;
  try {
    body = JSON.parse(task.Body);
  } catch (parseErr) {
    console.error(parseErr, parseErr.stack);
    callback(parseErr);
    return;
  }

  var payload = {
    "ReceiptHandle": task.ReceiptHandle,
    "body": body
  };
  console.log('payload:', payload);

  // using 'Event' means use async (http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Lambda.html#invoke-property)
  var params = {
    FunctionName: EMAIL_WORKER,
    InvocationType: 'Event',
    Payload: JSON.stringify(payload)
  };

  var startedAt = Date.now();
  lambda.invoke(params, function (err, data) {
    var millis = Date.now() - startedAt;
    console.log('took ', millis, ' to invoke ', EMAIL_WORKER, ' asynchronously');
    if (err) {
      console.error(err, err.stack);
      callback(err);
    } else {
      callback(null, data);
    }
  });
}
/**
 * Drain the queue: read a batch, hand every message to a worker Lambda in
 * parallel, then either read the next batch (more than 20 s of run time
 * left) or asynchronously re-invoke this Lambda and stop.
 *
 * @param {object} context - Lambda context; only `getRemainingTimeInMillis()` is used.
 * @param {Function} callback - `(err)` on failure; `(null, "DONE")` when the
 *   queue returned no messages; `(null, 'Lambda was just called recursively')`
 *   after handing over to a fresh invocation.
 */
function handleSQSMessages(context, callback) {
  receiveMessages(function (err, messages) {
    if (err) {
      // previously a failed read was reported as "DONE"
      callback(err);
      return;
    }
    if (!messages || messages.length === 0) {
      callback(null, "DONE");
      return;
    }

    var invocations = messages.map(function (message) {
      return function (done) {
        invokeWorkerLambda(message, done);
      };
    });

    async.parallel(invocations, function (err) {
      if (err) {
        console.error(err, err.stack);
        callback(err);
        return;
      }
      if (context.getRemainingTimeInMillis() > 20000) {
        console.log('there is more time to read more messages for this run of the cron');
        handleSQSMessages(context, callback);
        return;
      }
      console.log('remaining time in millis:', context.getRemainingTimeInMillis(), ' No more time here, invoking this lambda again');
      lambda.invoke({ FunctionName: THIS_LAMBDA, InvocationType: 'Event', Payload: '{"recursiveMarker":true}' }, function (err, data) {
        if (err) {
          console.error(err, err.stack);
          callback(err);
        } else {
          console.log('data from the invocation:', data);
          callback(null, 'Lambda was just called recursively');
        }
      });
    });
  });
}
// Lambda entry point: logs the triggering event and starts processing the
// queue, passing the Lambda context and callback through.
module.exports.emailTaskConsumer = function (event, context, callback) {
  console.log('in an emailTaskConsumer. Was this a recursive call ?', event);
  handleSQSMessages(context, callback);
};
1) The visibility timeout is a great feature of SQS allowing you to build resilient systems. Could not find a reason to try to handle failures on your own.
2) You could batch all messages read from the queue, pass them to the Worker Lambda, and process them at once.
3) You could add additional CloudWatch event rules triggering the Consumer Lambda to increase the read throughput.
Use SNS to trigger the Lambda. This is the correct way of working with Lambda functions. Your HttpRequestLambda would fire an SNS notification and another Lambda function is immediately triggered to respond to that event. Actually, if you are not doing anything else in HttpRequestLambda, you can also replace it with an AWS API proxy. Here you can see a full tutorial about exposing the SNS API via API Gateway.
In the socket.io acknowledgement example we see a client's send/emit being called back with the server's response. Is the same functionality available in the reverse direction - i.e. how does the server confirm client reception for a send/emit from the server? It would be nice to have a send/emit callback even just to indicate reception success. Didn't see this functionality documented anywhere...
Thanks!
Looking in the socket.io source I found that indeed ACKs are supported for server-sent messages (but not in broadcasts!) (lines 115-123 of socket.io/lib/socket.js):
// Excerpt: when the last emit argument is a function it is treated as an
// acknowledgement callback.
if ('function' == typeof args[args.length - 1]) {
// acks are refused when emitting to rooms or with the broadcast flag set
if (this._rooms || (this.flags && this.flags.broadcast)) {
throw new Error('Callbacks are not supported when broadcasting');
}
debug('emitting packet with ack id %d', this.nsp.ids);
// remove the callback from args and store it under the current namespace id
this.acks[this.nsp.ids] = args.pop();
// tag the packet with that id, then advance the counter
packet.id = this.nsp.ids++;
}
An example of how the ack should work (not tested):
// server-side:
// Custom events arrive on the per-connection socket, not on `io` itself, so
// the 'msg' listener has to be registered inside the 'connection' handler.
io.on('connection', (socket) => {
  socket.on('msg', (data, ackCallback) => {
    console.log('data from client', data);
    // calling the ack function sends its arguments back to the emitter
    ackCallback('roger roger');
  });
});
// client-side:
// The trailing function is the acknowledgement; it runs with whatever the
// other side passed to its ack callback.
socket.emit('msg', someData, (answer) => {
  console.log('server\'s acknowledgement:', answer);
});
If we want to be 100% sure that reception succeeded, just adding an ack callback is not enough, because we also need to know whether the ack callback was actually run or not.
The socket.io 3.0 document add this timeout example to show how to do that. But the timeout value is the tricky one.
/**
 * Build an acknowledgement callback that races against a timer.
 *
 * Whichever happens first wins: if the returned function is called before
 * `timeout` ms elapse, the timer is cancelled and `onSuccess` receives the
 * call's arguments; otherwise `onTimeout` runs with no arguments. Only the
 * first of the two outcomes ever fires.
 */
const withTimeout = (onSuccess, onTimeout, timeout) => {
  let settled = false;

  // Claims the single outcome slot; returns true only for the first claimant.
  const claim = () => {
    if (settled) return false;
    settled = true;
    return true;
  };

  const timerId = setTimeout(() => {
    if (claim()) onTimeout();
  }, timeout);

  return (...received) => {
    if (!claim()) return;
    clearTimeout(timerId);
    onSuccess.apply(this, received);
  };
};
// Emit "hello" with an acknowledgement that gives up after 1000 ms:
// logs "success!" if the ack arrives in time, "timeout!" otherwise.
socket.emit("hello", 1, 2, withTimeout(() => {
console.log("success!");
}, () => {
console.log("timeout!");
}, 1000));
Yes, we can send a response back to the server from the client (as an acknowledgement).
According to new documentation of socket(4.x)
Server-side
// Payload sent to the client with the "my-event" event.
let dataToSend = { test: "fromServer" };

// Emit with a 5000 ms acknowledgement timeout.
socket.timeout(5000).emit("my-event", dataToSend, (err, response) => {
  if (!err) {
    console.log(response);
  }
  // otherwise: the other side did not acknowledge the event in the given delay
});
Client-side
// Handle "my-event" and acknowledge it by calling the supplied callback.
socket.on("my-event", (data, callback) => {
// any logic for `data` goes here (the data sent by the server, i.e. { test: "fromServer" })
callback({ test: "test" });
});