Lambda that reads from SQS queue - bottleneck? - aws-lambda

So I have implemented an email system like the one here: https://cloudonaut.io/integrate-sqs-and-lambda-serverless-architecture-for-asynchronous-workloads/
Flow is as follows
HTTP request to send an email -> API Gateway -> HttpRequestLambda -> SQS <-> SQSMessageConsumerLambda (scheduled) -> MessageWorkerLambda (sends email via email service provider)
My SQSMessageConsumerLambda is scheduled to run every minute
I changed the SQS consumer to recursively call itself when the timeout is getting near rather than just ending. Doing this means that SQS queue has a better chance of not piling up with too many messages.
This seems to work great so far, but I have a couple of questions:
1. If the function times out, the messages that were read from the queue are probably still within their visibility timeout period, so after invoking the lambda recursively they can't actually be re-read from the queue until their visibility timeout expires, which is unlikely to have happened immediately after the recursive call. So would it be an idea to pass these messages into the recursive call itself, and then somehow check for these 'passed in messages' at the beginning of the consumer lambda and send them directly to workers in that case?
2.SQSMessageConsumerLambda is still a bit of a bottleneck isn't it? as it takes about 40-50 ms to invoke the MessageWorkerLambda for each message it wants to delegate. Or, does the 'async.parallel' mitigate this ?
3.Would it be better if we could somehow elastically increase the number of SQSMessageConsumerLambda based on some CloudWatch alarms , i.e. alarms that check if there are more than X amount of messages on the queue for X minutes ?
var AWS = require('aws-sdk');
var sqs = new AWS.SQS();
var async = require("async");
var lambda = new AWS.Lambda();
// URL of the email task queue, assembled from the REGION, ACCOUNT_ID and STAGE environment variables.
const QUEUE_URL = `https://sqs.${process.env.REGION}.amazonaws.com/${process.env.ACCOUNT_ID}/${process.env.STAGE}-emailtaskqueue`;
// Name of the worker Lambda that receives one task per invocation.
const EMAIL_WORKER = `${process.env.SERVICE}-${process.env.STAGE}-emailWorker`;
// Name of this consumer Lambda, used when it re-invokes itself.
const THIS_LAMBDA = `${process.env.SERVICE}-${process.env.STAGE}-emailTaskConsumer`;
/**
 * Reads up to 10 messages from the email task queue using a 20 second long poll.
 *
 * @param {Function} callback - Node-style callback. Called with the SQS error on
 *   failure, otherwise with `null` and `data.Messages` (which may be undefined
 *   when nothing was received).
 */
function receiveMessages(callback) {
  // WaitTimeSeconds: how long (in seconds) the call waits for a message to
  // arrive in the queue before returning.
  const request = {
    QueueUrl: QUEUE_URL,
    MaxNumberOfMessages: 10,
    WaitTimeSeconds: 20
  };
  sqs.receiveMessage(request, (err, data) => {
    if (err) {
      console.error(err, err.stack);
      callback(err);
      return;
    }
    const received = data.Messages;
    const gotSome = received && received.length > 0;
    if (gotSome) {
      console.log('Got ',received.length, ' messages off the queue' );
    } else {
      console.log('Got no messages from queue');
    }
    callback(null, received);
  });
}
/**
 * Asynchronously invokes the email worker Lambda for a single SQS message.
 *
 * @param {Object} task - SQS message; `task.Body` must be a JSON string and
 *   `task.ReceiptHandle` is forwarded to the worker.
 * @param {Function} callback - Node-style callback. Receives the parse or
 *   invoke error, or `null` plus the data returned by `lambda.invoke`.
 */
function invokeWorkerLambda(task, callback) {
  console.log('Need to invoke worker for this task..',task);
  // task.Body is a json string. A malformed body must be reported through the
  // callback: a synchronous throw here would escape the async.parallel task
  // this function runs in.
  var body;
  try {
    body = JSON.parse(task.Body);
  } catch (parseErr) {
    console.error(parseErr, parseErr.stack);
    callback(parseErr);
    return;
  }
  var payload = {
    "ReceiptHandle" : task.ReceiptHandle,
    "body" : body
  };
  console.log('payload:',payload);
  // InvocationType 'Event' means the worker is invoked asynchronously
  // (http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Lambda.html#invoke-property)
  var params = {
    FunctionName: EMAIL_WORKER,
    InvocationType: 'Event',
    Payload: JSON.stringify(payload)
  };
  var startedAt = Date.now();
  lambda.invoke(params, function(err, data) {
    var millis = Date.now() - startedAt;
    console.log('took ', millis, ' to invoke ', EMAIL_WORKER, ' asynchronously');
    if (err) {
      console.error(err, err.stack);
      callback(err);
    } else {
      callback(null, data);
    }
  });
}
/**
 * Reads a batch of messages, hands each one to a worker Lambda in parallel and
 * then either reads another batch (if more than 20 seconds of execution time
 * remain) or re-invokes this Lambda asynchronously.
 *
 * @param {Object} context - Lambda context; only `getRemainingTimeInMillis()` is used.
 * @param {Function} callback - Node-style callback. Receives an error, or
 *   `null` plus "DONE" (queue was empty) or
 *   'Lambda was just called recursively'.
 */
function handleSQSMessages(context, callback) {
  receiveMessages(function(receiveErr, messages) {
    // A failed receive must not be reported as an empty queue.
    if (receiveErr) {
      callback(receiveErr);
      return;
    }
    if (!messages || messages.length === 0) {
      callback(null, "DONE");
      return;
    }
    // One task per message; `done` is deliberately not named `callback` so it
    // does not shadow the handler's own callback.
    var invocations = messages.map(function(message) {
      return function(done) {
        invokeWorkerLambda(message, done);
      };
    });
    async.parallel(invocations, function(err) {
      if (err) {
        console.error(err, err.stack);
        callback(err);
        return;
      }
      if (context.getRemainingTimeInMillis() > 20000) {
        console.log('there is more time to read more messages for this run of the cron')
        handleSQSMessages(context, callback);
        return;
      }
      console.log('remaining time in millis:',context.getRemainingTimeInMillis(),' No more time here, invoking this lambda again')
      lambda.invoke({FunctionName: THIS_LAMBDA, InvocationType: 'Event',Payload: '{"recursiveMarker":true}' }, function(invokeErr, data) {
        if (invokeErr) {
          console.error(invokeErr, invokeErr.stack);
          callback(invokeErr);
        } else {
          console.log('data from the invocation:', data);
          callback(null, 'Lambda was just called recursively');
        }
      });
    });
  });
}
/**
 * Lambda entry point: logs the incoming event and starts draining the queue.
 *
 * @param {Object} event - Invocation payload; only logged.
 * @param {Object} context - Lambda context, passed on to handleSQSMessages.
 * @param {Function} callback - Lambda completion callback, passed on to handleSQSMessages.
 */
module.exports.emailTaskConsumer = function (event, context, callback) {
  console.log('in an emailTaskConsumer. Was this a recursive call ?', event);
  handleSQSMessages(context, callback);
};

1) The visibility timeout is a great feature of SQS allowing you to build resilient systems. Could not find a reason to try to handle failures on your own.
2) You could batch all messages read from the queue, send them to the Worker Lambda together and process them at once.
3) You could add additional CloudWatch event rules triggering the Consumer Lambda to increase the read throughput.

Use SNS to trigger the Lambda. This is the correct way of working with Lambda functions. Your HttpRequestLambda would fire an SNS notification and another Lambda function is immediately triggered to respond to that event. Actually, if you are not doing anything else in HttpRequestLambda, you can also replace it with an AWS API proxy. Here you can see a full tutorial about exposing the SNS API via API Gateway.

Related

Debug queue AMQP that loses messages using low prefetch

My AMQP system seems to be losing messages, so I'd like a way to see whether messages are actually queued before being consumed.
I have several MicroServices communicating by amqp messages on NodeJs, using CloudAmqp. One of this microservice MS[B] generates .pdf, the process it's pretty heavy and requires about a minute for each request. So I send the .pdf asyncronously, triggering a webhook once finished, and generate once per time using a PreFetch = 1
So one MS[A] collects all the requests from the user, answers back to them saying "ok, request received, listen on the webhook" and in parallel it asks to the MS[B] to generate pdfs. MS[B] has prefetch=1, so consumes just one request per time. Once finished, sends the response to the callback queue of MS[A], which triggers the user webhook saying "the pdf, it's ready".
The problem is that MS[B] misses all the messages while busy:
it consumes one request from MS[A]
starts generating the .pdf
while generating, it discards all the other messages that MS[A] sends, as if there would be not any queue
it finishes the .pdf, sending ACK to MS[A]
then it starts again accepting messages, taking the last one received after being idle, losing all the previous ones.
Why? How can I find the problem, what could I monitor?
Communications between other MSs works well, with messages correctly ordered in queues. Just this one, with prefetch=1, loses messages.
I am NOT using the NO-ACK rule. I don't know what try, what test and what monitor to find the problem.
How can I see whether messages are correctly queued before being consumed, or just lost?
Below, the implementation of the messaging system
Channel Creation
/*
Starting Point of a connection to the CLOUDAMQP_URL server, then exec the callback
*/
start(callback) {
var self = this;
// if the connection is closed or fails to be established at all, we will reconnect
amqp.connect(process.env.CLOUDAMQP_URL + "?heartbeat=60")
.then(
function (conn) {
// create queues and consume mechanism
self.amqpConn = conn;
setTimeout(() => {
startPublisher();
}, 200);
setTimeout(() => {
createCallbackQueue();
}, 1000);
setTimeout(() => {
callback();
}, 2000);
});
// create publisher channels
function startPublisher() {
self.amqpConn.createConfirmChannel()
.then(function (ch) {
self.pubChannel = ch;
logger.debug("🗣️ pubChannel ready");
while (true) {
var m = self.offlinePubQueue.shift();
if (!m) break;
self.publish(m[0], // exchange
m[1], // routingKey
m[2], // content,
undefined // correlationId
);
}
});
}
// create callback channel
function createCallbackQueue() {
self.amqpConn.createChannel()
.then(function (channel) {
channel.assertQueue(self.CALLBACK_QUEUE_NAME, {
durable: true,
exclusive: true, // callback are exclusive
})
.then(function (q) {
logger.debug(" 👂 Waiting for RPC RESP in " + self.CALLBACK_QUEUE_NAME);
channel.consume(q.queue,
processCallback, {
noAck: false
}
);
});
// process messages of the callback
function processCallback(msg) {
var correlationId = msg.properties.correlationId;
}
//callback received
if (self.correlationIds_map[correlationId]) {
delete self.correlationIds_map[correlationId];
var content = JSON.parse(msg.content.toString());
self.eventEmitter.emit(correlationId, content);
}
}
});
}
return deferred.promise;
}
Consuming Messages
/*
#worker_queue - the name of the queue
*/
// Consume message from 'worker_queue', A worker that acks messages only if processed succesfully
startWorker(worker_queue, routes) {
var self = this;
logger.debug("startWorker " + self.CALLBACK_QUEUE_NAME);
var channel;
worker_queue = self.MICROSERVICE_NAME + worker_queue;
self.amqpConn.createChannel()
.then(
function (ch) {
channel = ch;
ch.prefetch(self.opt.prefetch); // = 1 for MS[B] generating pdf
channel.assertQueue(worker_queue, {
durable: true,
exclusive: true
})
.then(function (q) {
channel.consume(worker_queue, processMsg, {
noAck: false
});
});
});
// call the 'function from interface' passing params, and send the ACK
function processMsg(msg) {
work(msg)
.then(function (data) {
channel.ack(msg, false); // allUpTo = false
})
.catch(function (err) {
channel.ack(msg, false);
// channel.reject(msg, false); // requeue = false
// this.closeOnErr(e);
});
}
// execute the command, and queue back a response, checking if it's an error or not
function work(msg) {
var deferred = Q.defer();
var correlationId;
try {
correlationId = msg.properties.correlationId;
} catch (err) {}
work_function(msg.content, correlationId)
.then(function (resp) {
var content = {
data: resp
};
content = Buffer.from(JSON.stringify(content));
channel.sendToQueue(msg.properties.replyTo,
content, {
correlationId: correlationId,
content_type: 'application/json'
}
);
deferred.resolve(resp);
});
return deferred.promise;
}
}
Publish Messages
publish(exchange, routingKey, content, correlationId) {
var self = this;
var deferred = Q.defer();
self.correlationIds_map[correlationId] = true;
self.pubChannel.publish(exchange, routingKey, content,
{
replyTo: self.CALLBACK_QUEUE_NAME,
content_type : 'application/json',
correlationId: correlationId,
persistent : true
},
function(err, ok) {
if (err)
{
self.offlinePubQueue.push([exchange, routingKey, content]); // try again
self.pubChannel.connection.close();
deferred.resolve('requeued');
}
else
{
deferred.resolve(ok);
}
});
return deferred.promise;
}

AWS API gateway websocket receives messages inconsistently

I have a websocket in api gateway connected to a lambda that looks like this:
const AWS = require('aws-sdk');
const amqp = require('amqplib');
// API Gateway Management API client; sendMsgToApp uses it to post data to a
// websocket connection. 'MY_ENDPOINT' looks like a placeholder for the real
// endpoint - confirm before deploying.
const api = new AWS.ApiGatewayManagementApi({
endpoint: 'MY_ENDPOINT',
});
/**
 * Posts a message to one websocket connection through the API Gateway
 * Management API.
 *
 * @param {string} response - Data to send; wrapped in a Buffer.
 * @param {string} connectionId - Target websocket connection id.
 * @returns {Promise} Result of `postToConnection`.
 */
async function sendMsgToApp(response, connectionId) {
  console.log('=========== posting reply');
  return api
    .postToConnection({
      ConnectionId: connectionId,
      Data: Buffer.from(response),
    })
    .promise();
}
// Host part of the RabbitMQ URL; the handler appends it to the decoded
// credentials. 'MY_RMQ_SERVER_URL' looks like a placeholder - confirm.
let rmqServerUrl =
'MY_RMQ_SERVER_URL';
// Module-level AMQP connection: set by the handler's $connect route and closed
// by its $disconnect route. It is a single variable, not one per websocket
// connection.
let rmqServerConn = null;
exports.handler = async event => {
console.log('websocket event:', event);
const { routeKey: route, connectionId } = event.requestContext;
switch (route) {
case '$connect':
console.log('user connected');
const creds = event.queryStringParameters.x;
console.log('============ x.length:', creds.length);
const decodedCreds = Buffer.from(creds, 'base64').toString('utf-8');
try {
const conn = await amqp.connect(
`amqps://${decodedCreds}#${rmqServerUrl}`
);
const channel = await conn.createChannel();
console.log('============ created channel successfully:');
rmqServerConn = conn;
const [userId] = decodedCreds.split(':');
const { queue } = await channel.assertQueue(userId, {
durable: true,
autoDelete: false,
});
console.log('============ userId:', userId, 'queue:', queue);
channel.consume(queue, msg => {
console.log('========== msg:', msg);
const { content } = msg;
const msgString = content.toString('utf-8');
console.log('========== msgString:', msgString);
sendMsgToApp(msgString, connectionId)
.then(res => {
console.log(
'================= sent queued message to the app, will ack, outcome:',
res
);
try {
channel.ack(msg);
} catch (e) {
console.log(
'================= error acking message:',
e
);
}
})
.catch(e => {
console.log(
'================= error sending queued message to the app, will not ack, error:',
e
);
});
});
} catch (e) {
console.log(
'=========== error initializing amqp connection',
e
);
if (rmqServerConn) {
await rmqServerConn.close();
}
const response = {
statusCode: 401,
body: JSON.stringify('failed auth!'),
};
return response;
}
break;
case '$disconnect':
console.log('user disconnected');
if (rmqServerConn) {
await rmqServerConn.close();
}
break;
case 'message':
console.log('message route');
await sendMsgToApp('test', connectionId);
break;
default:
console.log('unknown route', route);
break;
}
const response = {
statusCode: 200,
body: JSON.stringify('Hello from websocket Lambda!'),
};
return response;
};
The amqp connection is for a rabbitmq server that's provisioned by amazonmq. The problem I have is that messages published to the queue either do not show up at all in the .consume callback, or they only show up after the websocket is disconnected and reconnected. Essentially they're missing until a point much later after which they show up unexpectedly. That's within the websocket. Even when they do show up, they don't get sent to the client (app in this case) that's connected to the websocket. What could be the problem here?
The problem here is that I had the wrong idea about how API Gateway's websockets work. API gateway maintains the websocket connection but not the lambda itself. I put my .consume subscription logic inside the lambda, which doesn't work because the lambda runs and terminates instead of being kept alive. A better method would be to make the queue an event source for the lambda. However this also didn't work for me because it requires you to know your queues when setting up the lambda. My queues are dynamically created so that violated the requirement. I ended up standing up a rmq server on a vps.

Why is my lambda taking exactly 6 seconds every time to respond?

I am using NodeJS env with serverless framework.
The service is an endpoint for a contact form submission. Code looks something like this.
I have two async calls, one is writing to dynamoDB and another is sending an Email via SES.
/**
 * Contact-form endpoint: validates the request body, logs it to the database
 * and sends a notification email via SES.
 *
 * The handler is `async`, so every outcome is returned instead of being passed
 * to the Lambda callback; mixing the two styles is what the advice
 * accompanying this snippet identifies as the cause of the slow response.
 *
 * @param {Object} event - API Gateway event; `event.body` is a JSON string.
 * @param {Object} context - Lambda context (unused).
 * @returns {Promise<Object>} The success, validation-error or failure response.
 */
module.exports.blog = async (event, context) => {
  let data;
  try {
    data = JSON.parse(event.body);
  } catch (parseErr) {
    // A body that is not valid JSON is a validation failure, not a crash.
    console.error(parseErr);
    return validationErrRes;
  }
  if (!data || typeof data.text !== 'string') {
    return validationErrRes;
  }
  const handler = 'AB';
  // Build the SES parameters only after the payload has been validated.
  const sesParams = getSesParams(handler, data);
  try {
    await logToDB(handler, data);
  } catch (dbErr) {
    console.error(dbErr);
    return errRes(dbErr, 'Failed to log to DB');
  }
  try {
    await SES.sendEmail(sesParams).promise();
  } catch (emailErr) {
    console.error(emailErr);
    return errRes(emailErr, 'Failed to send mail');
  }
  return succsessResponse;
};
The response takes exactly 6sec when the dbput and sendMail takes total of < 300ms.
PS: Running both async calls parallelly does not help much.
Try removing the callback in your function definition and the call to your callback function. Just return the successResponse. You are already an async function so do not need to use a callback. You can also just return error.
module.exports.blog = async (event, context) => {
and
return {
statusCode: 200
}
and
return validationErrRes

Can't receive mqtt message published to topic with AWS.Lambda+AWS.IOT

To Send Message I use AWS.Lambda+AWS.IOT
var AWS = require('aws-sdk');
var iotdata = new AWS.IotData({"endpoint": 'XXXXXX'});
exports.handler = function(event, context) {
var params = {
topic : "testtopic/action1",
payload : "payload",
qos: 0
};
iotdata.publish(params,
function(err, data) {
if (err) console.log(err, err.stack);
else console.log(data);
}
);
};
Sometimes I receive messages (in the wrong order or with a big delay), but usually I receive no messages at all.
For receive messages I use AWS.Console -> AWS IoT -> MQTT client
What is wrong with my Lambda?

Getting an acknowledgment on server send/emit

In the socket.io acknowledgement example we see a client's send/emit being called back with the server's response. Is the same functionality available in the reverse direction - i.e. how does the server confirm client reception for a send/emit from the server? It would be nice to have a send/emit callback even just to indicate reception success. Didn't see this functionality documented anywhere...
Thanks!
Looking in the socket.io source I found that indeed ACKs are supported for server-sent messages (but not in broadcasts!) (lines 115-123 of socket.io/lib/socket.js):
// Excerpt: a function passed as the last emit argument is treated as the ack callback.
if ('function' == typeof args[args.length - 1]) {
// Ack callbacks are rejected when rooms are targeted or the broadcast flag is set.
if (this._rooms || (this.flags && this.flags.broadcast)) {
throw new Error('Callbacks are not supported when broadcasting');
}
debug('emitting packet with ack id %d', this.nsp.ids);
// Remove the callback from the arguments and store it under the current id...
this.acks[this.nsp.ids] = args.pop();
// ...then tag the packet with that id and advance the counter.
packet.id = this.nsp.ids++;
}
An example of how the ack should work (not tested):
// server-side:
// Client events arrive on the per-connection socket, not on `io` itself, so
// the 'msg' listener has to be registered inside the 'connection' handler.
io.on('connection', (socket) => {
  socket.on('msg', (data, ackCallback) => {
    console.log('data from client', data);
    // Calling the ack callback sends this value back to the emitter.
    ackCallback('roger roger');
  });
});
// client-side:
// The function passed as the last argument receives the server's acknowledgement.
socket.emit('msg', someData, (answer) => {
  console.log('server\'s acknowledgement:', answer);
});
If we want to be 100% sure of successful reception, just adding an ack callback is not enough, because we also need to know whether the ack callback was ever run.
The socket.io 3.0 documentation adds this timeout example to show how to do that. Choosing the timeout value is the tricky part.
/**
 * Wraps an acknowledgement callback with a deadline.
 *
 * Exactly one of the two callbacks runs: `onSuccess` (with the wrapper's
 * arguments) if the returned function is called before `timeout` milliseconds
 * have elapsed, otherwise `onTimeout` (with no arguments). Later calls of the
 * returned function are ignored.
 *
 * @param {Function} onSuccess - Called with the acknowledgement arguments.
 * @param {Function} onTimeout - Called when the deadline passes first.
 * @param {number} timeout - Deadline in milliseconds.
 * @returns {Function} The wrapped callback.
 */
const withTimeout = (onSuccess, onTimeout, timeout) => {
  let settled = false;

  const expire = () => {
    if (!settled) {
      settled = true;
      onTimeout();
    }
  };
  const timerId = setTimeout(expire, timeout);

  return (...results) => {
    if (!settled) {
      settled = true;
      // Cancel the pending deadline before handing over the results.
      clearTimeout(timerId);
      onSuccess.apply(this, results);
    }
  };
};
// Emit "hello" with arguments 1 and 2; the wrapped callback logs "success!" if
// it is called within 1000 ms, otherwise "timeout!" is logged.
socket.emit("hello", 1, 2, withTimeout(() => {
console.log("success!");
}, () => {
console.log("timeout!");
}, 1000));
Yes, we can send a response back to the server from the client (as an acknowledgement).
According to the new Socket.IO (4.x) documentation:
Server-side
// Payload sent to the client with the event.
const dataToSend = { test: "fromServer" };

// Emit "my-event" and wait up to 5000 ms for the client's acknowledgement.
socket.timeout(5000).emit("my-event", dataToSend, (err, response) => {
  if (err) {
    // the other side did not acknowledge the event in the given delay
    return;
  }
  console.log(response);
});
Client-side
// Handle "my-event" and acknowledge it by invoking the supplied callback.
socket.on("my-event", (data, callback) => {
// any logic for `data` (the payload sent by the server, i.e. { test: "fromServer" })
callback({ test: "test" });
});

Resources