How to wait until all bulk writes are completed in the Elasticsearch API

I'm using the Node.js Elasticsearch client and trying to write a data importer that bulk imports documents from MongoDB. The problem I'm having is that the index refresh doesn't seem to wait until all documents are written to Elasticsearch before the counts are checked.
I use the Node streams API to read the records into a batch, then the Elasticsearch bulk command to write the records, as shown below:
function rebuildIndex(modelName, queryStream, openStream, done) {
  logger.debug('Rebuilding %s index', modelName);
  async.series([
    function (next) {
      deleteType(modelName, function (err, result) {
        next(err, result);
      });
    },
    function (next) {
      var Model;
      var i = 0;
      var batchSize = settings.indexBatchSize;
      var batch = [];
      var stream;
      if (queryStream && !openStream) {
        stream = queryStream.stream();
      } else if (queryStream && openStream) {
        stream = queryStream;
      } else {
        Model = mongoose.model(modelName);
        stream = Model.find({}).stream();
      }
      stream.on("data", function (doc) {
        logger.debug('indexing %s', doc.userType);
        batch.push({
          index: {
            "_index": settings.index,
            "_type": modelName.toLowerCase(),
            "_id": doc._id.toString()
          }
        });
        var obj;
        if (doc.toObject) {
          obj = doc.toObject();
        } else {
          obj = doc;
        }
        obj = _.clone(obj);
        delete obj._id;
        batch.push(obj);
        i++;
        if (i % batchSize == 0) {
          console.log(chalk.green('Loaded %s records'), i);
          client().bulk({
            body: batch
          }, function (err, resp) {
            if (err) {
              next(err);
            } else if (resp.errors) {
              next(resp);
            }
          });
          batch = [];
        }
      });
      // When the stream ends write the remaining records
      stream.on("end", function () {
        if (batch.length > 0) {
          console.log(chalk.green('Loaded %s records'), batch.length / 2);
          client().bulk({
            body: batch
          }, function (err, resp) {
            if (err) {
              logger.error(err, 'Failed to rebuild index');
              next(err);
            } else if (resp.errors) {
              logger.error(resp.errors, 'Failed to rebuild index');
              next(resp);
            } else {
              logger.debug('Completed rebuild of %s index', modelName);
              next();
            }
          });
        } else {
          next();
        }
        batch = [];
      });
    }
  ],
  function (err) {
    if (err)
      logger.error(err);
    done(err);
  });
}
I use this helper to check the document counts in the index. Without the timeout, the counts in the index are wrong, but with the timeout they're okay.
/**
 * A helper function to count the number of documents in the search index for a particular type.
 * @param type The type, e.g. User, Customer etc.
 * @param done A callback to report the count.
 */
function checkCount(type, done) {
  async.series([
    function (next) {
      setTimeout(next, 1500);
    },
    function (next) {
      refreshIndex(next);
    },
    function (next) {
      client().count({
        "index": settings.index,
        "type": type.toLowerCase(),
        "ignore": [404]
      }, function (error, count) {
        if (error) {
          next(error);
        } else {
          next(error, count.count);
        }
      });
    }
  ], function (err, count) {
    if (err)
      logger.error({"err": err}, "Could not check index counts.");
    done(err, count[2]);
  });
}
And this helper is supposed to refresh the index after the update completes:
// required to get results to show up immediately in tests. Otherwise there's a 1 second delay
// between adding an entry and it showing up in a search.
function refreshIndex(done) {
  client().indices.refresh({
    "index": settings.index,
    "ignore": [404]
  }, function (error, response) {
    if (error) {
      done(error);
    } else {
      logger.debug("refreshed index");
      done();
    }
  });
}
The loader works okay, except this test fails because of timing between the bulk load and the count check:
it('should be able to rebuild and reindex customer data', function (done) {
  this.timeout(0); // otherwise the stream reports a timeout error
  logger.debug("Testing the customer reindexing process");
  // pass null to use the generic find all query
  searchUtils.rebuildIndex("Customer", queryStream, false, function () {
    searchUtils.checkCount("Customer", function (err, count) {
      th.checkSystemErrors(err, count);
      count.should.equal(volume.totalCustomers);
      done();
    });
  });
});
I observe random results in the counts from the tests. With the artificial delay (the setTimeout in the checkCount function), the counts match, so I conclude that the documents are eventually written to Elasticsearch and the test would pass. I thought indices.refresh would essentially force a wait until the documents are all written to the index, but it doesn't seem to work with this approach.
The setTimeout hack is not really sustainable when the volume grows to actual production levels, so how can I ensure the bulk calls are completely written to the Elasticsearch index before checking the document count?

Take a look at the "refresh" parameter (see the Elasticsearch documentation).
For example:
let bulkUpdatesBody = [ bulk actions / docs to index go here ]
client.bulk({
  refresh: "wait_for",
  body: bulkUpdatesBody
});
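Applied to the bulk calls in rebuildIndex above, that would look roughly like this (a sketch of the same call with the parameter added; "wait_for" holds the response until a refresh has made the indexed documents searchable, so the setTimeout in checkCount becomes unnecessary):
client().bulk({
  refresh: "wait_for", // don't respond until the docs are visible to search
  body: batch
}, function (err, resp) {
  if (err) {
    next(err);
  } else if (resp.errors) {
    next(resp);
  }
});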

I'm not sure if this is the answer or not, but I flushed the index prior to checking the count. It "appears" to work, but I don't know if that's just because of the timing between the calls. Perhaps someone from the Elastic team knows whether flushing the index will really solve the issue?
function checkCount(type, done) {
  async.series([
    function (next) {
      client().indices.flush({
        "index": settings.index,
        "ignore": [404]
      }, function (error, count) {
        if (error) {
          next(error);
        } else {
          next(error, count.count);
        }
      });
    },
    function (next) {
      refreshIndex(next);
    },
    function (next) {
      client().count({
        "index": settings.index,
        "type": type.toLowerCase(),
        "ignore": [404]
      }, function (error, count) {
        if (error) {
          next(error);
        } else {
          next(error, count.count);
        }
      });
    }
  ], function (err, count) {
    if (err)
      logger.error({"err": err}, "Could not check index counts.");
    done(err, count[2]);
  });
}

Related

How to use a few dispatches in Nuxt fetch?

I built a site in Nuxt and get the data from the WordPress API.
I have a few stores: home.js, solutions.js, tipo.js, portfolio.js and options.js.
In fetch I check whether each store array is empty, and if so, call the dispatch that fills it.
export default {
  async fetch({ store }) {
    try {
      if (store.getters['home/home'].length === 0) {
        await store.dispatch('home/fetchHome');
      }
      if (store.getters["solutions/getSolutions"].length === 0) {
        await store.dispatch('solutions/fetchSolutions');
      }
      if (store.getters["tipo/getTipo"].length === 0) {
        await store.dispatch('tipo/fetchTipo');
      }
      if (store.getters["portfolio/getPortfolio"].length === 0) {
        await store.dispatch('portfolio/fetchPortfolio');
      }
      if (store.getters["options/getOptions"].length === 0) {
        await store.dispatch('options/fetchOptions');
      }
    } catch (e) {
      console.log(e, 'e no data')
    }
  },
  components: { HomeContacts, PortofolioSlider, Clients, ChiSiamo, Solutions, HomeIntro }
}
But the problem is that the page takes too long to load, because I call the dispatches one after another with await, and I think this is the problem.
How can I run all the dispatches in fetch in parallel rather than sequentially?
The advantage I see in fetch over asyncData is that only the first time I load the page do I have to wait a little while the arrays fill up; when I reach this page from another page, there are no API requests and the data comes straight from the store.
There's just very little information on Nuxt in terms of how it's meant to be used and what's best to use when; Next covers this better.
This method doesn't work (note that whenever a condition is false, the corresponding executor never calls resolve or reject, so Promise.all never settles):
fetch({ store }) {
  const promise1 = new Promise((resolve, reject) => {
    if (store.getters['home/home'].length === 0) {
      resolve(store.dispatch('home/fetchHome'));
    }
  });
  const promise2 = new Promise((resolve, reject) => {
    if (store.getters["solutions/getSolutions"].length === 0) {
      resolve(store.dispatch('solutions/fetchSolutions'));
    }
  });
  const promise3 = new Promise((resolve, reject) => {
    if (store.getters["tipo/getTipo"].length === 0) {
      resolve(store.dispatch('tipo/fetchTipo'));
    }
  });
  const promise4 = new Promise((resolve, reject) => {
    if (store.getters["portfolio/getPortfolio"].length === 0) {
      resolve(store.dispatch('portfolio/fetchPortfolio'));
    }
  });
  const promise5 = new Promise((resolve, reject) => {
    if (store.getters["options/getOptions"].length === 0) {
      resolve(store.dispatch('options/fetchOptions'));
    }
  });
  Promise.all([promise1, promise2, promise3, promise4, promise5])
    .then((data) => console.log(data))
    .catch((error) => console.log(error));
}
Assuming that:
- store.dispatch() returns a Promise,
- the first attempt in the question is generally correct,
- the objective is to perform the relevant dispatches in parallel,
then:
- eliminate await from the store.dispatch() sequence,
- accumulate the promises returned by store.dispatch() in an array,
- don't use a new Promise() wrapper,
- await the Promise returned by Promise.all(promises), as shown below.
export default {
  async fetch({ store }) {
    try {
      let promises = [];
      if (store.getters['home/home'].length === 0) {
        promises.push(store.dispatch('home/fetchHome'));
      }
      if (store.getters['solutions/getSolutions'].length === 0) {
        promises.push(store.dispatch('solutions/fetchSolutions'));
      }
      if (store.getters['tipo/getTipo'].length === 0) {
        promises.push(store.dispatch('tipo/fetchTipo'));
      }
      if (store.getters['portfolio/getPortfolio'].length === 0) {
        promises.push(store.dispatch('portfolio/fetchPortfolio'));
      }
      if (store.getters['options/getOptions'].length === 0) {
        promises.push(store.dispatch('options/fetchOptions'));
      }
      let data = await Promise.all(promises);
      console.log(data);
    } catch (e) {
      console.log(e);
    }
  },
  components: { HomeContacts, PortofolioSlider, Clients, ChiSiamo, Solutions, HomeIntro }
}
For convenience, this can be proceduralised as follows:
export default {
  async fetch({ store }) {
    try {
      let paths = [
        { get: 'home/home', fetch: 'home/fetchHome' },
        { get: 'solutions/getSolutions', fetch: 'solutions/fetchSolutions' },
        { get: 'tipo/getTipo', fetch: 'tipo/fetchTipo' },
        { get: 'portfolio/getPortfolio', fetch: 'portfolio/fetchPortfolio' },
        { get: 'options/getOptions', fetch: 'options/fetchOptions' }
      ];
      let promises = paths
        .filter(p => store.getters[p.get].length === 0)
        .map(p => store.dispatch(p.fetch));
      let data = await Promise.all(promises);
      console.log(data);
    } catch (e) {
      console.log(e);
    }
  },
  components: { HomeContacts, PortofolioSlider, Clients, ChiSiamo, Solutions, HomeIntro }
}
It may make more sense to define the paths array elsewhere in the code and pass it to a simplified fetch() with the profile fetch({ store, paths }), as sketched below.
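A minimal sketch of that shape (the module name and the fetchMissing helper are illustrative, not from the question):
// store-paths.js -- hypothetical module holding the getter/action pairs
export const paths = [
  { get: 'home/home', fetch: 'home/fetchHome' },
  { get: 'solutions/getSolutions', fetch: 'solutions/fetchSolutions' },
  { get: 'tipo/getTipo', fetch: 'tipo/fetchTipo' },
  { get: 'portfolio/getPortfolio', fetch: 'portfolio/fetchPortfolio' },
  { get: 'options/getOptions', fetch: 'options/fetchOptions' }
];
// dispatch, in parallel, every action whose backing getter is still empty
export function fetchMissing(store, paths) {
  return Promise.all(
    paths
      .filter(p => store.getters[p.get].length === 0)
      .map(p => store.dispatch(p.fetch))
  );
}
// in the page component:
// async fetch({ store }) { await fetchMissing(store, paths); }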
If it still doesn't work, then there's something you're not telling us.
Promise.all can be useful here: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/all
Or even Promise.allSettled(), depending on what you're trying to do: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/allSettled
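For instance, a quick sketch of the allSettled variant, using the promises array built in the first answer, inside the async fetch():
const results = await Promise.allSettled(promises);
// unlike Promise.all, allSettled never rejects; inspect each outcome individually
results.forEach(r => console.log(r.status === 'fulfilled' ? r.value : r.reason));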
Then, this is a matter of displaying something while your page does the fetching. You could use a v-if="$fetchState.pending" at the top level of your page to display a loader while the whole thing is being fetched.
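A minimal sketch of that, with illustrative component content:
<template>
  <div>
    <div v-if="$fetchState.pending">Loading...</div>
    <div v-else-if="$fetchState.error">Could not load the data</div>
    <HomeIntro v-else />
  </div>
</template>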
There is nothing related to ideology here: there are two hooks that do data fetching, either blocking the render of the page (asyncData()) or allowing you to render it while the data is fetched (fetch()).
Nothing is tied to the framework itself; you're free to do as you'd like.

Is there any cost advantage of Parse.Object.saveAll vs. saving individually?

The Parse JS SDK provides a Parse.Object.saveAll() method to save many objects with one command.
From looking at ParseServerRESTController.js it seems that each object is saved individually:
if (path === '/batch') {
  let initialPromise = Promise.resolve();
  if (data.transaction === true) {
    initialPromise = config.database.createTransactionalSession();
  }
  return initialPromise.then(() => {
    const promises = data.requests.map(request => {
      return handleRequest(
        request.method,
        request.path,
        request.body,
        options,
        config
      ).then(
        response => {
          return {
            success: response
          };
        },
        error => {
          return {
            error: {
              code: error.code,
              error: error.message
            },
          };
        }
      );
    });
    return Promise.all(promises).then(result => {
      if (data.transaction === true) {
        if (
          result.find(resultItem => typeof resultItem.error === 'object')
        ) {
          return config.database.abortTransactionalSession().then(() => {
            return Promise.reject(result);
          });
        } else {
          return config.database.commitTransactionalSession().then(() => {
            return result;
          });
        }
      } else {
        return result;
      }
    });
  });
}
It seems that saveAll is merely a convenience wrapper around saving each object individually, so it still appears to make n database requests for n objects.
Is it correct that saveAll has no cost advantage (performance, network traffic, etc.) over saving each object individually in Cloud Code?
I can tell you that the answer is that Parse.Object.saveAll and Parse.Object.destroyAll batch requests by default in batches of 20 objects. But why take my word for it? Let's test it out!
Turn verbose logging on and then run the following:
const run = async function run() {
  const objects = [...Array(10).keys()].map(i => new Parse.Object('Test').set({i}));
  await Parse.Object.saveAll(objects);
  const promises = objects.map(o => o.increment('i').save());
  return Promise.all(promises);
};
run()
  .then(console.log)
  .catch(console.error);
And here's the output from the parse-server logs (I've truncated it, but it should be enough to be apparent what is going on):
verbose: REQUEST for [POST] /parse/batch: { // <--- note the path
  "requests": [ // <--- an array of requests!!!
    {
      "method": "POST",
      "body": {
        "i": 0
      },
      "path": "/parse/classes/Test"
    },
    ... skip the next 7, you get the idea
    {
      "method": "POST",
      "body": {
        "i": 9
      },
      "path": "/parse/classes/Test"
    }
  ]
}
.... // <-- removed some irrelevant output for brevity.
verbose: RESPONSE from [POST] /parse/batch: {
  "response": [
    {
      "success": {
        "objectId": "szVkuqURVq",
        "createdAt": "2020-03-05T21:25:44.487Z"
      }
    },
    ...
    {
      "success": {
        "objectId": "D18WB4Nsra",
        "createdAt": "2020-03-05T21:25:44.491Z"
      }
    }
  ]
}
...
// now we iterate through and there's a request per object.
verbose: REQUEST for [PUT] /parse/classes/Test/szVkuqURVq: {
  "i": {
    "__op": "Increment",
    "amount": 1
  }
}
...
verbose: REQUEST for [PUT] /parse/classes/Test/HtIqDIsrX3: {
  "i": {
    "__op": "Increment",
    "amount": 1
  }
}
// and the responses...
verbose: RESPONSE from [PUT] /parse/classes/Test/szVkuqURVq: {
  "response": {
    "i": 1,
    "updatedAt": "2020-03-05T21:25:44.714Z"
  }
}
...
In the core manager code, you correctly identify that we make a request to the data store (i.e. MongoDB) for each object. This is necessary because an object may have relations or pointers that have to be handled, and that may require additional calls to the data store.
BUT! Calls between the Parse Server and the data store usually travel over very fast networks in a binary format, whereas calls between the client and the Parse Server are JSON and go over longer distances with ordinarily much slower connections.
There is one other potential advantage you can see in the core manager code, which is that the batch can be done in a transaction.
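If you want to take advantage of that, recent versions of the JS SDK accept options on saveAll; here's a hedged sketch (confirm that your SDK and parse-server versions support the transaction and batchSize options before relying on them):
// Assumption: the installed SDK/server support these saveAll options.
const objects = [...Array(40).keys()].map(i => new Parse.Object('Test').set({ i }));
// transaction: true asks the server to run the batch in a transactional session;
// batchSize overrides the default batch size of 20.
await Parse.Object.saveAll(objects, { batchSize: 40, transaction: true });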

Add array data in Vue without losing old data

I am making a Vue application and want to append more data, fetched through a Laravel API and returned as an array, without losing the old data.
My method is:
getData () {
  axios.get('get/users')
    .then(response => {
      this.queue = response.data.users
    })
    .catch(e => {
      this.errors.push(e)
    })
}
and my data:
data: function () {
  return {
    queue: []
  }
},
The full script is:
data: function () {
  return {
    queue: []
  }
},
created () {
  this.getData()
},
methods: {
  getData () {
    axios.get('get/users')
      .then(response => {
        this.queue = response.data.users
      })
      .catch(e => {
        this.errors.push(e)
      })
  },
  decide (choice) {
    this.$refs.tinder.decide(choice)
  },
  submit (choice) {
    switch (choice) {
      case 'nope': // swipe left
        break;
      case 'like': // swipe right
        break;
      case 'super': // swipe up
        break;
    }
    if (this.queue.length < 2) {
      this.getData()
    }
  }
}
Here is a video link:
https://youtu.be/iv82EGMD4XA
I want this.getData() to be triggered when the queue has 2 items left, fetching users and appending them to queue without losing the old queue data.
You can use the concat function. In the axios response, instead of
this.queue = response.data.users
use:
this.queue = this.queue.concat(response.data.users);
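Equivalently, you could append in place with spread syntax (a stylistic alternative, not from the original answer):
// push each fetched user onto the existing array instead of replacing it
this.queue.push(...response.data.users)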

ASK error, TypeError: Cannot read property 'type' of undefined

I'm creating a skill that will read back different incidents at different dates and times from a DynamoDB table through Alexa.
My 3 columns are date, time and incident.
I've defined my partition and sort key in my Lambda function as:
let GetMachineStateIntent = (context, callback) => {
  var params = {
    TableName: "updatedincident",
    Key: {
      date: "2017-03-21",
      time: "07:38",
      incident: "Blocked Primary",
    }
  };
When I try to test my skill I can't seem to recall the incident correctly; the error I'm getting in CloudWatch is:
2018-03-28T14:48:53.397Z 042319cb-4a3e-49ae-8b33-1641367107d4 Unexpected error occurred in the skill handler! TypeError: Cannot read property 'type' of undefined
at exports.handler.e (/var/task/index.js:70:16)
as well as:
2018-03-28T14:48:53.417Z 042319cb-4a3e-49ae-8b33-1641367107d4
{
"errorMessage": "Unexpected error"
}
Here is my code from index.js
var AWSregion = 'us-east-1'; // us-east-1
var AWS = require('aws-sdk');
AWS.config.update({
  region: "us-east-1"
});
var dbClient = new AWS.DynamoDB.DocumentClient();
let GetMachineStateIntent = (context, callback) => {
  var params = {
    TableName: "updatedincident",
    Key: {
      date: "2018-03-28",
      time: "04:23",
    }
  };
  dbClient.get(params, function (err, data) {
    if (err) {
      // failed to read from table for some reason..
      console.log('failed to load data item:\n' + JSON.stringify(err, null, 2));
      // let skill tell the user that it couldn't find the data
      sendResponse(context, callback, {
        output: "the data could not be loaded from your database",
        endSession: false
      });
    } else {
      console.log('loaded data item:\n' + JSON.stringify(data.Item, null, 2));
      // assuming the item has an attribute called "incident"..
      sendResponse(context, callback, {
        output: data.Item.incident,
        endSession: false
      });
    }
  });
};
function sendResponse(context, callback, responseOptions) {
  if (typeof callback === 'undefined') {
    context.succeed(buildResponse(responseOptions));
  } else {
    callback(null, buildResponse(responseOptions));
  }
}
function buildResponse(options) {
  var alexaResponse = {
    version: "1.0",
    response: {
      outputSpeech: {
        "type": "SSML",
        "ssml": `<speak><prosody rate="slow">${options.output}</prosody></speak>`
      },
      shouldEndSession: options.endSession
    }
  };
  if (options.reprompt) {
    alexaResponse.response.reprompt = {
      outputSpeech: {
        "type": "SSML",
        "ssml": `<speak><prosody rate="slow">${options.reprompt}</prosody></speak>`
      }
    };
  }
  return alexaResponse;
}
exports.handler = (event, context, callback) => {
  try {
    var request = event.request;
    if (request.type === "LaunchRequest") {
      sendResponse(context, callback, {
        output: "welcome to my skill. what data are you looking for?",
        endSession: false
      });
    }
    else if (request.type === "IntentRequest") {
      let options = {};
      if (request.intent.name === "GetMachineStateIntent") {
        GetMachineStateIntent(context, callback);
      } else if (request.intent.name === "AMAZON.StopIntent" || request.intent.name === "AMAZON.CancelIntent") {
        sendResponse(context, callback, {
          output: "ok. good bye!",
          endSession: true
        });
      }
      else if (request.intent.name === "AMAZON.HelpIntent") {
        sendResponse(context, callback, {
          output: "you can ask me about incidents that have happened",
          reprompt: "what can I help you with?",
          endSession: false
        });
      }
      else {
        sendResponse(context, callback, {
          output: "I don't know that one! please try again!",
          endSession: false
        });
      }
    }
    else if (request.type === "SessionEndedRequest") {
      sendResponse(context, callback, ""); // no response needed
    }
    else {
      // an unexpected request type received.. just say I don't know..
      sendResponse(context, callback, {
        output: "I don't know that one! please try again!",
        endSession: false
      });
    }
  } catch (e) {
    // handle the error by logging it and sending back a failure
    console.log('Unexpected error occurred in the skill handler!', e);
    if (typeof callback === 'undefined') {
      context.fail("Unexpected error");
    } else {
      callback("Unexpected error");
    }
  }
};
And this is my handler, GetMachineState.js:
function sendResponse(context, callback, responseOptions) {
  if (typeof callback === 'undefined') {
    context.succeed(buildResponse(responseOptions));
  } else {
    callback(null, buildResponse(responseOptions));
  }
}
function buildResponse(options) {
  var alexaResponse = {
    version: "1.0",
    response: {
      outputSpeech: {
        "type": "SSML",
        "ssml": `<speak><prosody rate="slow">${options.output}</prosody></speak>`
      },
      shouldEndSession: options.endSession
    }
  };
  if (options.reprompt) {
    alexaResponse.response.reprompt = {
      outputSpeech: {
        "type": "SSML",
        "ssml": `<speak><prosody rate="slow">${options.reprompt}</prosody></speak>`
      }
    };
  }
  return alexaResponse;
}
exports.handler = (event, context, callback) => {
  try {
    var request = event.request;
    if (request.type === "LaunchRequest") {
      sendResponse(context, callback, {
        output: "welcome to my skill. what do you want to find?",
        endSession: false
      });
    }
    else if (request.type === "IntentRequest") {
      let options = {};
      if (request.intent.name === "GetMachineStateIntent") {
        // this is where we will wire up the dynamo call
        // for now, just send a simple response and end the session
        sendResponse(context, callback, {
          output: "cinema not implemented yet!",
          endSession: true
        });
      } else if (request.intent.name === "AMAZON.StopIntent" || request.intent.name === "AMAZON.CancelIntent") {
        sendResponse(context, callback, {
          output: "ok. good bye!",
          endSession: true
        });
      }
      else if (request.intent.name === "AMAZON.HelpIntent") {
        sendResponse(context, callback, {
          output: "you can ask me about incidents that have happened",
          reprompt: "what can I help you with?",
          endSession: false
        });
      }
      else {
        sendResponse(context, callback, {
          output: "I don't know that one! Good bye!",
          endSession: true
        });
      }
    }
    else if (request.type === "SessionEndedRequest") {
      sendResponse(context, callback, ""); // no response needed
    }
    else {
      // an unexpected request type received.. just say I don't know..
      sendResponse(context, callback, {
        output: "I don't know that one! Good bye!",
        endSession: true
      });
    }
  } catch (e) {
    // handle the error by logging it and sending back a failure
    console.log('Unexpected error occurred in the skill handler!', e);
    if (typeof callback === 'undefined') {
      context.fail("Unexpected error");
    } else {
      callback("Unexpected error");
    }
  }
};
It's impossible to know for sure if this is the problem or not because you haven't shared the code from the index.js file. The error message you get is telling you that the problem occurs at line 70 in your index.js file, so you should look there and figure out what the problem is.
However, based on the fact that you also posted this as a comment on another question, I'm going to venture a guess that the issue you've run into is that you used the code snippet I provided in the answer to that question, and the error comes from dereferencing request.type.
You have to make sure the request variable is set to the actual request from the event, like so: var request = event.request, where event is the first argument of exports.handler = (event, context, callback) => { ... }.
For example:
exports.handler = (event, context, callback) => {
  var request = event.request;
  if (request.type === "IntentRequest"
    // make sure the name of the intent matches the one in your interaction model
    && request.intent.name == "GetMachineStateIntent") {
    var dateSlot = request.intent.slots.Date != null ?
      request.intent.slots.Date.value : "unknown date";
    var timeSlot = request.intent.slots.Time != null ?
      request.intent.slots.Time.value : "unknown time";
    // respond with speech saying back what the skill thinks the user requested
    sendResponse(context, callback, {
      output: "You wanted the machine state at "
        + timeSlot + " on " + dateSlot,
      endSession: true
    });
  } else {
    // TODO: handle other types of requests..
    sendResponse(context, callback, {
      output: "I don't know how to handle this request yet!",
      endSession: true
    });
  }
};
function sendResponse(context, callback, responseOptions) {
  if (typeof callback === 'undefined') {
    context.succeed(buildResponse(responseOptions));
  } else {
    callback(null, buildResponse(responseOptions));
  }
}
function buildResponse(options) {
  var alexaResponse = {
    version: "1.0",
    response: {
      outputSpeech: {
        "type": "SSML",
        "ssml": `<speak><prosody rate="slow">${options.output}</prosody></speak>`
      },
      shouldEndSession: options.endSession
    }
  };
  if (options.reprompt) {
    alexaResponse.response.reprompt = {
      outputSpeech: {
        "type": "SSML",
        "ssml": `<speak><prosody rate="slow">${options.reprompt}</prosody></speak>`
      }
    };
  }
  return alexaResponse;
}

Is it possible to send multiple SNS in a lambda function?

I tried to use a Promise to do that, but I always get a timeout error. All the 'Going to send' lines are logged, but neither 'SNS 0 has error' nor 'SNS 0 has no error' ever appears.
// Assumed requires for a self-contained handler; config is the project's own module.
const AWS = require('aws-sdk')
const mysql = require('mysql')
const config = require('./config')
const sns = new AWS.SNS()

function sendSNSMessage(params, index) {
  return new Promise(function (resolve, reject) {
    console.log('Going to send', index, params)
    sns.publish(params, function (err, data) {
      if (err) {
        console.log(`SNS ${index} has error: ${err}`)
        return reject(err)
      }
      console.log(`SNS ${index} has no error: ${data}`)
      resolve(data)
    })
  })
}
exports.query = (event, context, callback) => {
  const connection = mysql.createConnection(config.db)
  try {
    connection.connect(function (err) {
      if (!err) {
        connection.query('SELECT * FROM urls', function (err, results) {
          if (!err) {
            const messages = results.map((result, index) => {
              console.log(`results[${index}]: ${result.url}`)
              return sendSNSMessage({
                Message: result.url,
                TopicArn: config.sns.queryArn
              }, index)
            })
            Promise.all(messages).then(() => {
              callback(null, 'All messages sent')
            }, console.error)
          } else {
            console.log('Query error!')
            callback(err)
          }
          connection.end()
        })
      } else {
        console.log('Error connecting database ...' + err.message)
      }
    })
  } catch (error) {
    callback(`Exception: ${error}`)
  }
}
Basically the idea behind it is:
1. Do a MySQL query to get a couple of records.
2. Send one SNS message per record to trigger another Lambda function.
