Elasticsearch bulk operation - elasticsearch

I really can't understand why, when I run a bulk insert, I lose the previous data in the same index without executing any delete operation. This is weird. Any ideas?
var client = new elasticsearch.Client({
  hosts: [
    'http://localhost:9200/'
  ]
})
...
InserTweets: function (arrayobj, callback) {
  var items = [];
  var count = 1;
  arrayobj.forEach(element => {
    items.push({ index: { _index: 'twitter', _type: 'tweet', _id: count } }, element);
    count++;
  });
  client.bulk({ body: items }, function (err, resp, status) {
    callback(err, resp, status);
  });
}

You are setting the _id to the count, so on the second bulk operation it's overwriting/updating the existing records with the new ones.
The _id needs to be unique for each record.
Does element have anything unique, like its own id, that you could use?
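For example (a sketch, assuming each element carries a unique field such as a tweet's id_str; substitute whatever unique identifier your data actually has):

arrayobj.forEach(element => {
  // Use the element's own unique id instead of a counter that restarts at 1
  items.push({ index: { _index: 'twitter', _type: 'tweet', _id: element.id_str } }, element);
});

Alternatively, omit _id entirely and Elasticsearch will auto-generate a unique id for each document:

arrayobj.forEach(element => {
  items.push({ index: { _index: 'twitter', _type: 'tweet' } }, element);
});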

Related

Optimistic response not working when adding items to list

My data model is a list with items. Very simple:
{
  _id: 1,
  name: "List 1",
  items: [
    { _id: 2, text: "Item text 1" },
    { _id: 3, text: "Item text 2" }
  ]
}
Adding a new list with optimistic response works perfectly:
const [addListMutation] = useAddListMutation({
  update: (cache, { data }) => {
    const cachedLists =
      (cache.readQuery<GetAllListsQuery>({
        query: GetAllListsDocument,
      })?.lists as TList[]) ?? [];
    if (data) {
      cache.writeQuery({
        query: GetAllListsDocument,
        data: {
          lists: [...cachedLists, data?.list as TList],
        },
      });
    }
  },
});
const addList = async (name: string) => {
  const list = {
    _id: ..new id here,
    name,
    items: [],
  };
  const variables: AddListMutationVariables = {
    data: list,
  };
  await addListMutation({
    variables,
    optimisticResponse: {
      list,
    },
  });
};
This gets reflected immediately in my component using const { loading, data } = useGetAllListsQuery();. data is updated twice: first with the optimistic response and then again after the mutation is done. Just as expected.
Now I'm trying to add an item to the list this way:
const [updateListMutation] = useUpdateListMutation({
  update: (cache, { data }) => {
    const cachedLists =
      (cache.readQuery<GetAllListsQuery>({
        query: GetAllListsDocument,
      })?.lists as TList[]) ?? [];
    if (data?.list) {
      // Find existing list to update
      const updatedList = data?.list as TList;
      const updatedListIndex = cachedLists.findIndex(
        (list: TList) => list._id === updatedList._id,
      );
      // Create a copy of cached lists and replace entire list
      // with new list from { data }.
      const updatedLists = [...cachedLists];
      updatedLists[updatedListIndex] = { ...updatedList };
      cache.writeQuery({
        query: GetAllListsDocument,
        data: {
          lists: updatedLists,
        },
      });
    }
  },
});
const updateList = async (updatedList: TList) => {
  const variables: UpdateListMutationVariables = {
    query: {
      _id: updatedList._id,
    },
    set: updatedList,
  };
  await updateListMutation({
    variables,
    optimisticResponse: {
      list: updatedList,
    },
  });
};

const addListItem = async (list: TList, text: string) => {
  const updatedList = R.clone(list);
  updatedList.items.push({
    _id: ...new item id here,
    text: 'My new list item',
  });
  await updateList(updatedList);
};
The problem is in my component: const { loading, data } = useGetAllListsQuery(); is not returning what I expect. When data first changes with the optimistic response, it contains an empty list item:
{
  _id: 1,
  name: "List 1",
  items: [{}]
}
Only after the mutation response returns does it populate the items array with the item with text 'My new list item'. So my component first updates when the mutation is finished, not with the optimistic response, because the cache can't figure out how to update the array. I don't know why.
(And I have checked that the updatedLists array in writeQuery correctly contains the new item with text 'My new list item', so I'm trying to write the correct data.)
Please let me know if you have any hints or solutions.
I've tried playing around with the cache (right now it's just initialized with the default new InMemoryCache({})). I can see the cache is normalized into a bunch of List:1, List:2, ... and ListItem:3, ListItem:4, ... entries.
I tried disabling normalization so I only have List:{id} entries; that didn't help. I also tried adding __typename: 'ListItem' to the added item, but that only caused the { data } in update: ... for the optimistic response to be undefined. I have spent hours on this now; it should be a fairly simple and common use case that I'm trying to solve :).
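For illustration, a sketch of what that __typename attempt looks like in updateList (assuming List and ListItem are the typenames generated from the schema):

await updateListMutation({
  variables,
  optimisticResponse: {
    list: {
      ...updatedList,
      __typename: 'List', // assumed typename
      items: updatedList.items.map((item) => ({
        ...item,
        __typename: 'ListItem', // assumed typename
      })),
    },
  },
});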
package.json
"#apollo/client": "^3.3.4",
"graphql": "^15.4.0",
"#graphql-codegen/typescript": "^1.19.0",

How to query an array of objects in DynamoDB using FilterExpression in a scan operation

How do I query an array of objects (workingDays) for entries containing "Tue" in DynamoDB with a scan operation? I have queried using a FilterExpression but I am getting no results.
var queryData = {
  TableName: tableName,
  FilterExpression: "contains (workingDays, :dayVal)",
  ExpressionAttributeValues: {
    ":dayVal": {
      S: "Tue"
    }
  }
};
console.log("getParams ==>", queryData);
dynamodb.scan(queryData, function (err, details) {
  if (err) {
    console.log(err, err.stack); // an error occurred
    callback(err, null);
  } else {
    callback(null, details);
  }
});
ExpressionAttributeValues in your query passes 'Tue' as a plain String ["S"] value, whereas in your table 'workingDays' is a list of map objects, each holding the value under a 'day' key.
Try the code below:
var queryData = {
  TableName: tableName,
  ExpressionAttributeNames: {
    "#workingDays": "workingDays",
  },
  FilterExpression: "contains (#workingDays, :dayVal)",
  ExpressionAttributeValues: {
    ":dayVal": {
      "day": "Tue"
    }
  }
};
console.log("getParams ==>", queryData);
docClient.scan(queryData, function (err, details) {
  if (err) {
    console.log(err, err.stack); // an error occurred
    // callback(err, null)
  } else {
    // callback(null, details)
    console.log(details);
  }
});
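Note that the answer switches to the DocumentClient (docClient), which accepts plain JavaScript values. As a sketch, if you stay on the low-level dynamodb client from the question, the same value would need DynamoDB attribute-value wrapping (this assumes each workingDays element is exactly a map like { day: "Tue" }, since contains() on a list matches whole elements):

var queryData = {
  TableName: tableName,
  ExpressionAttributeNames: {
    "#workingDays": "workingDays",
  },
  FilterExpression: "contains (#workingDays, :dayVal)",
  ExpressionAttributeValues: {
    // Low-level attribute-value syntax: a map (M) with a string (S) member
    ":dayVal": { M: { "day": { S: "Tue" } } }
  }
};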

How to redirect after a deleting mutation in Apollo?

After I delete a post, I want to update the cache and redirect to the post index page.
deletePost() {
  this.$apollo.mutate({
    mutation: DELETE_POST,
    variables: {
      postId: this.postId
    },
    update: (cache, { data: { deletePost } }) => {
      const query = {
        query: GET_PAGINATED_POSTS,
        variables: {
          page: 0,
          pageSize: 10
        },
      };
      const data = cache.readQuery({ ...query });
      data.postsPage = data.postsPage.filter(post => post._id != this.postId);
      cache.writeQuery({ ...query, data });
    }
  })
  // redirect
  this.$router.push({ name: 'IndexPosts' })
}
The above works, but since I'm not providing an optimisticResponse, there's a bit of a delay between the time the index page shows and the time the cache update takes place. How can I solve this? I was trying to add an optimisticResponse, but I don't know how to get the list of paginated posts without doing another query.
this.$apollo.mutate(...) returns a promise.
Try something like:
this.$apollo.mutate(...)
  .then(({ data: { deletePost } }) => {
    this.$router.push({ name: 'IndexPosts' })
  })
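Alternatively, if you want the redirect to stay immediate, you can give the mutation an optimisticResponse so the cache update runs synchronously before the redirect. A sketch (the deletePost payload shape and the 'Post' __typename are assumptions; mirror whatever DELETE_POST actually returns):

deletePost() {
  this.$apollo.mutate({
    mutation: DELETE_POST,
    variables: { postId: this.postId },
    // Assumed return shape; must match the mutation's response fields.
    optimisticResponse: {
      deletePost: { __typename: 'Post', _id: this.postId }
    },
    update: (cache, { data: { deletePost } }) => {
      // ...same cache update as above
    }
  })
  // update() has already run with the optimistic data at this point,
  // so the index page renders without the deleted post.
  this.$router.push({ name: 'IndexPosts' })
}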

How to wait until all bulk writes are completed in the Elasticsearch API

I'm using the NodeJS Elasticsearch client and trying to write a data importer to bulk import documents from MongoDB. The problem I'm having is that the index refresh doesn't seem to wait until all documents are written to Elasticsearch before checking the counts.
I'm using the streams API in Node to read the records into a batch, then using the Elasticsearch bulk command to write the records. Shown below:
function rebuildIndex(modelName, queryStream, openStream, done) {
  logger.debug('Rebuilding %s index', modelName);
  async.series([
    function (next) {
      deleteType(modelName, function (err, result) {
        next(err, result);
      });
    },
    function (next) {
      var Model;
      var i = 0;
      var batchSize = settings.indexBatchSize;
      var batch = [];
      var stream;
      if (queryStream && !openStream) {
        stream = queryStream.stream();
      } else if (queryStream && openStream) {
        stream = queryStream;
      } else {
        Model = mongoose.model(modelName);
        stream = Model.find({}).stream();
      }
      stream.on("data", function (doc) {
        logger.debug('indexing %s', doc.userType);
        batch.push({
          index: {
            "_index": settings.index,
            "_type": modelName.toLowerCase(),
            "_id": doc._id.toString()
          }
        });
        var obj;
        if (doc.toObject) {
          obj = doc.toObject();
        } else {
          obj = doc;
        }
        obj = _.clone(obj);
        delete obj._id;
        batch.push(obj);
        i++;
        if (i % batchSize == 0) {
          console.log(chalk.green('Loaded %s records'), i);
          client().bulk({
            body: batch
          }, function (err, resp) {
            if (err) {
              next(err);
            } else if (resp.errors) {
              next(resp);
            }
          });
          batch = [];
        }
      });
      // When the stream ends write the remaining records
      stream.on("end", function () {
        if (batch.length > 0) {
          console.log(chalk.green('Loaded %s records'), batch.length / 2);
          client().bulk({
            body: batch
          }, function (err, resp) {
            if (err) {
              logger.error(err, 'Failed to rebuild index');
              next(err);
            } else if (resp.errors) {
              logger.error(resp.errors, 'Failed to rebuild index');
              next(resp);
            } else {
              logger.debug('Completed rebuild of %s index', modelName);
              next();
            }
          });
        } else {
          next();
        }
        batch = [];
      });
    }
  ],
  function (err) {
    if (err)
      logger.error(err);
    done(err);
  });
}
I use this helper to check the document counts in the index. Without the timeout, the counts in the index are wrong, but with the timeout they're okay.
/**
 * A helper function to count the number of documents in the search index for a particular type.
 * @param type The type, e.g. User, Customer etc.
 * @param done A callback to report the count.
 */
function checkCount(type, done) {
  async.series([
    function (next) {
      setTimeout(next, 1500);
    },
    function (next) {
      refreshIndex(next);
    },
    function (next) {
      client().count({
        "index": settings.index,
        "type": type.toLowerCase(),
        "ignore": [404]
      }, function (error, count) {
        if (error) {
          next(error);
        } else {
          next(error, count.count);
        }
      });
    }
  ], function (err, count) {
    if (err)
      logger.error({"err": err}, "Could not check index counts.");
    done(err, count[2]);
  });
}
And this helper is supposed to refresh the index after the update completes:
// required to get results to show up immediately in tests. Otherwise there's a 1 second delay
// between adding an entry and it showing up in a search.
function refreshIndex(done) {
  client().indices.refresh({
    "index": settings.index,
    "ignore": [404]
  }, function (error, response) {
    if (error) {
      done(error);
    } else {
      logger.debug("refreshed index");
      done();
    }
  });
}
The loader works okay, except this test fails because of timing between the bulk load and the count check:
it('should be able to rebuild and reindex customer data', function (done) {
  this.timeout(0); // otherwise the stream reports a timeout error
  logger.debug("Testing the customer reindexing process");
  // pass null to use the generic find all query
  searchUtils.rebuildIndex("Customer", queryStream, false, function () {
    searchUtils.checkCount("Customer", function (err, count) {
      th.checkSystemErrors(err, count);
      count.should.equal(volume.totalCustomers);
      done();
    });
  });
});
I observe random results in the counts from the tests. With the artificial delay (the setTimeout in the checkCount function) the counts match, so I conclude that the documents are eventually written to Elasticsearch and the test would pass. I thought indices.refresh would essentially force a wait until all the documents are written to the index, but it doesn't seem to work with this approach.
The setTimeout hack is not really sustainable when the volume goes to actual production levels... so how can I ensure the bulk calls are completely written to the Elasticsearch index before checking the count of documents?
Take a look at the "refresh" parameter (see the Elasticsearch documentation).
For example:
let bulkUpdatesBody = [ bulk actions / docs to index go here ]
client.bulk({
  refresh: "wait_for",
  body: bulkUpdatesBody
});
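Applied to the rebuildIndex batches above, that would look like this (a sketch; the surrounding batching and error handling stay the same):

client().bulk({
  refresh: "wait_for", // callback fires only once the docs are searchable
  body: batch
}, function (err, resp) {
  // ...same error handling as before
});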
I'm not sure if this is the answer or not, but I flushed the index prior to checking the count. It "appears" to work, though I don't know if that's just because of the timing between the calls. Perhaps someone from the Elastic team knows whether flushing the index will really solve the issue?
function checkCount(type, done) {
  async.series([
    function (next) {
      client().indices.flush({
        "index": settings.index,
        "ignore": [404]
      }, function (error, count) {
        if (error) {
          next(error);
        } else {
          next(error, count.count);
        }
      });
    },
    function (next) {
      refreshIndex(type, next);
    },
    function (next) {
      client().count({
        "index": settings.index,
        "type": type.toLowerCase(),
        "ignore": [404]
      }, function (error, count) {
        if (error) {
          next(error);
        } else {
          next(error, count.count);
        }
      });
    }
  ], function (err, count) {
    if (err)
      logger.error({"err": err}, "Could not check index counts.");
    done(err, count[2]);
  });
}

Primary sort key DynamoDB attribute expression

I am new to DynamoDB and want to create a new object only if the primary sort key (name) does not already exist. I tried it like this:
params.id = randomId();
var item = {
  TableName: tableName,
  Item: params,
  ConditionExpression: "#na <> :n",
  ExpressionAttributeNames: { "#na": "name" },
  ExpressionAttributeValues: {
    ":n": params.name
  }
};
docClient.put(item, function (err, data) {
  console.log("Data:", data);
  console.log("Err:", err);
});
But the item is still created :/ Is it even possible to create a condition expression on the primary sort key?
Actually, I just ran into this issue myself. As explained here, it looks like you can't; you'll have to use a Global Secondary Index for the 'sort' key.
You will have to do a separate get request on the GSI first to see if "name" exists, e.g.:
function checkNameDoesNotExist(name, fn) {
  var query = {
    TableName: tableName,
    IndexName: 'nameInUsers',
    // "name" is a DynamoDB reserved word, so it needs an alias
    KeyConditionExpression: '#na = :n',
    ExpressionAttributeNames: { '#na': 'name' },
    ExpressionAttributeValues: {
      ':n': name
    }
  };
  dynamodb.query(query, function (err, data) {
    if (err) {
      return fn(err);
    } else {
      fn(null, data);
    }
  });
}
Disclaimer: I wrote the code off the top of my head; I don't know if it works, but it should give you a good starting point.
You can use the Exists condition. It will return an error saying that the object already exists:
var item = {
  TableName: tableName,
  Item: params,
  Expected: {
    name: {
      Exists: false
    }
  }
};
docClient.put(item, function (err, data) {
  console.log("Data:", data);
  console.log("Err:", err);
});
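For reference, Expected is the legacy conditional syntax; a sketch of the modern ConditionExpression equivalent ("name" is a DynamoDB reserved word, hence the alias). As the other answer explains, either form is only evaluated against an existing item with the same full primary key, so it does not enforce name uniqueness across partitions:

var item = {
  TableName: tableName,
  Item: params,
  // Succeeds only if no item with this exact primary key already
  // has a "name" attribute.
  ConditionExpression: "attribute_not_exists(#na)",
  ExpressionAttributeNames: { "#na": "name" }
};
docClient.put(item, function (err, data) {
  // err.code === "ConditionalCheckFailedException" when the condition fails
  console.log("Data:", data);
  console.log("Err:", err);
});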
