How project DBRef on Spring MongoDB Aggregation? - spring

I have the following aggregation done in a MongoDB shell to get the number of alerts of each type for each user:
db.getCollection('alerts').aggregate(
{
$unwind:"$son"
},
{
$group:
{
_id:{
son: "$son",
level: "$level"
},
count: { $sum: 1 }
}
},
{
$group:
{
_id:{
son: "$_id.son"
},
alerts: { $addToSet: {
level: "$_id.level",
count: "$count"
}}
}
}
)
I have translated it to Spring Data MongoDB as follows:
TypedAggregation<AlertEntity> alertsAggregation =
Aggregation.newAggregation(AlertEntity.class,
unwind("$son"),
Aggregation.group("$son", "$level").count().as("count"),
Aggregation.group("$_id.son")
.addToSet(new BasicDBObject("level", "$_id.level").append("count", "$count")).as("alerts"));
// Aggregation.match(Criteria.where("_id").in(sonIds)
AggregationResults<AlertsBySonDTO> results = mongoTemplate.
aggregate(alertsAggregation, AlertsBySonDTO.class);
List<AlertsBySonDTO> alertsBySonResultsList = results.getMappedResults();
return alertsBySonResultsList;
What I have not clear and I can not get it to work, is to project the identifier and if possible the name of the user (son variable).
The resulting DTO is as follows
public final class AlertsBySonDTO implements Serializable {
private static final long serialVersionUID = 1L;
#JsonProperty("identity")
private String id;
#JsonProperty("alerts")
private ArrayList<Map<String, String>> alerts;
}
but in the id property the entire embedded child entity.
This is the structure of the collection of alerts.
JSON alerts format:
{
"_id" : ObjectId("59e6ff3d9ef9d46a91112890"),
"_class" : "es.bisite.usal.bulltect.persistence.entity.AlertEntity",
"level" : "INFO",
"title" : "Alerta de Prueba",
"payload" : "Alerta de Prueba",
"create_at" : ISODate("2017-10-18T07:13:45.091Z"),
"delivery_mode" : "PUSH_NOTIFICATION",
"delivered" : false,
"parent" : {
"$ref" : "parents",
"$id" : ObjectId("59e6ff369ef9d46a91112878")
},
"son" : {
"$ref" : "children",
"$id" : ObjectId("59e6ff389ef9d46a9111287b")
}
}
/* 2 */
{
"_id" : ObjectId("59e6ff6d9ef9d46a91112892"),
"_class" : "es.bisite.usal.bulltect.persistence.entity.AlertEntity",
"level" : "WARNING",
"title" : "Token de acceso inv�lido.",
"payload" : "El token de acceso YOUTUBE no es v�lido",
"create_at" : ISODate("2017-10-18T07:14:53.449Z"),
"delivery_mode" : "PUSH_NOTIFICATION",
"delivered" : false,
"parent" : {
"$ref" : "parents",
"$id" : ObjectId("59e6ff369ef9d46a91112878")
},
"son" : {
"$ref" : "children",
"$id" : ObjectId("59e6ff389ef9d46a9111287b")
}
}
/* 3 */
{
"_id" : ObjectId("59e6ff6d9ef9d46a91112893"),
"_class" : "es.bisite.usal.bulltect.persistence.entity.AlertEntity",
"level" : "WARNING",
"title" : "Token de acceso inv�lido.",
"payload" : "El token de acceso INSTAGRAM no es v�lido",
"create_at" : ISODate("2017-10-18T07:14:53.468Z"),
"delivery_mode" : "PUSH_NOTIFICATION",
"delivered" : false,
"parent" : {
"$ref" : "parents",
"$id" : ObjectId("59e6ff369ef9d46a91112878")
},
"son" : {
"$ref" : "children",
"$id" : ObjectId("59e6ff389ef9d46a9111287c")
}
}
Anyone know how I can approach this?
thanks in advance

1. With MongoDB version 3.4
These are the following collections I created to reproduce your use case:
Alerts Collection
{
"_id" : ObjectId("59e6ff3d9ef9d46a91112890"),
"_class" : "es.bisite.usal.bulltect.persistence.entity.AlertEntity",
"level" : "INFO",
"title" : "Alerta de Prueba",
"payload" : "Alerta de Prueba",
"create_at" : ISODate("2017-10-18T07:13:45.091+0000"),
"delivery_mode" : "PUSH_NOTIFICATION",
"delivered" : false,
"parent" : DBRef("parents", ObjectId("59e6ff369ef9d46a91112878")),
"son" : DBRef("children", ObjectId("59e72ff0572ae72d8c063666"))
}
{
"_id" : ObjectId("59e6ff6d9ef9d46a91112892"),
"_class" : "es.bisite.usal.bulltect.persistence.entity.AlertEntity",
"level" : "WARNING",
"title" : "Token de acceso inv�lido.",
"payload" : "El token de acceso YOUTUBE no es valido",
"create_at" : ISODate("2017-10-18T07:14:53.449+0000"),
"delivery_mode" : "PUSH_NOTIFICATION",
"delivered" : false,
"parent" : DBRef("parents", ObjectId("59e6ff369ef9d46a91112878")),
"son" : DBRef("children", ObjectId("59e72ff0572ae72d8c063666"))
}
{
"_id" : ObjectId("59e6ff6d9ef9d46a91112893"),
"_class" : "es.bisite.usal.bulltect.persistence.entity.AlertEntity",
"level" : "WARNING",
"title" : "Token de acceso inv�lido.",
"payload" : "El token de acceso INSTAGRAM no es v�lido",
"create_at" : ISODate("2017-10-18T07:14:53.468+0000"),
"delivery_mode" : "PUSH_NOTIFICATION",
"delivered" : false,
"parent" : DBRef("parents", ObjectId("59e6ff369ef9d46a91112878")),
"son" : DBRef("children", ObjectId("59e72ffb572ae72d8c063669"))
}
Notice I changed the OBjectIds of the sons reference to match the children collection I created.
Children collection
{
"_id" : ObjectId("59e72ff0572ae72d8c063666"),
"name" : "Bob"
}
{
"_id" : ObjectId("59e72ffb572ae72d8c063669"),
"name" : "Tim"
}
Since you are using a reference you can't just access a field from the other collection. So I think you are missing some aggregation steps.
I did the following:
db.getCollection('alerts').aggregate(
{
$unwind:"$son"
},
{
$group:
{
_id:{
son: "$son",
level: "$level"
},
count: { $sum: 1 }
}
},
{
$group:
{
_id:{
son: "$_id.son"
},
alerts: { $addToSet: {
level: "$_id.level",
count: "$count"
}}
}
},
{ $addFields: { sonsArray: { $objectToArray: "$_id.son" } } },
{ $match: { "sonsArray.k": "$id"} },
{ $lookup: { from: "children", localField: "sonsArray.v", foreignField: "_id", as: "name" } }
)
And got the following results as json:
{
"_id" : {
"son" : DBRef("children", ObjectId("59e72ffb572ae72d8c063669"))
},
"alerts" : [
{
"level" : "WARNING",
"count" : NumberInt(1)
}
],
"sonsArray" : [
{
"k" : "$ref",
"v" : "children"
},
{
"k" : "$id",
"v" : ObjectId("59e72ffb572ae72d8c063669")
}
],
"name" : [
{
"_id" : ObjectId("59e72ffb572ae72d8c063669"),
"name" : "Tim"
}
]
}
{
"_id" : {
"son" : DBRef("children", ObjectId("59e72ff0572ae72d8c063666"))
},
"alerts" : [
{
"level" : "INFO",
"count" : NumberInt(1)
},
{
"level" : "WARNING",
"count" : NumberInt(1)
}
],
"sonsArray" : [
{
"k" : "$ref",
"v" : "children"
},
{
"k" : "$id",
"v" : ObjectId("59e72ff0572ae72d8c063666")
}
],
"name" : [
{
"_id" : ObjectId("59e72ff0572ae72d8c063666"),
"name" : "Bob"
}
]
}
If you want to get rid of the fields that where additionally created like sonsArray etc. you can do add a $project pipeline to clean your result.
2. If you have older versions of mongodb and you can change your data structure.
If instead of using a reference like this:
"son" : DBRef("children", ObjectId("59e72ffb572ae72d8c063669"))
you can add the objectId of the son/s as an array like this:
"sonId" : [
ObjectId("59e72ff0572ae72d8c063666")
]
then you can do your aggregation as follows:
db.getCollection('alerts').aggregate(
{
$unwind:"$sonId"
},
{
$group:
{
_id:{
sonId: "$sonId",
level: "$level"
},
count: { $sum: 1 }
}
},
{
$group:
{
_id:{
sonId: "$_id.sonId"
},
alerts: { $addToSet: {
level: "$_id.level",
count: "$count"
}}
}
},
{ $lookup: { from: "children", localField: "_id.sonId", foreignField: "_id", as: "son" } }
)
Is that something you are looking for?

Related

Get document by min size of array in Mongodb

I have mongo collection:
{
"_id" : 123,
"index" : "111",
"students" : [
{
"firstname" : "Mark",
"lastname" : "Smith"),
}
],
}
{
"_id" : 456,
"index" : "222",
"students" : [
{
"firstname" : "Mark",
"lastname" : "Smith"),
}
],
}
{
"_id" : 789,
"index" : "333",
"students" : [
{
"firstname" : "Neil",
"lastname" : "Smith"),
},
{
"firstname" : "Sofia",
"lastname" : "Smith"),
}
],
}
I want to get document that has index that is in the set of the given indexes, for example givenSet = ["111","333"] and has min length of students array.
Result should be the first document with _id:123, because its index is in the givenSet and studentsArrayLength = 1, which is smaller than third.
I need to write custom JSON #Query for Spring Mongo Repository. I am new to Mongo and am stuck a bit with this problem.
I wrote something like this:
#Query("{'index':{$in : ?0}, length:{$size:$students}, $sort:{length:-1}, $limit:1}")
Department getByMinStudentsSize(Set<String> indexes);
And got error: error message '$size needs a number'
Should I just use .count() or something like that?
you should use the aggregation framework for this type of query.
filter the result based on your condition.
add a new field and assign the array size to it.
sort based on the new field.
limit the result.
the solution should look something like this:
db.collection.aggregate([
{
"$match": {
index: {
"$in": [
"111",
"333"
]
}
}
},
{
"$addFields": {
"students_size": {
"$size": "$students"
}
}
},
{
"$sort": {
students_size: 1
}
},
{
"$limit": 1
}
])
working example: https://mongoplayground.net/p/ih4KqGg25i6
You are getting the issue because the second param should be enclosed in curly braces. And second param is projection
#Query("{{'index':{$in : ?0}}, {length:{$size:'$students'}}, $sort:{length:1}, $limit:1}")
Department getByMinStudentsSize(Set<String> indexes);
Below is the mongodb query :
db.collection.aggregate(
[
{
"$match" : {
"index" : {
"$in" : [
"111",
"333"
]
}
}
},
{
"$project" : {
"studentsSize" : {
"$size" : "$students"
},
"students" : 1.0
}
},
{
"$sort" : {
"studentsSize" : 1.0
}
},
{
"$limit" : 1.0
}
],
{
"allowDiskUse" : false
}
);

Mongoose + GraphQL (Apollo Server) Schema

We have db collection which is little complicated. Many of our keys are JSON objects where fields aren't fixed and change based on input given by user on UI. How should we write mongoose and GraphQL Schema for such complex type ?
{
"_id" : ObjectId("5ababb359b3f180012762684"),
"item_type" : "Sample",
"title" : "This is sample title",
"sub_title" : "Sample sub title",
"byline" : "5c6ed39d6ed6def938b71562",
"lede" : "Sample description",
"promoted" : "",
"slug" : [
"myurl"
],
"categories" : [
"Technology"
],
"components" : [
{
"type" : "Slide",
"props" : {
"description" : {
"type" : "",
"props" : {
"value" : "Sample value"
}
},
"subHeader" : {
"type" : "",
"props" : {
"value" : ""
}
},
"ButtonWorld" : {
"type" : "a-button",
"props" : {
"buttonType" : "product",
"urlType" : "Internal Link",
"isServices" : false,
"title" : "Hello World",
"authors" : [
{
"__dataID__" : "Qm9va0F1dGhvcjo1YWJhYjI0YjllNDIxNDAwMTAxMGNkZmY=",
"_id" : null,
"First_Name" : "John",
"Last_Name" : "Doe",
"Display_Name" : "John Doe",
"Slug" : "john-doe",
"Role" : 1
}
],
"isbns" : [
"9781497603424"
],
"image" : "978-cover.jpg",
"price" : "8.99",
"bisacs" : [],
"customCategories" : [],
},
"salePrice" : {
"type" : "",
"props" : {
"value" : ""
}
}
}
},
"tags" : [
{
"id" : "5abab58042e2c90011875801",
"name" : "Tag Test 1"
},
{
"id" : "5abab5831242260011c248f9",
"name" : "Tag Test 2"
},
{
"id" : "592450e0b1be5055278eb5c6",
"name" : "horror",
},
{
"id" : "59244a96b1be5055278e9b0e",
"name" : "Special Report",
"_id" : "59244a96b1be5055278e9b0e"
}
],
"created_at" : ISODate("2018-03-27T21:44:21.412Z"),
"created_by" : ObjectId("591345bda429e90011c1797e")
}
I believe Mongoose have Mixed type but how do i represent such complex type in Apollo GraphQL Server and Mongoose Schema. Also, currently my resolver is just models.product.find(). So if i have such complex type, need to understand what update needs to make to my resolver.
It will be great if i get complete solution for GraphQL Apollo schema, mongoose schema and resolver for my data.
Finally found solution for problem.
You can declare new type and reference it in typeDef for GraphQL Schema.
In mongoose model, you can reference it as {type: Array}

Count Documents Matching Multiple Array Criteria

Schema is:
{
"_id" : ObjectId("594b7e86f59ccd05bb8a90b5"),
"_class" : "com.notification.model.entity.Notification",
"notificationReferenceId" : "7917a5365ba246d1bb3664092c59032a",
"notificationReceivedAt" : ISODate("2017-06-22T08:23:34.382+0000"),
"sendTo" : [
{
"userReferenceId" : "check",
"mediumAndDestination" : [
{
"medium" : "API",
"status" : "UNREAD"
}
]
}
]
}
{
"_id" : ObjectId("594b8045f59ccd076dd86063"),
"_class" : "com.notification.model.entity.Notification",
"notificationReferenceId" : "6990329330294cbc950ef2b38f6d1a4f",
"notificationReceivedAt" : ISODate("2017-06-22T08:31:01.299+0000"),
"sendTo" : [
{
"userReferenceId" : "check",
"mediumAndDestination" : [
{
"medium" : "API",
"status" : "UNREAD"
}
]
}
]
}
{
"_id" : ObjectId("594b813ef59ccd076dd86064"),
"_class" : "com.notification.model.entity.Notification",
"notificationReferenceId" : "3c910cf5fcec42d6bfb78a9baa393efa",
"notificationReceivedAt" : ISODate("2017-06-22T08:35:10.474+0000"),
"sendTo" : [
{
"userReferenceId" : "check",
"mediumAndDestination" : [
{
"medium" : "API",
"status" : "UNREAD"
}
]
},
{
"userReferenceId" : "hello",
"mediumAndDestination" : [
{
"medium" : "API",
"status" : "READ"
}
]
}
]
}
I want to count a user notifications based on statusList which is a List. I used mongoOperations to make a query:
Query query = new Query();
query.addCriteria(Criteria.where("sendTo.userReferenceId").is(userReferenceId)
.andOperator(Criteria.where("sendTo.mediumAndDestination.status").in(statusList)));
long count = mongoOperations.count(query, Notification.class);
I realise I'm doing it wrong because I am getting count as 1 when I query for user with reference ID hello and statusList with single element as UNREAD.
How do I perform an aggregated query on array element?
The query needs $elemMatch in order to actually match "within" the array element that matches both criteria:
Query query = new Query(Criteria.where("sendTo")
.elemMatch(
Criteria.where("userReferenceId").is("hello")
.and("mediumAndDestination.status").is("UNREAD")
));
Which essentially serializes to:
{
"sendTo": {
"$elemMatch": {
"userReferenceId": "hello",
"mediumAndDestination.status": "UNREAD"
}
}
}
Note that in your question there is no such document, the only matching thing with "hello" actually has the "status" of "READ". If I supply those criteria instead:
{
"sendTo": {
"$elemMatch": {
"userReferenceId": "hello",
"mediumAndDestination.status": "READ"
}
}
}
Then I get the last document:
{
"_id" : ObjectId("594b813ef59ccd076dd86064"),
"_class" : "com.notification.model.entity.Notification",
"notificationReferenceId" : "3c910cf5fcec42d6bfb78a9baa393efa",
"notificationReceivedAt" : ISODate("2017-06-22T08:35:10.474Z"),
"sendTo" : [
{
"userReferenceId" : "check",
"mediumAndDestination" : [
{
"medium" : "API",
"status" : "UNREAD"
}
]
},
{
"userReferenceId" : "hello",
"mediumAndDestination" : [
{
"medium" : "API",
"status" : "READ"
}
]
}
]
}
But with "UNREAD" the count is actually 0 for this sample.

Having trouble with a slow MongoDB aggregation query

I have the following aggregation query in MongoDB
return mongoose.model('Submission')
.aggregate([
{ $match: { client: { $in: clientIds }, admin: this._admin._id } },
{ $sort: { client: 1, submitted: -1 } },
{ $group: {
_id: '$client',
lastSubmitted: { $first: '$submitted' },
timezone: { $first: '$timezone' },
} },
])
.exec();
which is performing really badly on a collection with about 2000 documents. It usually takes 5 seconds to complete and I've seen as bad as 15 seconds. I have the following index on the submissions collection:
{
client : 1,
admin : 1,
assessment : 1,
submitted : -1,
}
I'm stuck as to why it's taking so long. Any suggestions?
EDIT
I've run the query
db.submissions.aggregate([
{$match: {
client: {$in: ['54a4cdfdd0666c243035dc98','55cc985291a0ffab6849de34']},
admin: '542b4af8880fc300007eb411'
}},
{$sort: {client:1, submitted: -1}},
{$group: {
_id: '$client',
lastSubmitted: {$first: '$submitted'},
timezone: {$first: '$timezone'}
}}
], {explain: true})
in the shell with explain and got
{
"stages" : [
{
"$cursor" : {
"query" : {
"client" : {
"$in" : [
"54a4cdfdd0666c243035dc98",
"55cc985291a0ffab6849de34"
]
},
"admin" : "542b4af8880fc300007eb411"
},
"fields" : {
"client" : 1,
"submitted" : 1,
"timezone" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "webdemo.submissions",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"admin" : {
"$eq" : "542b4af8880fc300007eb411"
}
},
{
"client" : {
"$in" : [
"54a4cdfdd0666c243035dc98",
"55cc985291a0ffab6849de34"
]
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"client" : 1,
"admin" : 1,
"assessment" : 1,
"submitted" : -1
},
"indexName" : "client_1_admin_1_assessment_1_submitted_-1",
"isMultiKey" : false,
"direction" : "forward",
"indexBounds" : {
"client" : [
"[\"54a4cdfdd0666c243035dc98\", \"54a4cdfdd0666c243035dc98\"]",
"[\"55cc985291a0ffab6849de34\", \"55cc985291a0ffab6849de34\"]"
],
"admin" : [
"[\"542b4af8880fc300007eb411\", \"542b4af8880fc300007eb411\"]"
],
"assessment" : [
"[MinKey, MaxKey]"
],
"submitted" : [
"[MaxKey, MinKey]"
]
}
}
},
"rejectedPlans" : [ ]
}
}
},
{
"$sort" : {
"sortKey" : {
"client" : 1,
"submitted" : -1
}
}
},
{
"$group" : {
"_id" : "$client",
"lastSubmitted" : {
"$first" : "$submitted"
},
"timezone" : {
"$first" : "$timezone"
}
}
}
],
"ok" : 1
}
EDIT 2
The output I get from db.submissions.getIndices() is
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "webdemo.submissions"
},
{
"v" : 1,
"key" : {
"client" : 1,
"admin" : 1,
"assessment" : 1,
"submitted" : -1
},
"name" : "client_1_admin_1_assessment_1_submitted_-1",
"ns" : "webdemo.submissions",
"background" : true
}
]

ElasticSerach - Statistical facets on length of the list

I have the following sample mappipng:
{
"book" : {
"properties" : {
"author" : { "type" : "string" },
"title" : { "type" : "string" },
"reviews" : {
"properties" : {
"url" : { "type" : "string" },
"score" : { "type" : "integer" }
}
},
"chapters" : {
"include_in_root" : 1,
"type" : "nested",
"properties" : {
"name" : { "type" : "string" }
}
}
}
}
}
I would like to get a facet on number of reviews - i.e. length of the "reviews" array.
For instance, verbally spoken results I need are: "100 documents with 10 reviews, 20 documents with 5 reviews, ..."
I'm trying the following statistical facet:
{
"query" : {
"match_all" : {}
},
"facets" : {
"stat1" : {
"statistical" : {"script" : "doc['reviews.score'].values.size()"}
}
}
}
but it keeps failing with:
{
"error" : "SearchPhaseExecutionException[Failed to execute phase [query_fetch], total failure; shardFailures {[mDsNfjLhRIyPObaOcxQo2w][facettest][0]: QueryPhaseExecutionException[[facettest][0]: query[ConstantScore(NotDeleted(cache(org.elasticsearch.index.search.nested.NonNestedDocsFilter#a2a5984b)))],from[0],size[10]: Query Failed [Failed to execute main query]]; nested: PropertyAccessException[[Error: could not access: reviews; in class: org.elasticsearch.search.lookup.DocLookup]
[Near : {... doc[reviews.score].values.size() ....}]
^
[Line: 1, Column: 5]]; }]",
"status" : 500
}
How can I achieve my goal?
ElasticSearch version is 0.19.9.
Here is my sample data:
{
"author" : "Mark Twain",
"title" : "The Adventures of Tom Sawyer",
"reviews" : [
{
"url" : "amazon.com",
"score" : 10
},
{
"url" : "www.barnesandnoble.com",
"score" : 9
}
],
"chapters" : [
{ "name" : "Chapter 1" }, { "name" : "Chapter 2" }
]
}
{
"author" : "Jack London",
"title" : "The Call of the Wild",
"reviews" : [
{
"url" : "amazon.com",
"score" : 8
},
{
"url" : "www.barnesandnoble.com",
"score" : 9
},
{
"url" : "www.books.com",
"score" : 5
}
],
"chapters" : [
{ "name" : "Chapter 1" }, { "name" : "Chapter 2" }
]
}
It looks like you are using curl to execute your query and this curl statement looks like this:
curl localhost:9200/my-index/book -d '{....}'
The problem here is that because you are using apostrophes to wrap the body of the request, you need to escape all apostrophes that it contains. So, you script should become:
{"script" : "doc['\''reviews.score'\''].values.size()"}
or
{"script" : "doc[\"reviews.score"].values.size()"}
The second issue is that from your description it looks like your are looking for a histogram facet or a range facet but not for a statistical facet. So, I would suggest trying something like this:
curl "localhost:9200/test-idx/book/_search?search_type=count&pretty" -d '{
"query" : {
"match_all" : {}
},
"facets" : {
"histo1" : {
"histogram" : {
"key_script" : "doc[\"reviews.score\"].values.size()",
"value_script" : "doc[\"reviews.score\"].values.size()",
"interval" : 1
}
}
}
}'
The third problem is that the script in the facet will be called for every single record in the result list and if you have a lot of results it might take really long time. So, I would suggest indexing an additional field called number_of_reviews that should be populated with the number of reviews by your client. Then your query would simply become:
curl "localhost:9200/test-idx/book/_search?search_type=count&pretty" -d '{
"query" : {
"match_all" : {}
},
"facets" : {
"histo1" : {
"histogram" : {
"field" : "number_of_reviews"
"interval" : 1
}
}
}
}'

Resources