Can i filter subarray in Elasticsearch? - elasticsearch

I have orders in Elasticsearch, with the order products attached to each order as a subarray. When I'm aggregating Prices, I need to be able to filter the order products inside my order documents.
Example of my document in Elastic:
{
"OrderID":4567488,
"projectId":"4",
"Project":"direkt",
"legacy_id":null,
"supporterId":null,
"Origin":"FR",
"orderProducts":[
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"30",
"Price":"26.95",
},
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"15",
"Price":"15.22",
},
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"123",
"Price":"24.55",
},
]
}
How I'm filtering right now:
{
"index":"order_index",
"from":0,
"size":100,
"body":{
"query":{
"filtered":{
"filter":{
"bool":{
"must":[
{
"term":{
"orderProducts.brandNo":"30"
}
}
],
}
}
}
}
}
}
What I'm expecting:
{
"OrderID":4567488,
"projectId":"4",
"Project":"direkt",
"legacy_id":null,
"supporterId":null,
"Origin":"FR",
"orderProducts":[
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"30",
"Price":"26.95",
},
]
}
What I'm really getting:
The whole document.
Is that possible? Can subarray data be filtered?
UPD.
Yes, these are my schema mappings:
"mappings":{
"order":{
"dynamic_templates":[
{
"strings":{
"mapping":{
"type":"string",
"fields":{
"raw":{
"index":"not_analyzed",
"type":"string"
}
}
},
"match_mapping_type":"string"
}
}
],
"properties":{
"orderProducts":{
"include_in_parent":true,
"properties":{
"OrderProductID":{
"type":"long"
},
"OrderID":{
"type":"long"
},
"brandNo":{
"type":"long"
},
"Price":{
"type":"double"
}
},
"type":"nested"
},
"OrderID":{
"type":"long"
}
}
}
},

All right, after some experiments I discovered that the aggregation can be done like this:
{
"aggs":{
"sales":{
"nested":{
"path":"orderProducts"
},
"aggs":{
"filtered_nestedobjects":{
"filter":{
"bool":{
"must":[
{
"terms":{
"orderProducts.brandNo":[
"30"
]
}
}
]
}
},
"aggs":{
"Quantity":{
"sum":{
"field":"orderProducts.Quantity"
}
}
}
}
}
}
}
}
And the answer to the main question, whether we can filter a subarray in Elasticsearch, is yes. I only managed to do this with inner_hits.

Related

How do I sort using the best matching nested field or a default in Elasticsearch?

I have a bunch of documents that look like this in my index:
{
"given_name":"John",
"family_name":"Smith",
"email_addresses": [
{
"email_address":"john#gmail.com",
"primary":true
},
{
"email_address":"j.smith#gmail.com",
"primary":false
},
{
"email_address":"jpsmith#gmail.com",
"primary":false
},
{
"email_address":"johnsmith111#gmail.com",
"primary":false
}
]
}
The mapping looks like this:
{
"mappings":{
"properties":{
"given_name":{
"type":"keyword",
"fields":{
"search":{
"type":"search_as_you_type"
}
}
},
"family_name":{
"type":"keyword",
"fields":{
"search":{
"type":"search_as_you_type"
}
}
},
"email_addresses":{
"type":"nested",
"properties":{
"email_address":{
"type":"keyword",
"fields":{
"search":{
"type":"search_as_you_type"
}
}
},
"primary":{
"type":"boolean"
}
}
}
}
}
}
I am running a prefix search on given_name, family_name and email_addresses. This will allow the user to start typing and relevant results from those fields should start returning:
{
"query":{
"bool":{
"should":[
{
"nested":{
"path":"email_addresses",
"query":{
"prefix":{
"email_addresses.email_address.search": {
"value":"j"
}
}
}
}
},
{
"multi_match":{
"query":"j",
"fields":[
"given_name.search",
"family_name.search"
],
"type": "bool_prefix"
}
}
]
}
}
}
I'd like to sort the results from the above by the best matching email_address in email_addresses if there is one or more matching email_address under email_addresses, otherwise to use the email_address under email_addresses where primary is true.
I have looked into a script for sorting, but I didn't find any way in the documentation to access the matched nested child in a script.
Is there any way to achieve this?
To do this, we can use a bool query in the nested sort.
Given we have the following 4 documents:
{
"given_name":"John",
"family_name":"Smith1",
"email_addresses": [
{
"email_address":"someguy50#example.com",
"primary":true
},
{
"email_address":"someguy51#example.com",
"primary":false
},
{
"email_address":"someguy52#gmail.com",
"primary":false
},
{
"email_address":"someguy53gmail.com",
"primary":false
}
]
}
{
"given_name":"John",
"family_name":"Smith2",
"email_addresses": [
{
"email_address":"someguy54#example.com",
"primary":true
},
{
"email_address":"johnsmith#example.com",
"primary":false
},
{
"email_address":"someguy55#gmail.com",
"primary":false
},
{
"email_address":"someguy56gmail.com",
"primary":false
}
]
}
{
"given_name":"John",
"family_name":"Smith3",
"email_addresses": [
{
"email_address":"someguy49#example.com",
"primary":true
},
{
"email_address":"someguy47#example.com",
"primary":false
},
{
"email_address":"someguy48#gmail.com",
"primary":false
},
{
"email_address":"someguy46gmail.com",
"primary":false
}
]
}
{
"given_name":"John",
"family_name":"Smith4",
"email_addresses": [
{
"email_address":"someguy45#example.com",
"primary":true
},
{
"email_address":"someguy44#example.com",
"primary":false
},
{
"email_address":"someguy43#gmail.com",
"primary":false
},
{
"email_address":"someguy42gmail.com",
"primary":false
}
]
}
We can write our query like so:
{
"query":{
"bool":{
"should":[
{
"nested":{
"path":"email_addresses",
"query":{
"prefix":{
"email_addresses.email_address.search":{
"value":"john"
}
}
}
}
},
{
"multi_match":{
"query":"john",
"fields":[
"given_name.search",
"family_name.search"
],
"type":"bool_prefix"
}
}
]
}
},
"sort":[
{
"email_addresses.email_address":{
"order" : "asc",
"nested":{
"path":"email_addresses",
"filter":{
"bool":{
"should":[
{
"prefix":{
"email_addresses.email_address.search":{
"value":"john"
}
}
},
{
"term":{
"email_addresses.primary": true
}
}
]
}
}
}
}
}
]
}
First we do a prefix search on the email_addresses.email_address, given_name and family_name.
Then we sort on the nested email_addresses field as follows:
Sort by the email_addresses.email_address that matches our query.
Sort by email_addresses.primary = true.
The way this works is that in the bool query, Elasticsearch will first find documents that match the first query under should and sort those documents. For the remaining documents that do not match, it will proceed to the next query, which in our case is email_addresses.primary = true. If there are more documents that do not match either of these queries, they will be ordered using an order predetermined by Elasticsearch.

Elasticsearch Aggregations: filtering a global aggregation with nested queries

I have 'nested' mapping like so:
"stringAttributes":{
"type":"nested",
"properties":{
"Name":{
"type":"keyword"
},
"Value":{
"type":"keyword"
}
}
},
and thus have docs such as:
stringAttributes:[
{
Name:"supplier",
Value:"boohoo"
},
{
Name:"brand",
Value:"gucci"
},
{
Name:"primaryColour",
Value:"black"
},
{
Name:"secondaryColour",
Value:"green"
},
{
Name:"size",
Value:"12"
}
]
In building faceted search I believe I need a global aggregation. I.e. when a supplier is filtered by a user, the result set will not contain docs from other suppliers, so the regular aggregation will not contain any of the other suppliers.
The query could include the following clauses:
"must": [
{
"nested": {
"path": "stringAttributes",
"query": {
"bool": {
"must": [
{
"term": {
"stringAttributes.Name": "supplier"
}
},
{
"terms": {
"stringAttributes.Value": [
"boohoo"
]
}
}
]
}
}
}
},
{
"nested": {
"path": "stringAttributes",
"query": {
"bool": {
"must": [
{
"term": {
"stringAttributes.Name": "brand"
}
},
{
"terms": {
"stringAttributes.Value": [
"warehouse"
]
}
}
]
}
}
}
}
]
So in this case I need a global aggregation that is then filtered by all OTHER filters applied (e.g. by brand) that will return the other suppliers that could be selected given these other filters.
This is what I have so far. It returns the 'global' unfiltered results however. At this point I am completely stumped.
{
"global":{},
"aggs":{
"inner":{
"filter":{
"nested":{
"query":{
"bool":{
"filter":[
{
"term":{
"stringAttributes.Name":{
"value":"brand"
}
}
},
{
"terms":{
"stringAttributes.Value":[
"warehouse"
]
}
}
]
}
},
"path":"stringAttributes"
}
}
},
"aggs":{
"nested":{
"path":"stringAttributes"
},
"aggs":{
"aggs":{
"filter":{
"match":{
"stringAttributes.Name":"supplier"
}
},
"aggs":{
"facet_value":{
"terms":{
"size":1000,
"field":"stringAttributes.Value"
}
}
}
}
}
}
}
}
Any suggestions for filtering a global aggregation with nested attributes? I have read through a lot of documentation and various other answers on SO, but I am still struggling to understand why this particular agg is not being filtered.
My suggested answer after some more digging...
{
"global":{
},
"aggs":{
"inner":{
"filter":{
"nested":{
"query":{
"bool":{
"filter":[
{
"term":{
"stringAttributes.Name":{
"value":"brand"
}
}
},
{
"terms":{
"stringAttributes.Value":[
"warehouse"
]
}
}
]
}
},
"path":"stringAttributes"
}
},
"aggs":{
"nested":{
"path":"stringAttributes"
},
"aggs":{
"agg_filtered_special":{
"filter":{
"match":{
"stringAttributes.Name":"supplier"
}
},
"aggs":{
"facet_value":{
"terms":{
"size":1000,
"field":"stringAttributes.Value"
}
}
}
}
}
}
}
}
}

ElasticSearch Query for range OR missing in array

I'm trying to create a query that will query the below mapping for values that have a "period" that either matches a specific date, or is missing its value (with a null value). Please note that I am working with a third-party database, so I cannot change the mappings. Bear with me if the example data and mappings are large; I've tried to cut everything nonessential away.
{
"EXAMPLE":{
"mappings":{
"company":{
"properties":{
"CompanyData":{
"properties":{
"participantRelations":{
"type":"nested",
"include_in_parent":true,
"properties":{
"participant":{
"type":"nested",
"include_in_parent":true,
"properties":{
"unitNumber":{
"type":"long"
}
},
"organizations":{
"properties":{
"memberData":{
"type":"nested",
"include_in_parent":true,
"properties":{
"attributes":{
"properties":{
"values":{
"properties":{
"period":{
"properties":{
"validFrom":{
"type":"date",
"format":"dateOptionalTime"
},
"validTo":{
"type":"date",
"format":"dateOptionalTime"
}
}
}
"value":{
"type":"string"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
Here is some example data. I've translated it from the (Danish) source it comes from, so any slight misspellings etc. are just typos.
{
"company": {
"companyData":{
"participantRelations":[
{
"participant":{
"unitNumber":4003857309
},
"organizations":[
{
"memberData":[
{
"attributtes":[
{
"values":[
{
"value":"chairman",
"period":{
"validFrom":"2014-10-01",
"validTo":"2016-08-11"
}
}
]
},
{
"values":[
{
"value":"generalassembly",
"period":{
"validFrom":"2014-10-01",
"validTo":"2016-08-11"
}
}
]
}
]
}
]
},
{
"memberData":[
{
"attributes":[
{
"values":[
{
"value":"chairman",
"period":{
"validFrom":"2016-08-16",
"validTo":"2017-06-08"
}
},
{
"value":"boardmember",
"period":{
"validFrom":"2017-06-09",
"validTo":null
}
}
]
},
{
"values":[
{
"value":"generalassembly",
"period":{
"validFrom":"2016-08-16",
"validTo":"2017-06-08"
}
},
{
"value":"generalassembly",
"period":{
"validFrom":"2017-06-09",
"validTo":null
}
}
]
}
]
}
]
}
]
}
]
}
}
}
What I want to do is something like the query below, which doesn't quite work, as there are cases it cannot handle for reasons I do not know. What it needs to do is look for any company.companyData.participantRelations.organizations.memberData.attributes.values.period.validTo over a certain date, OR where the date is null. Now I know nulls are funky in ES, but I know that the date properties will always be there; the validTo will just be set to null if there is no date yet.
Furthermore, it needs to be nested on organizations as well, as I need a specific unitNumber to be present.
{
"query":{
"nested":{
"filter":{
"bool":{
"must":[
{
"nested":{
"filter":{
"bool":{
"must":[
{
"bool":{
"should":[
{
"range":{
"company.companyData.participantRelations.organizations.memberData.attributtes.values.period.validTo":{
"gte":"2017-08-14T15:23:11.011"
}
}
},
{
"missing":{
"field":"company.companyData.participantRelations.organizations.memberData.attributtes.values.period.validTo"
}
}
]
}
}
]
}
},
"path":"company.companyData.participantRelations.organizations.memberData"
}
},
{
"term":{
"company..companyData.participantRelations.participant.unitNumber":4003857309
}
}
]
}
},
"path":"company.companyData.participantRelations"
}
}
}
This query works in two cases:
Where there is only one entry in the list of values, and its validTo date is null
Where the validTo date is greater or equal to my date limit.
It does not seem to work if there are two entries, the first of which has a date that is earlier than my limit, and the second entry has a null value (as in the example).
I realize this is kind of convoluted, but with the database I'm querying that is just the way it is. I hope I've simplified it enough for you to get my issue.
Thanks in advance.

How to filter top terms aggregation in ElasticSearch?

I have orders documents like this:
{
"customer":{
"id":1,
"Name":"Foobar"
},
"products":[
{
"id":1,
"name":"Television",
"category": 11
},
{
"id":2,
"name":"Smartphone",
"category": 12
}
]
}
And I am performing a top_terms_aggregation in order to know the products best sellers. To do it globally, I use:
{
"size":0,
"aggs":{
"products":{
"nested":{
"path":"products"
},
"aggs":{
"top_terms_aggregation":{
"terms":{
"field":"products.id",
"size":10
}
}
}
}
}
}
But, how would I filter the products given a category_id? I tried adding this filter:
"query":{
"bool":{
"must":[
{
"match":{
"products.category":11
}
}
]
}
}
But this filters the orders themselves, keeping any order that has some product with the given category, and the aggregation gets corrupted.
I want to get the best-selling products that belong to a given category.
Solved this way:
{
"size":0,
"aggs":{
"products":{
"nested":{
"path":"products"
},
"aggs":{
"first_filter":{
"filter":{
"term":{
"products.category":11
}
},
"aggs":{
"top_terms_aggregation":{
"terms":{
"field":"products.id",
"size":10
}
}
}
}
}
}
}
}
It must be this exact sequence of aggs or "stranger things" happen.
You may use filter aggregation
GET _search
{
"size":0,
"aggs":{
"products":{
"nested":{
"path":"products"
},
"filter": {
"term": {
"category": 11
}
},
"aggs":{
"top_terms_aggregation":{
"terms":{
"field":"products.id",
"size":10
}
}
}
}
}
}

Filtering nested aggregations in ElasticSearch

Given the following mapping from my index (Items):
{
"title": {
"type":"string"
},
"tag_groups": {
"type":"nested",
"include_in_parent":true,
"properties": {
"name":{
"type":"string",
"index":"not_analyzed"
},
"terms": {
"type":"string",
"index":"not_analyzed"
}
}
}
}
And the following sample of data that each document in the index follows:
{
"title":"Christian Louboutin Magenta Leather Lady Peep",
"tag_groups": [
{
"name": "Color",
"terms": ["pink"]
},
{
"name":"Material/Fabric",
"terms":["leather"]
},
{
"name":"Season",
"terms":["summer", "spring"]
},
{
"name":"Occasion",
"terms":["cocktail", "night out", "wedding: for the guests", "date night"]
}
],
}
IMPORTANT: These tag_groups are variable from product to product and category to category. So pulling them out of the nested property would be tough since it would create index properties that don't apply to all documents in the index.
Here is my query that is producing the correct aggregated results across each tag_groups.name and corresponding set of values. Counts are accurate too.
{
"size":"40",
"query": {
"filtered": {
"query": {"match_all": {}}
}
},
"aggs":{
"tagGroupAgg": {
"nested": {
"path":"tag_groups"
},
"aggs":{
"tagGroupNameAgg":{
"terms":{
"field":"tag_groups.name"
},
"aggs":{
"tagGroupTermsAgg":{
"terms": {
"field":"tag_groups.terms"
}
}
}
}
}
}
}
}
NOW FOR THE QUESTION...
In order for the aggregation counts on the left to reflect accurately, when I apply a TermsFilter to the aggregation (tag_groups.Color = ['pink']), how do I make sure that aggregation filter isn't applied to the tag_groups.Color result?
Currently, when I apply that filter I am losing all of my tag_groups.Colors (except for pink), preventing the user from searching other colors...
I'm hitting a wall on this one. Any help would be much appreciated!
{
"size":"40",
"query":{
"filtered":{
"query":{
"match_all":{
}
}
}
},
"aggs":{
"tagGroupAgg":{
"nested":{
"path":"tag_groups"
},
"aggs":{
"tagGroupNameAgg":{
"terms":{
"field":"tag_groups.name"
},
"aggs":{
"tagGroupTermsAgg":{
"terms":{
"field":"tag_groups.terms"
},
"aggs":{
"tagGroupTermsReverseAgg":{
"reverse_nested":{
},
"aggs":{
"testingReverseFilter":{
"filter":{
"bool":{
"must":[
{
"terms":{
"tag_groups.name":[
"Color"
]
}
},
{
"terms":{
"brand_name.raw":[
"Chanel"
]
}
}
]
}
},
"aggs":{
"tagGroupTermsAgg2":{
"terms":{
"field":"tag_groups.terms"
}
}
}
}
}
}
}
}
}
}
}
}
}
}

Resources