Is it possible, using the Elasticsearch _count API and given the following abbreviated ES template, to find the count of sponsorships for all the campaigns by brandId?
sponsorshipSets and sponsorships are optional, so they can be null.
{
"index_patterns": "campaigns*",
"order": 4,
"version": 4,
"aliases": {
"campaigns": {
}
},
"settings": {
"number_of_shards": 5
},
"mappings": {
"dynamic": "false",
"properties": {
"brandId": {
"type": "keyword"
},
"sponsorshipSets": {
"type": "nested",
"properties": {
"id": {
"type": "keyword"
},
"sponsorships": {
"type": "nested",
"properties": {
"id": {
"type": "keyword"
}
}
}
}
}
}
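A filter aggregation can be used to restrict the documents to a certain brandId, two nested aggregations to reach the sponsorships, and a value_count aggregation to get the count. Note that the nested aggregations only honor the brand filter when they are declared as sub-aggregations (inside an "aggs" section) of the filter aggregation; declared as siblings of it, they count sponsorships across all brands.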
Query
{
"aggs": {
"selected_brand": {
"filter": {
"term": {
"brandId": "1"
}
}
},
"sponsorshipSets": {
"nested": {
"path": "sponsorshipSets"
},
"aggs": {
"sponsorships": {
"nested": {
"path": "sponsorshipSets.sponsorships"
},
"aggs": {
"count": {
"value_count": {
"field": "sponsorshipSets.sponsorships.id"
}
}
}
}
}
}
}
}
I found a solution without using aggregations; it seems more accurate than the one above, and I can use the _count API.
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "sponsorshipSets.sponsorships",
"query": {
"bool": {
"filter": {
"exists": {
"field": "sponsorshipSets.sponsorships"
}
}
}
}
}
},
{
"term": {
"brandId": "b1d28821-3730-4266-8f55-eb69596004fb"
}
}
]
}
}
}
Related
I have an ES index with this mapping:
{
"_doc": {
"dynamic": "false",
"properties": {
"original": {
"properties":{
"id": {
"type": "keyword"
},
"purchaseStatus": {
"type": "keyword"
},
"marketCode": {
"type": "keyword"
},
"salesProfiles": {
"type": "nested",
"properties": {
"marketCode": {
"type": "keyword"
},
"purchaseStatus": {
"type": "keyword"
}
}
}
}
},
"recommended": {
"properties":{
"id": {
"type": "keyword"
},
"purchaseStatus": {
"type": "keyword"
},
"marketCode": {
"type": "keyword"
},
"salesProfiles": {
"type": "nested",
"properties": {
"marketCode": {
"type": "keyword"
},
"purchaseStatus": {
"type": "keyword"
}
}
}
}
},
"distance": {
"type": "double"
},
"rank": {
"type": "double"
},
"source": {
"properties": {
"application": {
"type": "keyword"
},
"platform": {
"type": "keyword"
}
}
},
"timestamp": {
"properties": {
"createdAt": {
"type": "date"
},
"updatedAt": {
"type": "date"
}
}
}
}
},
"_default_": {
"dynamic": "false"
}
}
and I need to obtain the recommended docs with salesProfiles.marketCode equal to original.marketCode but my query doesn't return any buckets:
GET index/_search
{
"aggs": {
"similarities": {
"filter": {
"bool": {
"must": [
{
"term": {
"original.storefrontId": "12345"
}
},
{
"nested": {
"path": "recommended.salesProfiles",
"query": {
"bool": {
"must": [
{
"match": {
"recommended.salesProfiles.purchaseStatus": "PAID"
}
}
]
}
}
}
}
]
}
},
"aggs": {
"markets": {
"nested": {
"path": "recommended.salesProfiles"
},
"aggs": {
"recommendedMarket": {
"terms": {
"field": "recommended.salesProfiles.marketCode",
"size": 100
}
}
}
}
}
}
},
"explain": false
}
Any suggestion would be really appreciated. Thanks in advance!
It's hard to debug this without any example docs, but I think this might work:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"original.storefrontId": "12345"
}
},
{
"nested": {
"path": "recommended.salesProfiles",
"query": {
"bool": {
"must": [
{
"match": {
"recommended.salesProfiles.purchaseStatus": "PAID"
}
}
]
}
}
}
}
]
}
},
"aggs": {
"Profiles": {
"nested": {
"path": "recommended.salesProfiles"
},
"aggs": {
"by_term": {
"terms": {
"field": "recommended.salesProfiles.marketCode",
"size": 100
}
}
}
}
}
}
I don't think you can use a "nested" query inside the filter agg unless it sits under a nested aggregation, so I believe that's why you didn't get any docs.
I basically moved all the filtering into the query and just aggregated the terms afterwards.
I'm trying to extract aggregated data, but I'm a little lost when I want to further filter a set of documents. Getting the colors seems OK, but when I want to aggregate the categories with a color filter, the query fails. What am I doing wrong in this query?
This is the query I already have:
GET /my_index/_search
{
"_source": false,
"aggs": {
"global": {
"global": {
},
"aggs": {
"all_products": {
"nested": {
"path": "simple"
},
"aggs": {
"filter_top": {
"filter": {
"bool": {
"must": [
{
"match": {
"simple.compound_words": {
"query": "tisch",
"operator": "AND"
}
}
}
]
}
},
"aggs": {
"filter_merged": {
"aggs": {
"filter": {
"bool": {
"must": [
{
"terms": {
"simple.filter_color": [
"green",
"red"
]
}
}
]
}
},
"aggs": {
"filter_category": {
"terms": {
"field": "simple.filter_category"
}
}
}
}
},
"filter_color": {
"terms": {
"field": "simple.filter_color"
}
}
}
}
}
}
}
}
}
}
This is the relevant part of the index mappings.
{
"my_index": {
"mappings": {
"_doc": {
"properties": {
"simple": {
"type": "nested",
"properties": {
"compound_words": {
"type": "text",
"analyzer": "GermanCompoundWordsAnalyzer"
},
"filter_category": {
"type": "keyword"
},
"filter_color": {
"type": "keyword"
}
}
}
}
}
}
}
}
Thanks for your support.
I have a document with the following mappings:
{
"some_doc_name": {
"mappings": {
"_doc": {
"properties": {
"stages": {
"properties": {
"name": {
"type": "text"
},
"durationMillis": {
"type": "long"
}
}
}
}
}
}
}
}
And I would like to have an aggregation like: "The average duration of the stages whose name contains the SCM token".
I tried something like:
{
"aggs": {
"scm_stage": {
"filter": {
"bool": {
"should": [{
"match_phrase": {
"stages.name": "SCM"
}
}]
}
},
"aggs" : {
"avg_duration": {
"avg": {
"field": "stages.durationMillis"
}
}
}
}
}
}
But that's giving me the average of all stages for all documents that contain at least one stage with the SCM token. Any advice on how to get this aggregation right?
Answering my own question, thanks to the help of val.
My mapping was missing "type": "nested" on the stages field; it should look something like:
...
"stages": {
"type": "nested",
"properties": {
"id": {
"type": "keyword",
"ignore_above": 256
},
...
Then I can get my aggregation working with something like this:
{
"size": 0,
"query": {
"nested": {
"path": "stages",
"query": {
"match": {
"stages.name": "scm"
}
}
}
},
"aggs": {
"stages": {
"nested": {
"path": "stages"
},
"aggs": {
"stages-filter": {
"filter": {
"terms": {
"stages.name": [
"scm"
]
}
},
"aggs": {
"avg_duration": {
"avg": {
"field": "stages.durationMillis"
}
}
}
}
}
}
}
}
I use Elasticsearch version 1.7.5 and I am trying to fetch all documents that are missing some fields.
My mapping:
...
"participant": {
"properties": {
"id": {
"type": "string"
},
"firstName": {
"type": "string"
},
"lastName": {
"type": "string"
},
"name": {
"type": "string"
}
},
"coordinator": {
"properties": {
"id": {
"type": "string"
},
"firstName": {
"type": "string"
},
"lastName": {
"type": "string"
},
"name": {
"type": "string"
}
}
...
I want to query all documents that don't have a coordinator.id or participant.id assigned yet.
My query looks like:
"query": {
"nested": {
"path": "coordinator, participant",
"query": {
"constant_score": {
"filter": {
"or": [
{
"missing": {
"field": "coordinator.id"
}
},
{
"missing": {
"field": "participant.id"
}
},
]
}
}
}
}
}
You do OR queries via the bool query:
https://www.elastic.co/guide/en/elasticsearch/reference/1.7/query-dsl-bool-filter.html
So this query would work:
{
"query": {
"bool": {
"should": [
{
"constant_score": {
"filter": {
"missing": {
"field": "participant.id"
}
}
}
},
{
"constant_score": {
"filter": {
"missing": {
"field": "coordinator.id"
}
}
}
}
]
}
}
}
I noticed that you were using a nested query, even though the mapping does not declare coordinator and participant as nested field types, so that will not work:
https://www.elastic.co/guide/en/elasticsearch/reference/1.7/mapping-nested-type.html
Setting something as a nested type is only useful when you need to group search terms together so I don't think it is necessary for you.
I have an issue with aggregations of nested documents on ElasticSearch 5.6.3.
My query is structured in the following way:
query
aggs
|_filter
|_nested
|_term
|_top-hits
If I try the aggregation on a non-nested field (and with the nested agg removed of course), everything works as expected. But as it is structured now, I receive an exception from Lucene:
Child query must not match same docs with parent filter. Combine them as must clauses (+) to find a problem doc. docId=2147483647, class org.apache.lucene.search.ConstantScoreScorer
This exception is not triggered on ElasticSearch 2.4.6.
I tried to structure the aggregations in a different way, but I couldn't come up with a combination that works and delivers the wanted results.
This is what the mapping looks like:
"recording": {
"dynamic": "strict",
"_all" : {
"enabled" : false
},
"properties": {
"id": {
"type": "integer"
},
"soloists": {
"properties": {
"type": "nested",
"person": {
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "string",
"index": "not_analyzed"
}
}
}
},
"work": {
"id": {
"type": integer
},
"title": {
"type": "string",
"index": "not_analyzed"
}
}
}
And the query itself:
{
"query": {},
"aggs": {
"my_top_results": {
"global": {},
"aggs": {
"my_filter_agg": {
"filter": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"nested": {
"path": "soloists",
"query": {
"bool": {
"must": {
"match": {
"soloists.person.id": 77957
}
}
}
}
}
}
]
}
}
]
}
},
"aggs": {
"my_nested_agg": {
"nested": {
"path": "soloists"
},
"aggs": {
"my_terms_agg": {
"term": {
"field": "soloists.person.id",
"size": 10
}
"aggs": {
"my_top_hits_agg": {
"size": 1,
"_source": {
"include": [
"soloists.person.id",
"soloists.person.name"
]
}
}
}
}
}
}
}
}
}
}
}
}
Any help would be highly appreciated.
Some links I stumbled across while looking for a solution:
https://issues.apache.org/jira/browse/LUCENE-7674
https://discuss.elastic.co/t/querying-on-a-subobject-field-within-a-nested-object/65533
https://github.com/elastic/elasticsearch/issues/23280
https://github.com/elastic/elasticsearch/issues/11749
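There are some typos in your mapping and query: in the mapping, "type": "nested" is placed inside the "properties" of soloists instead of next to it, work is missing its "properties" wrapper, and integer is not quoted; in the query, the terms aggregation is spelled "term", a comma is missing before its sub-"aggs", and my_top_hits_agg lacks the "top_hits" wrapper.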
Here are some fixed commands which do not trigger any error when used on an instance of Elasticsearch 5.6.3.
You can copy and paste either in Kibana or in a Linux terminal (in which case you should edit the first line) and test them on your Elasticsearch instance.
HOST=10.225.0.2:9200
curl -XPUT "http://$HOST/an_index"
curl -XPUT "http://$HOST/an_index/recording/_mapping" -H 'Content-Type: application/json' -d'
{
"dynamic": "strict",
"_all": {
"enabled": false
},
"properties": {
"id": {
"type": "integer"
},
"soloists": {
"type": "nested",
"properties": {
"person": {
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
},
"work": {
"properties": {
"id": {
"type": "integer"
},
"title": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}'
curl -XPOST "http://$HOST/an_index/recording/1" -H 'Content-Type: application/json' -d'
{
"id": 0,
"soloists": [
{
"person": {
"id": 77957,
"name": "John doe"
}
},
{
"person": {
"id": 1,
"name": "Jane smith"
}
}
],
"work": {
"id": 0,
"title": "Test"
}
}'
curl -XGET "http://$HOST/an_index/recording/_search?pretty" -H 'Content-Type: application/json' -d'
{
"size": 0,
"aggs": {
"my_top_results": {
"global": {},
"aggs": {
"my_filter_agg": {
"filter": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"nested": {
"path": "soloists",
"query": {
"bool": {
"must": {
"match": {
"soloists.person.id": 77957
}
}
}
}
}
}
]
}
}
]
}
},
"aggs": {
"my_nested_agg": {
"nested": {
"path": "soloists"
},
"aggs": {
"my_terms_agg": {
"terms": {
"field": "soloists.person.id",
"size": 10
},
"aggs": {
"my_top_hits_agg": {
"top_hits": {
"size": 1,
"_source": {
"include": [
"soloists.person.id",
"soloists.person.name"
]
}
}
}
}
}
}
}
}
}
}
}
}
}'
If those queries work but not when applied to your index, could you please update your question with the output of curl -XGET "http://$HOST/your_index_name" so that we can check the exact settings and mapping of your index? Such an error may be caused by a conflict between types in the same index. I'll update my answer accordingly.