Elasticsearch term aggregation document count issue - elasticsearch

This is the request I'm sending to Elasticsearch:
{
"aggregations":{
"followUpActivity.metainfo.metainfos.string1":{
"terms":{
"field":"metainfos.string1",
"missing":"null",
"order":{
"_count":"asc"
}
}
}
}
}
I'm asking for buckets on field metainfos.string1 and ordering them by _count. This is the response:
{
"took":7,
"timed_out":false,
"_shards":{
"total":1,
"successful":1,
"failed":0
},
"hits":{
"total":3,
"max_score":1.0,
"hits":[
{
"_index":"living_v1",
"_type":"fuas",
"_id":"be9b29f3-37a5-11e6-a66a-30b5c2122322",
"_score":1.0,
"_routing":"living_team",
"_source":{
"user":"living_team",
"timestamp":"2016-06-22T11:27:25.531Z",
"metainfos":{
"string1":[
"s1", <<<<<<<<<<<<<--------------
"s2" <<<<<<<<<<<<<--------------
]
}
}
},
{
"_index":"living_v1",
"_type":"fuas",
"_id":"c3af0f64-37a5-11e6-a66a-30b5c2122322",
"_score":1.0,
"_routing":"living_team",
"_source":{
"user":"living_team",
"timestamp":"2016-06-22T12:30:01.625Z",
"metainfos":{
"string1":[
"s1", <<<<<<<<<<<<<--------------
"s2" <<<<<<<<<<<<<--------------
]
}
}
},
{
"_index":"living_v1",
"_type":"fuas",
"_id":"ee790469-48f3-11e6-9f47-30b5c2122322",
"_score":1.0,
"_routing":"living_team",
"_source":{
"user":"living_team",
"timestamp":"2016-07-13T13:33:41.231Z",
"metainfos":{
"string1":[
"s2" <<<<<<<<<<<<<--------------
]
}
}
}
]
},
"aggregations":{
"followUpActivity.metainfo.metainfos.string1":{
"doc_count_error_upper_bound":0,
"sum_other_doc_count":0,
"buckets":[
{
"key":"s2",
"doc_count":2 <<<<<<<<<<<<<--------------
},
{
"key":"s1",
"doc_count":3 <<<<<<<<<<<<<--------------
}
]
}
}
}
As you can see, there are two buckets: s1 and s2. However, s1 is present only in two documents but ES is telling me that doc_count = 3!!! Moreover, s2 is present in three documents, but ES is telling me that doc_count = 2!!!!
I'm performing it using only one node.
Any ideas?
MAPPING
{
"living_v1":{
"mappings":{
"fuas":{
"properties":{
"metainfos":{
"properties":{
"string1":{
"type":"string"
}
}
},
"timestamp":{
"type":"date",
"format":"strict_date_optional_time||epoch_millis"
},
"user":{
"type":"string",
"index":"not_analyzed"
}
}
}
}
}
}

Related

Elasticsearch: When doing an "inner_hit" on nested documents, return all fields of matched offset in the hierarchy

Mapping for document:
{
"mappings": {
"properties": {
"client_classes": {
"type": "nested",
"properties": {
"members": {
"type": "nested",
"properties": {
"phone_nos": {
"type": "nested"
}
}
}
}
}
}
}
}
Data in Document:
{
"client_name":"client1",
"client_classes":[
{
"class_name":"class1",
"members":[
{
"name":"name1",
"phone_nos":[
{
"ext":"91",
"number":"99119XXXX"
},
{
"ext":"04",
"number":"99885XXXX"
}
]
},
{
"name":"name2",
"phone_nos":[
{
"ext":"03",
"number":"99887XXXX"
}
]
}
]
}
]
}
I query for "number" with value "99119XXXX"
{
"query":{
"nested":{
"path":"client_classes.members.phone_nos",
"query":{
"match":{
"client_classes.members.phone_nos.number":"99119XXXX"
}
},
"inner_hits":{}
}
}
}
Result from inner hits:
"inner_hits":{
"client_classes.members.phone_nos":{
"hits":{
"total":{
"value":1,
"relation":"eq"
},
"max_score":0.9808291,
"hits":[
{
"_index":"clients",
"_type":"_doc",
"_id":"1",
"_nested":{
"field":"client_classes",
"offset":0,
"_nested":{
"field":"members",
"offset":0,
"_nested":{
"field":"phone_nos",
"offset":0
}
}
},
"_score":0.9808291,
"_source":{
"ext":"91",
"number":"99119XXXX"
}
}
]
}
}
}
I get the desired matched result hierarchy of all the nested objects, in the inner hit, but I only receive the "offset" value and "field" from these objects. I need the full object of the corresponding offset.
Something like this:
{
"client_name":"client1",
"client_classes":[
{
"class_name":"class1",
"members":[
{
"name":"name1",
"phone_nos":[
{
"ext":"91",
"number":"99119XXXX"
}
]
}
]
}
]
}
I understand that with inner_hit I also get the complete root document, from where I can use the offset values from the innerhit object. But fetching the entire root document could be expensive for our memory, so I only need the result I have shared above.
Is there any such possibility as of now?
I am using elasticsearch 7.7
UPDATE: Added Mapping, result and a slight fix in document
Yes, just add "_source": false at the top-level and you'll only get the nested inner hits
{
"_source": false, <--- add this
"query":{
"nested":{
"path":"client_classes.members.phone_nos",
"query":{
"match":{
"client_classes.members.phone_nos.number":"99119XXXX"
}
},
"inner_hits":{}
}
}
}

Nested query on ElasticSearch for Long type (ES 5.0.4)

This is my first question on Stack Overflow, so please excuse any mistakes. I will improve on them in the future.
I am new to Elasticsearch too. I am trying to do an exact match in Elasticsearch (5.0.4). Instead of doing an exact match, the request returns all the documents present.
Not sure of this behavior.
Here is the mapping
{
"properties":{
"debug_urls":{
"properties":{
"characteristics":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"url_id":{
"type":"long"
}
},
"type":"nested"
},
"scanId":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
}
}
}
This is my request.
{
"query": {
"nested": {
"path": "debug_urls",
"query": {
"match": {
"debug_urls.url_id": 1
}
}
}
}
}
The response received,
{
"took":1,
"timed_out":false,
"_shards":{
"total":5,
"successful":5,
"failed":0
},
"hits":{
"total":1,
"max_score":1,
"hits":[
{
"_index":"cust_cca39c0c6c8141008e9411032bbf4d21",
"_type":"debug-urls",
"_id":"AW70h0l72s9qXitMsWgC",
"_score":1,
"_source":{
"scan_id":"n_a0a523fb5c81435fb79c34c624c7fbd6",
"debug_urls":[
{
"url_id":1,
"characteristics":[
"FORM",
"EXTERNAL_SCRIPT",
"INLINE_SCRIPT"
]
},
{
"url_id":2,
"characteristics":[
"EXTERNAL_SCRIPT",
"INLINE_SCRIPT"
]
},
{
"url_id":3,
"characteristics":[
"EXTERNAL_SCRIPT",
"INLINE_SCRIPT"
]
},
{
"url_id":4,
"characteristics":[
"EXTERNAL_SCRIPT",
"INLINE_SCRIPT"
]
},
{
"url_id":5,
"characteristics":[
"EXTERNAL_SCRIPT",
"INLINE_SCRIPT"
]
},
{
"url_id":6,
"characteristics":[
"EXTERNAL_SCRIPT",
"INLINE_SCRIPT"
]
},
{
"url_id":7,
"characteristics":[
"EXTERNAL_SCRIPT",
"INLINE_SCRIPT"
]
}
]
}
}
]
}
}
If you only want to see the nested documents that match the criteria, you can leverage nested inner_hits:
{
"_source":["scan_id"], <--- add this line
"query": {
"nested": {
"path": "debug_urls",
"query": {
"match": {
"debug_urls.url_id": 1
}
},
"inner_hits": {} <--- add this line
}
}
}

ElasticSearch source filtering array of objects

Here is a document
{
"Id": "1",
"Name": "Thing",
"Prices": [
{"CompanyId": "1", "Price": "11.11"},
{"CompanyId": "2", "Price": "12.12"},
{"CompanyId": "3", "Price": "13.13"}
And here is the associated ElasticSearch schema:
"Prices" : {
"type" : "nested",
"properties" : {
"CompanyId": {
"type" : "integer"
},
"Price" : {
"type" : "scaled_float",
"scaling_factor" : 100
}
}
}
If a user is buying for CompanyId = 3, then the supplier doesn't want them to be able to see the preferential pricing for, say, CompanyId = 1.
Therefore I need to use a source filter to remove all prices for which the CompanyId is not 3.
I have found that this works.
"_source":{
"excludes": ["Prices.companyId.CompanyId"]
}
But I don't understand how or why.
It can't possibly work because the required CompanyId is not mentioned anywhere in the whole ElasticSearch search JSON.
Adding a full search JSON:
{
"query":{
"bool":{
"must":[
{
"match_all":{
}
}
],
"filter":{
"match":{
"PurchasingViews":6060
}
}
}
},
"size":20,
"aggs":{
"CompanyName.raw":{
"terms":{
"field":"CompanyName.raw",
"size":20,
"order":{
"_count":"desc"
}
}
}
},
"_source":{
"excludes":[
"PurchasingViews",
"ContractFilters",
"SearchField*",
"Keywords*",
"Menus*",
"Prices.companyId.CompanyId"
]
}
}
Result:
{
"took":224,
"timed_out":false,
"_shards":{
"total":5,
"successful":5,
"skipped":0,
"failed":0
},
"hits":{
"total":1173525,
"max_score":1.0,
"hits":[
{
"_index":"products_purchasing",
"_type":"product_purchasing",
"_id":"12787114",
"_score":1.0,
"_source":{
"CompanyName":"...",
"Prices":[
{
"CompanyId":1474,
"Price":697.3
}
],
"CompanyId":571057,
"PartNumber":"...",
"LongDescription_en":"...",
"Name_en":"...",
"DescriptionSnippet_en":"...",
"ProductId":9605985,
"Id":12787114
}
}
]
},
"aggregations":{
"CompanyName.raw":{
"doc_count_error_upper_bound":84,
"sum_other_doc_count":21078,
"buckets":[
{
"key":"...",
"doc_count":534039
}
]
}
}
}
https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html
I believe the way you have put your mapping with the "nested" type has created the reference you are questioning.
Also - I would suggest framing the query as looking for 3 only rather than "excluding" everything except 3

Can i filter subarray in Elasticsearch?

I have orders, with the order products attached to each order as a subarray, in Elasticsearch. When I'm aggregating prices, I need to be able to filter the order products within my order documents.
Example of my document in Elastic:
{
"OrderID":4567488,
"projectId":"4",
"Project":"direkt",
"legacy_id":null,
"supporterId":null,
"Origin":"FR",
"orderProducts":[
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"30",
"Price":"26.95",
},
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"15",
"Price":"15.22",
},
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"123",
"Price":"24.55",
},
]
}
How I'm filtering right now:
{
"index":"order_index",
"from":0,
"size":100,
"body":{
"query":{
"filtered":{
"filter":{
"bool":{
"must":[
{
"term":{
"orderProducts.brandNo":"30"
}
}
],
}
}
}
}
}
}
What i'm expecting
{
"OrderID":4567488,
"projectId":"4",
"Project":"direkt",
"legacy_id":null,
"supporterId":null,
"Origin":"FR",
"orderProducts":[
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"30",
"Price":"26.95",
},
]
}
What I'm really getting:
The whole document.
Is that possible? Can subarray data be filtered?
UPD.
Yes this is my schema mappings:
"mappings":{
"order":{
"dynamic_templates":[
{
"strings":{
"mapping":{
"type":"string",
"fields":{
"raw":{
"index":"not_analyzed",
"type":"string"
}
}
},
"match_mapping_type":"string"
}
}
],
"properties":{
"orderProducts":{
"include_in_parent":true,
"properties":{
"OrderProductID":{
"type":"long"
},
"OrderID":{
"type":"long"
},
"brandNo":{
"type":"long"
},
"Price":{
"type":"double"
}
},
"type":"nested"
},
"OrderID":{
"type":"long"
}
}
}
},
All right, after some experiments I discovered that the aggregation can be done like this:
{
"aggs":{
"sales":{
"nested":{
"path":"orderProducts"
},
"aggs":{
"filtered_nestedobjects":{
"filter":{
"bool":{
"must":[
{
"terms":{
"orderProducts.brandNo":[
"30"
]
}
}
]
}
},
"aggs":{
"Quantity":{
"sum":{
"field":"orderProducts.Quantity"
}
}
}
}
}
}
}
}
And the answer to the main question, whether we can filter a subarray in Elasticsearch, is yes. I did this using inner_hits only.

settings the "index" property of an elasticsearch object

Say I have a mapping of objects, where the mapping is:
{"my_type":
{"properties":
{"name":{"type":"string","store":"yes","index":"not_analyzed"},
"more":{"type":"object",
"properties":{"a_known_number":{"type":"long","index":"yes"},
"some_json_object":{"type":"object"}
}
}
}
}
}
I do not know what sub-fields the "some_json_object" will have, but I DO know that I only want to store this object, and not index any of its sub-fields.
Can I do:
{"my_type":
{"properties":
{"name":{"type":"string","store":"yes","index":"not_analyzed"},
"more":{"type":"object",
"properties":{"a_known_number":{"type":"long","index":"yes"},
"some_json_object":{"type":"object","store":"yes","index":"no"}
}
}
}
}
}
and affect all of the resulting sub-fields?
No, you can't specify the entire "object" as not indexed. However you can use dynamic_templates (http://www.elasticsearch.org/guide/reference/mapping/root-object-type/) to do this:
{
"my_type":{
"properties":{
"name":{
"type":"string",
"store":"yes",
"index":"not_analyzed"
}
},
"dynamic_templates":[
{
"stored_json_object_template":{
"path_match":"some_json_object.*",
"mapping":{
"store":"yes",
"index":"no"
}
}
}
]
}
}
This tells the mapper to map all properties under "some_json_object" as stored but not indexed fields, keeping each property's detected type.
Update
Removed type from mapping in order to match all property types (match_path => path_match).
Update 2
If you then create an index:
{
"mappings":{
"my_type":{
"properties":{
"name":{
"type":"string",
"store":"yes",
"index":"not_analyzed"
}
},
"dynamic_templates":[
{
"stored_json_object_template":{
"path_match":"some_json_object.*",
"mapping":{
"store":"yes",
"index":"no"
}
}
}
]
}
}
}
and index an object:
{
"Name":"Henrik",
"some_json_object":{
"string":"string",
"long":12345
}
}
it will then get the following mapping:
{
"testindex":{
"my_type":{
"dynamic_templates":[
{
"stored_json_object_template":{
"mapping":{
"index":"no",
"store":"yes"
},
"path_match":"some_json_object.*"
}
}
],
"properties":{
"name":{
"type":"string",
"index":"not_analyzed",
"store":true,
"omit_norms":true,
"index_options":"docs"
},
"some_json_object":{
"properties":{
"long":{
"type":"long",
"index":"no",
"store":true
},
"string":{
"type":"string",
"index":"no",
"store":true
}
}
}
}
}
}
}

Resources