ElasticSearch Aggregation Missing Data - elasticsearch

In Elasticsearch, when I run an aggregation query it only runs on one index. How do I get it to run on multiple indices?

Do a multi-index search.
As an example, I created a couple of indexes (implicitly) by bulk indexing some data:
POST /_bulk
{ "index": {"_index": "test_index", "_type":"doc"}}
{ "name": "Brown foxes"}
{ "index": {"_index": "test_index_2", "_type":"doc"}}
{ "name": "Yellow furballs" }
{ "index": {"_index": "test_index", "_type":"doc"}}
{ "name": "my discovery" }
{ "index": {"_index": "test_index_2", "_type":"doc"}}
{ "name": "myself is fun" }
{ "index": {"_index": "test_index", "_type":"doc"}}
{ "name": ["foxy", "foo"] }
{ "index": {"_index": "test_index_2", "_type":"doc"}}
{ "name": ["foo bar", "baz"] }
Then I can run an aggregation on both indexes:
POST /test_index,test_index_2/_search?search_type=count
{
"aggs": {
"name_terms": {
"terms": {
"field": "name"
}
}
}
}
and I get back all the terms from both:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 10,
"successful": 10,
"failed": 0
},
"hits": {
"total": 6,
"max_score": 0,
"hits": []
},
"aggregations": {
"name_terms": {
"buckets": [
{
"key": "foo",
"doc_count": 2
},
{
"key": "bar",
"doc_count": 1
},
{
"key": "baz",
"doc_count": 1
},
{
"key": "brown",
"doc_count": 1
},
{
"key": "discovery",
"doc_count": 1
},
{
"key": "foxes",
"doc_count": 1
},
{
"key": "foxy",
"doc_count": 1
},
{
"key": "fun",
"doc_count": 1
},
{
"key": "furballs",
"doc_count": 1
},
{
"key": "is",
"doc_count": 1
}
]
}
}
}
Here's the code:
http://sense.qbox.io/gist/e053a0c6c5453eae68d7b7ff2ff12588669b046e

Related

Iterating over doc to return a particular key's value in an array based on a match

data
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1000,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "learn",
"_id": "OeCLr4QBPMAw7FiXknKz",
"_score": 1,
"_source": {
"user_rating_size": 80,
"ratingdescription": 80,
"rating": "PG-13",
"release_year": 2004,
"user_rating_score": 82,
"title": "White Chicks",
"ratinglevel": "crude and sexual humor, language and some drug content"
}
},
{
"_index": "learn",
"_id": "QuCLr4QBPMAw7FiXknKz",
"_score": 1,
"_source": {
"user_rating_size": 80,
"ratingdescription": 90,
"rating": "TV-14",
"release_year": 2016,
"user_rating_score": 96,
"title": "Pretty Little Liars",
"ratinglevel": "Parents strongly cautioned. May be unsuitable for children ages 14 and under."
}
}
]
}
}
Mapping
{
"learn": {
"mappings": {
"_meta": {
"created_by": "file-data-visualizer"
},
"properties": {
"rating": {
"type": "keyword"
},
"ratingdescription": {
"type": "long"
},
"ratinglevel": {
"type": "text"
},
"release_year": {
"type": "long"
},
"title": {
"type": "text"
},
"user_rating_score": {
"type": "long"
},
"user_rating_size": {
"type": "long"
}
}
}
}
}
All I want is to return all the values of title as an array, grouped by matching rating.
I tried to group by rating, but it returns the matching documents. In that case I have to loop through them again to get just the values.
In the aggregations documentation, all I see are sum and other statistics-based aggregations.
I also tried to do it through a Painless script but can't seem to figure out a way.
I had to add a keyword field type to title to be able to aggregate on it:
PUT learn
{
"mappings": {
"_meta": {
"created_by": "file-data-visualizer"
},
"properties": {
"rating": {
"type": "keyword"
},
"ratingdescription": {
"type": "long"
},
"ratinglevel": {
"type": "text"
},
"release_year": {
"type": "long"
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"user_rating_score": {
"type": "long"
},
"user_rating_size": {
"type": "long"
}
}
}
}
Via Aggregations
GET learn/_search
{
"size": 0,
"query": {
"match": {
"title": "pretty"
}
},
"aggs": {
"ratings": {
"terms": {
"field": "rating",
"size": 10
},
"aggs": {
"titles": {
"terms": {
"field": "title.keyword",
"size": 10
}
}
}
}
}
}
Results
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"ratings": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "TV-14",
"doc_count": 2,
"titles": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Another Pretty TV-14 movie",
"doc_count": 1
},
{
"key": "Pretty Little Liars",
"doc_count": 1
}
]
}
},
{
"key": "PG-13",
"doc_count": 1,
"titles": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Pretty White Chicks",
"doc_count": 1
}
]
}
}
]
}
}
}
Via Collapse query
GET learn/_search
{
"_source": false,
"query": {
"match": {
"title": "pretty"
}
},
"collapse": {
"field": "rating",
"inner_hits": {
"name": "titles",
"size": 5,
"_source": ["title"]
}
}
}
Results
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "learn",
"_id": "JVV4vIQBtNG1OrZoVQ2v",
"_score": 0.7361701,
"fields": {
"rating": [
"TV-14"
]
},
"inner_hits": {
"titles": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 0.7361701,
"hits": [
{
"_index": "learn",
"_id": "JVV4vIQBtNG1OrZoVQ2v",
"_score": 0.7361701,
"_source": {
"title": "Pretty Little Liars"
}
},
{
"_index": "learn",
"_id": "_FV4vIQBtNG1OrZo-Q95",
"_score": 0.5897495,
"_source": {
"title": "Another Pretty TV-14 movie"
}
}
]
}
}
}
},
{
"_index": "learn",
"_id": "wcV5vIQB5Gw0WET8ve-k",
"_score": 0.7361701,
"fields": {
"rating": [
"PG-13"
]
},
"inner_hits": {
"titles": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.7361701,
"hits": [
{
"_index": "learn",
"_id": "wcV5vIQB5Gw0WET8ve-k",
"_score": 0.7361701,
"_source": {
"title": "Pretty White Chicks"
}
}
]
}
}
}
}
]
}
}

Count part of documents in elastic search

I want to get the count of each grade in the student index in Elasticsearch, for example:
grade 1: count = 1
grade 2: count = 4
grade 3: count = 1
grade 4: count = 1
I used the query below:
{
"aggs":
{
"grade":
{
"terms":
{
"field": "marks.grade"
}
}
}
}
Output:
{
"took": 9,
"timed_out": false,
"_shards":
{
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits":
{
"total":
{
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits":
[
{
"_index": "student",
"_type": "doc",
"_id": "001",
"_score": 1.0,
"_source":
{
"name": "abc",
"marks":
[
{
"grade": 1,
"score": 95
},
{
"grade": 2,
"score": 75
},
{
"grade": 2,
"score": 72
},
{
"grade": 3,
"score": 55
}
]
}
},
{
"_index": "student",
"_type": "doc",
"_id": "002",
"_score": 1.0,
"_source":
{
"name": "xyz",
"marks":
[
{
"grade": 4,
"score": 35
},
{
"grade": 2,
"score": 79
},
{
"grade": 2,
"score": 65
}
]
}
}
]
},
"aggregations":
{
"grade":
{
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets":
[
{
"key": 2,
"doc_count": 2
},
{
"key": 1,
"doc_count": 1
},
{
"key": 3,
"doc_count": 1
},
{
"key": 4,
"doc_count": 1
}
]
}
}
}
Here, grade 2 is counted as only 2 instead of 4.
Is there any query that returns a count of 4 for grade 2?
You can use a combination of terms aggregation with cardinality aggregation
{
"size":0,
"aggs": {
"grade": {
"terms": {
"field": "marks.grade"
},
"aggs": {
"count_of_grade": {
"cardinality": {
"field": "marks.grade"
}
}
}
}
}
}
Response will be
"aggregations": {
"grade": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 2,
"count_of_grade": {
"value": 4
}
},
{
"key": 1,
"doc_count": 1,
"count_of_grade": {
"value": 3
}
},
{
"key": 3,
"doc_count": 1,
"count_of_grade": {
"value": 3
}
},
{
"key": 4,
"doc_count": 1,
"count_of_grade": {
"value": 2
}
}
]
}
}
You can do this by making the marks array a nested field. Here is an example mapping:
{
"mappings": {
"properties": {
"marks": { "type": "nested" }
}
}
}
And you can make a nested aggregation:
{
"size": 0,
"aggs": {
"counts": {
"nested": {
"path": "marks"
},
"aggs": {
"counts": {
"terms": {
"field": "marks.grade",
"size": 10
}
}
}
}
}
}
This is the result of the aggregation:
{
"key" : 2,
"doc_count" : 4
},
{
"key" : 1,
"doc_count" : 1
},
{
"key" : 3,
"doc_count" : 1
},
{
"key" : 4,
"doc_count" : 1
}

Filter on nested field with Elasticsearch

I have 2 entities in my project, users and schedule
I need to create a page on which, in the form of a weekly calendar, I can display all employees and their shift for each day
Example:
https://monosnap.com/file/tEb3rUYNRmredPWOdfxRBTBpqkh36H
For this, I created a new index in which I indexed all employees. Each employee has a nested field in which their shifts are stored.
The problem is that I can't figure out the aggregations and filters.
What I need:
There is always a filter by date, which refers to a property of the shifts field.
It doesn't matter whether there are matching shifts or not; we show ALL employees.
The following 2 aggregations, user role and type of shift, are also displayed.
User role filters the list of employees.
Type of shift shows or hides the associated shifts.
An example of my request
{
"aggs": {
"shifts.ref_type": {
"nested": {
"path": "shifts"
},
"aggs": {
"shifts.ref_type": {
"terms": {
"field": "shifts.ref_type",
"size": 1000
}
}
}
},
"role": {
"terms": {
"field": "role",
"size": 1000
}
},
"name": {
"terms": {
"field": "name",
"size": 1000
}
}
},
"query": {
"bool": {
"must": [
{
"term": {
"_routing": "1"
}
}
],
"should": [
{
"range": {
"shifts.date_from": {
"lte": 1636923600,
"gte": 1636318800
}
}
}
]
}
},
"sort": [
{
"created": "ASC"
}
],
"size": 1
}
Sample response
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 4,
"successful": 4,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 36,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "employee_shift",
"_type": "_doc",
"_id": "a8abf060-25c8-45ee-a50e-02a2e2ad1c40",
"_score": null,
"_routing": "1",
"_source": {
"created": 1633967157,
"type": "user",
"title": null,
"description": "",
"uuid": "a8abf060-25c8-45ee-a50e-02a2e2ad1c40",
"author": "System",
"author:name": "System",
"author:role": "",
"acc": 1,
"property": [
1
],
"status": "Enabled",
"class": [
""
],
"weight": "",
"tags": [],
"language": "en",
"ref_source_id": null,
"ref_source_helper": null,
"ref_property": [
"test hostel2"
],
"ref_property_default": "test hostel2",
"name": "Housekeeper 1",
"role": [
"Housekeeper"
],
"role:weight": "2",
"role:id": [
5
],
"pay_rate": null,
"experience": null,
"supervisor": null,
"gender": null,
"units": [
"102",
"103",
"106",
"107",
"110",
"111",
"116",
"117",
"120",
"121",
"124",
"125",
"128",
"129",
"132",
"133",
"136",
"137"
],
"task_inspection": "All tasks",
"shifts": [
{
"uuid": "f48ae398-0668-4693-b335-2fee3baa2941",
"ref_type": "Work",
"ref_type:color": "",
"date_from": "1635196500",
"date_to": "1635197400",
"notes": null
},
{
"uuid": "8b4d8148-2583-4ccf-a1cc-ae5e6d1e728e",
"ref_type": "Work",
"ref_type:color": "",
"date_from": "1635287400",
"date_to": "1635289200",
"notes": null
},
{
"uuid": "3f5520d8-8108-4abd-8e2a-70c00faf6994",
"ref_type": "Work",
"ref_type:color": "",
"date_from": "1635369300",
"date_to": "1635373800",
"notes": null
},
{
"uuid": "d4009660-447c-47de-b0f3-3c1f2d8d8f99",
"ref_type": "Work",
"ref_type:color": "",
"date_from": "1635286500",
"date_to": "1635288300",
"notes": null
},
{
"uuid": "b3d883f0-b71f-4df7-bb63-a50f137528a4",
"ref_type": "Work",
"ref_type:color": "",
"date_from": "1635370200",
"date_to": "1635372900",
"notes": null
}
]
},
"sort": [
1633967157000
]
}
]
},
"aggregations": {
"role": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Houseman",
"doc_count": 4
},
{
"key": "Maintenance",
"doc_count": 4
},
{
"key": "Supervisor",
"doc_count": 4
},
{
"key": "Supervisor HSKP",
"doc_count": 4
},
{
"key": "Supervisor Maintenance",
"doc_count": 4
},
{
"key": "Administrator",
"doc_count": 3
},
{
"key": "Concierge dispatcher",
"doc_count": 3
},
{
"key": "Frontdesk",
"doc_count": 3
},
{
"key": "General manager",
"doc_count": 3
},
{
"key": "HKeeper",
"doc_count": 3
},
{
"key": "Housekeeper",
"doc_count": 3
},
{
"key": "Manager",
"doc_count": 3
}
]
},
"shifts.ref_type": {
"doc_count": 21,
"shifts.ref_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Work",
"doc_count": 19
},
{
"key": "test",
"doc_count": 2
}
]
}
},
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Administrator 123",
"doc_count": 1
},
{
"key": "Administrator 223",
"doc_count": 1
},
{
"key": "Administrator 3",
"doc_count": 1
},
{
"key": "Concierge dispatcher 1",
"doc_count": 1
},
{
"key": "Concierge dispatcher 2",
"doc_count": 1
},
{
"key": "Concierge dispatcher 3",
"doc_count": 1
},
{
"key": "Frontdesk 1",
"doc_count": 1
},
{
"key": "Frontdesk 2",
"doc_count": 1
},
{
"key": "Frontdesk 3",
"doc_count": 1
},
{
"key": "General manager 1",
"doc_count": 1
},
{
"key": "General manager 2",
"doc_count": 1
},
{
"key": "General manager 3",
"doc_count": 1
},
{
"key": "HKeeper 1",
"doc_count": 1
},
{
"key": "HKeeper 2",
"doc_count": 1
},
{
"key": "HKeeper 3",
"doc_count": 1
},
{
"key": "Housekeeper 1",
"doc_count": 1
},
{
"key": "Housekeeper 2",
"doc_count": 1
},
{
"key": "Housekeeper 3",
"doc_count": 1
},
{
"key": "Houseman 1",
"doc_count": 1
},
{
"key": "Houseman 2",
"doc_count": 1
},
{
"key": "Houseman 3",
"doc_count": 1
},
{
"key": "Maintenance 1",
"doc_count": 1
},
{
"key": "Maintenance 2",
"doc_count": 1
},
{
"key": "Maintenance 3",
"doc_count": 1
},
{
"key": "Manager 1222",
"doc_count": 1
},
{
"key": "Manager 2",
"doc_count": 1
},
{
"key": "Manager 3",
"doc_count": 1
},
{
"key": "Supervisor 1",
"doc_count": 1
},
{
"key": "Supervisor 2",
"doc_count": 1
},
{
"key": "Supervisor 3",
"doc_count": 1
},
{
"key": "Supervisor HSKP 1",
"doc_count": 1
},
{
"key": "Supervisor HSKP 2",
"doc_count": 1
},
{
"key": "Supervisor HSKP 3",
"doc_count": 1
},
{
"key": "Supervisor Maintenance 1",
"doc_count": 1
},
{
"key": "Supervisor Maintenance 2",
"doc_count": 1
},
{
"key": "Supervisor Maintenance 3",
"doc_count": 1
}
]
}
}
}
At the moment, everything seems to be working correctly, except for one point: the aggregation by type of shift ALWAYS outputs data, even for shifts that should be excluded by the date filter.
Any advice? Thank you.
I'm making the answer based on the assumption from my comment:
you want your query to return all employees
you want the ref_type aggregation to only include shifts matching your date range
you want the "shifts" collections under your results to likewise only include shifts matching your date range
Apologies if I misunderstood your question.
One thing to get out of the way first, though you may already be aware of it: the should part of your query is not restricting the results; it's only affecting the score, since you already have a must.
As a corollary, it's not going to affect the aggregated results either; for that you need to use a filter aggregation:
"aggs": {
"shifts.ref_type": {
"nested": {
"path": "shifts"
},
"aggs": {
"shifts.ref_type": {
"filter": {
"range": {
"shifts.date_from": {
"gte": 1635370100,
"lte": 1635370300
}
}
},
"aggs": {
"shifts.ref_type": {
"terms": {
"field": "shifts.ref_type",
"size": 1000
}
}
}
}
}
},
This is going to get you the filtered counts in your aggregation, but that still won't filter your results - you'll get all the "shifts" in your hits. So a thing to be aware of with nested documents, your query is going to restrict the documents that are returned, based on the matches in the nested documents, but it's not going to actually filter out the nested documents that did not match. In order to do that you have another feature, nested inner hits, which lets you figure out exactly which nested documents matched. It's still not enough in your case since you don't want to filter out the results entirely (so even if there is no "inner hit" you still want to return the document, or at least have it be part of the aggregation). So now you have yet another feature, post-filter, which you can use to filter the documents after they have been aggregated. Taking the 2 together:
"post_filter": {
"nested": {
"path": "shifts",
"query": {
"bool": {
"must": [
{
"range": {
"shifts.date_from": {
"lte": 635370200,
"gte": 635370200
}
}
}
]
}
},
"inner_hits": {}
}
},
If you now set _source: false, you won't have the hits, and you just get the shifts that matched (and then you still have the employee name and roles in your aggregation results):
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.08701137,
"hits": [
{
"_index": "employee_shift",
"_type": "_doc",
"_id": "-tRnLn0B5PjpsgKgGXlB",
"_score": 0.08701137,
"inner_hits": {
"shifts": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "employee_shift",
"_type": "_doc",
"_id": "-tRnLn0B5PjpsgKgGXlB",
"_nested": {
"field": "shifts",
"offset": 4
},
"_score": 1,
"_source": {
"notes": null,
"ref_type:color": "",
"date_to": 635372900,
"ref_type": "Work",
"uuid": "b3d883f0-b71f-4df7-bb63-a50f137528a4",
"date_from": 635370200
}
}
]
}
}
}
}
]
},
"aggregations": {
"role": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Housekeeper",
"doc_count": 5
}
]
},
"shifts.ref_type": {
"doc_count": 25,
"shifts.ref_type": {
"doc_count": 4,
"shifts.ref_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Work",
"doc_count": 3
},
{
"key": "Work2",
"doc_count": 1
}
]
}
}
},
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Housekeeper 4",
"doc_count": 2
},
{
"key": "Housekeeper 1",
"doc_count": 1
},
{
"key": "Housekeeper 2",
"doc_count": 1
},
{
"key": "Housekeeper 3",
"doc_count": 1
}
]
}
}
}

ElasticSearch nested sort with filter

I have documents that contain a list of prices for specific keys, for example:
document1
{
"name":"doc1",
"cheapestPrices": [{
"key": "10000_BB",
"value": 50
}, {
"key": "10000_LO",
"value": 10
}, {
"key": "10000",
"value": 10
}, {
"key": "",
"value": 10
}
]
}
document2
{
"name":"doc2",
"cheapestPrices": [{
"key": "10000_BB",
"value": 15
}, {
"key": "10000_LO",
"value": 30
}, {
"key": "10000",
"value": 15
}, {
"key": "",
"value": 15
}
]
}
Now I send a query and I want to sort by given keys and the order should be from lowest to highest. I created this query:
{
"size": 10000,
"sort": [
{
"cheapestPrices.value": {
"mode": "min",
"nested": {
"filter": {
"bool": {
"should": [
{
"term": {
"cheapestPrices.key": {
"value": "10000_BB"
}
}
}
]
}
},
"path": "cheapestPrices"
},
"order": "asc"
}
}
]
}
I expected to get doc2 (value 15 for that key) first and then doc1 (value 50 for that key)... but the result is doc1 and then doc2, and the sort score is exactly the same.
Result:
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [{
"_index": "test_sortbyprice",
"_type": "_doc",
"_id": "doc1",
"_score": null,
"_source": {
"cheapestPrices": [{
"key": "10000_BB",
"value": 50
}, {
"key": "10000_LO",
"value": 10
}, {
"key": "10000",
"value": 10
}, {
"key": "",
"value": 10
}
],
"name": "doc1"
},
"sort": [
9223372036854775807
]
}, {
"_index": "test_sortbyprice",
"_type": "_doc",
"_id": "doc2",
"_score": null,
"_source": {
"cheapestPrices": [{
"key": "10000_BB",
"value": 15
}, {
"key": "10000_LO",
"value": 30
}, {
"key": "10000",
"value": 15
}, {
"key": "",
"value": 15
}
],
"name": "doc2"
},
"sort": [
9223372036854775807
]
}
]
}
}
The mapping is as follows:
{
"properties": {
"cheapestPrices": {
"type": "nested",
"properties": {
"value": {
"type": "integer"
},
"key": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
}
}
}
TL;DR
Change the term query to target the field cheapestPrices.key.keyword instead of cheapestPrices.key.
The sort query does not match any documents due to using term (an exact match) on a field that's been lowercased thanks to the standard analyzer which was applied by default on a text field w/ no extra analyzers. This means it's never going to equal 10000_BB (uppercase). But luckily you have the .keyword available which ensures no value modifications.
The sort scores are the same (I assume 9223372036854775807 a.k.a. Long.MAX_VALUE) because that's the default ES behavior. It's not really that far fetched when you think about it: if the sort query does not match anything, it'll assign the highest possible value.
If your order were desc, it'd have returned -Long.MAX_VALUE

Get all tags with count from all documents elasticsearch

I've got an index mp_v1 with the source fields id and tags. The "tags" field contains all of a document's tags in a single string.
Example:
{
"_index": "mp_v1",
"_type": "mp",
"_id": "5",
"_score": 1,
"_source": {
"id": 5,
"tags": "tag1 black blue"
}
}
How can I get the tags from Elasticsearch along with their number of occurrences across all documents? For example, if I have two documents, the first one with tags "tag1 black blue" and the second with tags "blue square", it should return: blue: 2, tag1: 1, black: 1, square: 1
I am running ES 5.12
PUT testindex_51
{
"settings": {
"analysis": {
"analyzer": {
},
"filter":{
}
}
},
"mappings": {
"table1": {
"properties": {
"title": {
"type": "text",
"analyzer": "whitespace",
"fielddata": true
}
}
}
}
}
POST testindex_50/table1
{
"title" : "tag1 aggs1 blue"
}
POST testindex_50/table1
{
"title" : "tag2 aggs2 blue"
}
POST testindex_50/table1/_search
{
"aggs": {
"tags_count": {
"terms": {
"field": "title",
"size": 10
}
}
}
}
Response
{
"took": 11,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"tags_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "blue",
"doc_count": 2
},
{
"key": "aggs1",
"doc_count": 1
},
{
"key": "aggs2",
"doc_count": 1
},
{
"key": "tag1",
"doc_count": 1
},
{
"key": "tag2",
"doc_count": 1
}
]
}
}
}
You can simply use a terms aggregation on the field with fielddata enabled to get this result (the dirty way).
But I would suggest breaking the field down into separate tags and then performing the aggregation.

Resources