Elasticsearch: sorted nested array - elasticsearch

Is it possible to configure the mapping of an index, or the discover view of this index, in such a way that an array inside the documents is / will be sorted?
Background: I have an ES index with documents containing an array:
This array is updated from time to time with new entries (objects containing a timestamp), and I would like these arrays to be sorted according to the timestamp inside the objects.

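If your field is defined as a nested type, then you can use inner_hits to sort the array of objects. It will return the sorted object array inside inner_hits for each document. Note that this sorts the nested objects in the response only; the array in _source keeps its original order, as the sample response below shows.
You can define the field as nested, as shown below: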
{
"mappings": {
"properties": {
"name": {
"type": "keyword"
},
"openTimes": {
"type": "nested",
"properties": {
"date": {
"type": "date"
},
"name": {
"type": "keyword"
}
}
}
}
}
}
Let's consider the sample data below:
{"index": { } }
{ "name": "second on 6th (3rd on the 5th)", "openTimes": [ { "date": "2018-12-05T12:00:00" ,"name":"abc"}, { "date": "2018-12-06T11:00:00","name":"xyz" }] }
{"index": { } }
{ "name": "third on 6th (1st on the 5th)", "openTimes": [ {"date": "2018-12-05T10:00:00","name":"abc"}, { "date": "2018-12-06T12:00:00","name":"xyz" }] }
{"index": { } }
{ "name": "first on the 6th (2nd on the 5th)", "openTimes": [ {"date": "2018-12-05T11:00:00","name":"abc" }, { "date": "2018-12-06T10:00:00","name":"xyz" }] }
Below is Query:
{
"query": {
"nested": {
"path": "openTimes",
"query": {
"match_all": {}
},
"inner_hits": {
"sort": {
"openTimes.date": "desc"
}
}
}
}
}
Sample Response:
{
"_index" : "nested-listings",
"_type" : "_doc",
"_id" : "u0fw338BMCbs63yKkqi0",
"_score" : 1.0,
"_source" : {
"name" : "second on 6th (3rd on the 5th)",
"openTimes" : [
{
"date" : "2018-12-05T12:00:00",
"name" : "abc"
},
{
"date" : "2018-12-06T11:00:00",
"name" : "xyz"
}
]
},
"inner_hits" : {
"openTimes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "nested-listings",
"_type" : "_doc",
"_id" : "u0fw338BMCbs63yKkqi0",
"_nested" : {
"field" : "openTimes",
"offset" : 1
},
"_score" : null,
"_source" : {
"date" : "2018-12-06T11:00:00",
"name" : "xyz"
},
"sort" : [
1544094000000
]
},
{
"_index" : "nested-listings",
"_type" : "_doc",
"_id" : "u0fw338BMCbs63yKkqi0",
"_nested" : {
"field" : "openTimes",
"offset" : 0
},
"_score" : null,
"_source" : {
"date" : "2018-12-05T12:00:00",
"name" : "abc"
},
"sort" : [
1544011200000
]
}
]
}
}
}
}

Related

Elasticsearch - search for multiple values in a list of nested values

I'm trying to get the documents that match all the items inside a list; the field that I'm searching on is inside a list of nested objects:
Mapping of my index:
PUT testindex1
{
"mappings": {
"properties": {
"patients": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"age": {
"type": "keyword"
}
}
}
}
}
}
Documents
PUT testindex1/_doc/1
{
"patients": [
{"name" : "1", "age" : "1"},
{"name" : "1", "age" : "2"},
{"name" : "1", "age" : "3"}
]
}
PUT testindex1/_doc/2
{
"patients": [
{"name" : "1", "age" : "1"},
{"name" : "1", "age" : "2"},
{"name" : "1", "age" : "3"}
]
}
PUT testindex1/_doc/3
{
"patients":[
{"name" : "1", "age" : "2"},
{"name" : "1", "age" : "5"},
{"name" : "1", "age" : "4"}
]
}
What I'm trying to get is all the documents whose patients' ages include every value in the list ["2", "1"]; in this case, only documents 1 and 2. I know that I can update the mapping by using
this approach
But this would mean that I would have to reprocess the entire dataset
Goal: get the documents whose patients include both ages "1" and "2" (only documents 1 and 2).
I've found the answer here : Search a nested field for multiple values on the same field with elasticsearch
Basically, you need a separate nested clause for each value, all of which must match (here combined in a bool filter):
GET testindex1/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "patients",
"query": {
"bool": {
"filter": [
{
"match": {
"patients.age": "2"
}
}
]
}
}
}
},
{
"nested": {
"path": "patients",
"query": {
"bool": {
"filter": [
{
"match": {
"patients.age": "1"
}
}
]
}
}
}
}
]
}
}
}
This returns only the documents whose patients include both age 1 and age 2, producing the following output:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "testindex1",
"_id" : "1",
"_score" : 0.0,
"_source" : {
"patients" : [
{
"name" : "1",
"age" : "1"
},
{
"name" : "1",
"age" : "2"
},
{
"name" : "1",
"age" : "3"
}
]
}
},
{
"_index" : "testindex1",
"_id" : "2",
"_score" : 0.0,
"_source" : {
"patients" : [
{
"name" : "1",
"age" : "1"
},
{
"name" : "1",
"age" : "2"
},
{
"name" : "1",
"age" : "3"
}
]
}
}
]
}
}

elastic search version 6-7 , analyzer used for sort not working

I am creating the following my_cars index
PUT my_cars
{
"settings": {
"analysis": {
"analyzer": {
"sortable": {
"tokenizer": "keyword",
"filter": ["lowercase"]
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "sortable"
}
}
}
}
When I check the mapping, it seems fine:
{
"my_cars" : {
"mappings" : {
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "sortable"
}
}
}
}
}
But now when i run the query for search and sort
GET my_cars/_search
{
"query": {
"match_all": {}
},
"sort": {
"name.keyword": {
"order": "asc"
}
}
}
The capital/uppercase results show up first, which makes me think the analyzer is not working properly. The result I get is as follows:
{
"took" : 163,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my_cars",
"_type" : "_doc",
"_id" : "f1RLUnoBZEZpPd-TeK9L",
"_score" : null,
"_source" : {
"name" : "Apples",
"price" : 250
},
"sort" : [
"Apples"
]
},
{
"_index" : "my_cars",
"_type" : "_doc",
"_id" : "H7JLUnoBh60DJePfnpGB",
"_score" : null,
"_source" : {
"name" : "Brocoli",
"price" : 250
},
"sort" : [
"Brocoli"
]
},
{
"_index" : "my_cars",
"_type" : "_doc",
"_id" : "gFRLUnoBZEZpPd-Tyq9A",
"_score" : null,
"_source" : {
"name" : "azus",
"price" : 110
},
"sort" : [
"azus"
]
},
{
"_index" : "my_cars",
"_type" : "_doc",
"_id" : "gVRMUnoBZEZpPd-TAq-A",
"_score" : null,
"_source" : {
"name" : "botpzus",
"price" : 80
},
"sort" : [
"botpzus"
]
}
]
}
}
As you can see, the lowercase names come last. How do I fix this? I have built my analyzer based on THIS question. But unlike the answer in that question, I am unable to add the analyzer field directly inside the keyword mapping. How do I get alphabetical sorting irrespective of casing?
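The solution is to use a normalizer on the name.keyword sub-field. The sort runs on name.keyword, which is a keyword field and is therefore not processed by the analyzer configured on the parent text field; keyword fields accept a normalizer instead: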
PUT my_cars
{
"settings": {
"analysis": {
"normalizer": {
"sortable": {
"filter": ["lowercase"]
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"normalizer": "sortable"
}
}
}
}
}
}

Finding all objects with a certain field in ElasticSearch

My mapping looks like so:
"condition": {
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
and some data I have looks like:
"condition": [
{
"name": "condition",
"value": "new",
},
{
"name": "condition",
"value": "gently-used",
}
]
How can I write a query that finds all objects within the array that have a new condition?
I have the following but I am getting 0 results back:
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"attribute_condition": "new"
}
}
]
}
}
}
First, you need to map your condition field as a nested type.
"condition": {
"type": "nested",
"properties": {
"name": { "type": "keyword" },
"value": { "type": "keyword" }
}
},
Now you're able to query each element of the condition array independently of the others. Next, you need to use the nested query and request the inner hits, which are returned in the inner_hits object of each hit in the query response:
{
"query": {
"bool": {
"must": {
"nested": {
"path": "condition",
"query": {
"match": {
"condition.value": "new"
}
},
"inner_hits": {}
}
}
}
}
}
An example response will look like below:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931471,
"hits" : [
{
"_index" : "nested",
"_type" : "_doc",
"_id" : "Xx_LN3gBp5RUqdfAef3B",
"_score" : 0.6931471,
"_source" : {
"condition" : [
{
"name" : "condition",
"value" : "new"
},
{
"name" : "condition",
"value" : "gently-used"
}
]
},
"inner_hits" : { <--- here begins the list of inner hits
"condition" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931471,
"hits" : [
{
"_index" : "nested",
"_type" : "_doc",
"_id" : "Xx_LN3gBp5RUqdfAef3B",
"_nested" : {
"field" : "condition",
"offset" : 0
},
"_score" : 0.6931471,
"_source" : {
"name" : "condition",
"value" : "new"
}
}
]
}
}
}
}
]
}
}

How to build simple terms query for nested object?

I have index like this:
PUT job_offers
{
"mappings": {
"properties": {
"location": {
"properties": {
"slug": {
"type": "keyword"
},
"name": {
"type": "text"
}
},
"type": "nested"
},
"experience": {
"properties": {
"slug": {
"type": "keyword"
},
"name": {
"type": "text"
}
},
"type": "nested"
}
}
}
}
I insert this object:
POST job_offers/_doc
{
"title": "Junior Ruby on Rails Developer",
"location": [
{
"slug": "new-york",
"name": "New York"
},
{
"slug": "atlanta",
"name": "Atlanta"
},
{
"slug": "remote",
"name": "Remote"
}
],
"experience": [
{
"slug": "junior",
"name": "Junior"
}
]
}
This query returns 0 documents.
GET job_offers/_search
{
"query": {
"terms": {
"location.slug": [
"remote",
"new-york"
]
}
}
}
Can you explain why? I thought it should return documents where location.slug is remote or new-york.
Nested queries have a different syntax: fields inside a nested type must be searched through a nested query that specifies the path.
GET job_offers/_search
{
"query": {
"nested": {
"path": "location",
"query": {
"terms": {
"location.slug": ["remote","new-york"]
}
}
}
}
}
Result:
"hits" : [
{
"_index" : "job_offers",
"_type" : "_doc",
"_id" : "wWjoXnEBs0rCGpYsvUf4",
"_score" : 1.0,
"_source" : {
"title" : "Junior Ruby on Rails Developer",
"location" : [
{
"slug" : "new-york",
"name" : "New York"
},
{
"slug" : "atlanta",
"name" : "Atlanta"
},
{
"slug" : "remote",
"name" : "Remote"
}
],
"experience" : [
{
"slug" : "junior",
"name" : "Junior"
}
]
}
}
]
It will return the entire document where location.slug matches "remote" or "new-york". If you want to get only the matched nested documents, you need to use inner_hits:
GET job_offers/_search
{
"query": {
"nested": {
"path": "location",
"query": {
"terms": {
"location.slug": ["remote","new-york"]
}
},
"inner_hits": {} --> note
}
}
}
Result:
"hits" : [
{
"_index" : "job_offers",
"_type" : "_doc",
"_id" : "wWjoXnEBs0rCGpYsvUf4",
"_score" : 1.0,
"_source" : {
"title" : "Junior Ruby on Rails Developer",
"location" : [
{
"slug" : "new-york",
"name" : "New York"
},
{
"slug" : "atlanta",
"name" : "Atlanta"
},
{
"slug" : "remote",
"name" : "Remote"
}
],
"experience" : [
{
"slug" : "junior",
"name" : "Junior"
}
]
},
"inner_hits" : { --> will give matched nested object
"location" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "job_offers",
"_type" : "_doc",
"_id" : "wWjoXnEBs0rCGpYsvUf4",
"_nested" : {
"field" : "location",
"offset" : 0
},
"_score" : 1.0,
"_source" : {
"slug" : "new-york",
"name" : "New York"
}
},
{
"_index" : "job_offers",
"_type" : "_doc",
"_id" : "wWjoXnEBs0rCGpYsvUf4",
"_nested" : {
"field" : "location",
"offset" : 2
},
"_score" : 1.0,
"_source" : {
"slug" : "remote",
"name" : "Remote"
}
}
]
}
}
}
}
]
Also, I see that you are using two fields for the same data with different types. If the data is the same in both fields (name and slug) and only the data type is different, you can use multi-fields (the fields mapping parameter) for that:
It is often useful to index the same field in different ways for
different purposes. This is the purpose of multi-fields. For instance,
a string field could be mapped as a text field for full-text search,
and as a keyword field for sorting or aggregations:
In that case, your mapping will become the following:
PUT job_offers
{
"mappings": {
"properties": {
"location": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
},
"type": "nested"
},
"experience": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
},
"type": "nested"
}
}
}
}

Discard all result of same user if one match in result set

My requirement is that if a user has performed e2, then that user must not appear in the result at all. For example, here user_id 15371082 has performed e2 and e3; because this user has performed e2, both records (e2 and e3) for user_id 15371082 should be discarded from the result.
{
"id": 1,
"name": "a",
"user_id": 15371080,
"event" : 'e1'
},
{
"id": 1,
"name": "a",
"user_id": 15371082,
"event" : 'e2'
},
{
"id": 1,
"name": "a",
"user_id": 15371081,
"event" : 'e3'
},
{
"id": 1,
"name": "a",
"user_id": 15371082,
"event" : 'e3'
}
Expected result
{
"id": 1,
"name": "a",
"user_id": 15371080,
"event" : 'e1'
},
{
"id": 1,
"name": "a",
"user_id": 15371081,
"event" : 'e3'
}
My result should look like the above.
The right way would be to create a unique document for each user_id and add the events as nested documents.
Mapping:
PUT eventindex/_mappings
{
"properties": {
"name":{
"type": "text"
},
"user_id":{
"type": "integer"
},
"event":{
"type": "nested",
"properties": {
"name":{
"type":"text"
}
}
}
}
}
Data:
[
{
"_index" : "eventindex",
"_type" : "_doc",
"_id" : "tT1IL20Bcyz1xvxninMr",
"_score" : 1.0,
"_source" : {
"name" : "b",
"user_id" : 2,
"event" : [
{
"name" : "e1"
},
{
"name" : "e3"
}
]
}
},
{
"_index" : "eventindex",
"_type" : "_doc",
"_id" : "tj1ML20Bcyz1xvxngXNn",
"_score" : 1.0,
"_source" : {
"name" : "a",
"user_id" : 1,
"event" : [
{
"name" : "e2"
},
{
"name" : "e3"
}
]
}
}
]
Query:
````
GET eventindex/_search
{
"query": {
"bool": {
"must_not": [
{
"nested": {
"path": "event",
"query": {
"term": {
"event.name": {
"value": "e2"
}
}
}
}
}
]
}
}
}
````
Result:
````
[
{
"_index" : "eventindex",
"_type" : "_doc",
"_id" : "tT1IL20Bcyz1xvxninMr",
"_score" : 0.0,
"_source" : {
"name" : "b",
"user_id" : 2,
"event" : [
{
"name" : "e1"
},
{
"name" : "e3"
}
]
}
}
]
````

Resources