Elasticsearch - how to apply sort on nested field - elasticsearch

I'm rewriting the nested field in the documents and failing to get the query right to sort on the nested fields.
Previously I had the nested field like this:
"my_nested_obj": {
"project-type": [
{
"name": "Table",
"value": "159841"
}
],
"cost": [
{
"name": "Under $50",
"value": "426503"
}
],
"skill-level": [
{
"name": "Intermediate",
"value": "63897"
}
],
"room": [
{
"name": "Outdoor",
"value": "19246"
}
]
}....
And I was able to write queries like these where I can boost and also sort on the 'my_nested_obj' for example:
{
"from": 0,
"size": 50,
"query": {
"filtered": {
"query": {
"multi_match": {
"query": "something",
"fields": [
"content",
"name",
"my_nested_obj.skill-level.name^3"
]
}
},
"filter": {
"bool": {
"must": [
{
"match_all": [
]
},
{
"term": {
"retired": false
}
}
]
}
}
}
},
"sort": {
"my_nested_obj.skill-level.name": "desc"
},
"timeout": "1800ms"
}
Now, I need to reformat the nested field like:
"my_nested_obj": [
{
"name": "Table",
"type": "project-type",
"value": "159841"
},
{
"name": "Under $50",
"type": "cost",
"value": "426503"
},
{
"name": "Intermediate",
"type": "skill-level",
"value": "63897"
},
{
"name": "Outdoor",
"type": "room",
"value": "19246"
}
]....
I can do a generic sort on my_nested_obj.name like:
....
"sort": {
"my_nested_obj.name": "desc"
},
...
How do I sort specifically on the name of the entry whose type is skill-level, rather than on every my_nested_obj.name? Also, is there some way to specify a boost for that entry?
Thanks!

Related

Elasticsearch Querying Double Nested Object, Match Multiple Rows in Query Within Parent

My data model is related to patient records. At the highest level is the Patient, then their information such as Lab Panels and the individual rows of the results of the panel. So it looks like this: {Patient:{Labs:[{Results:[{}]}]}}
I am able to successfully create the two nested objects Labs nested in Patient and Results nested in Labs, populate it, and query it. What I am unable to successfully do is create a query that constrains the results to a single Lab, and then match by more than one row in the Results object.
An example is attached, where I only want labs that are "Lipid Panel" and the results are HDL <= 46 and LDL >= 140.
Any suggestions?
Example Index
PUT localhost:9200/testpipeline
{
"aliases": {},
"mappings": {
"dynamic": "false",
"properties": {
"ageAtFirstEncounter": {
"type": "float"
},
"dateOfBirth": {
"type": "date"
},
"gender": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labs": {
"type": "nested",
"properties": {
"ageOnDateOfService": {
"type": "float"
},
"date": {
"type": "date"
},
"encounterId": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"isEdVisit": {
"type": "boolean"
},
"labPanelName": {
"type": "keyword"
},
"labPanelNameId": {
"type": "float"
},
"labPanelSourceName": {
"type": "text",
"store": true
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"results": {
"type": "nested",
"properties": {
"dataType": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labTestName": {
"type": "keyword"
},
"labTestNameId": {
"type": "float"
},
"resultAsNumber": {
"type": "float"
},
"resultAsText": {
"type": "keyword"
},
"sourceName": {
"type": "text",
"store": true
},
"unit": {
"type": "keyword"
}
}
}
}
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"race": {
"type": "keyword"
}
}
}
}
Example Document
PUT localhost:9200/testpipeline/_doc/274746
{
"id": 274746,
"personId": "10005786.000000",
"processingLogId": 51,
"gender": "Female",
"dateOfBirth": "1945-01-01T00:00:00",
"ageAtFirstEncounter": 76,
"labs": [
{
"isEdVisit": false,
"labPanelSourceName": "Lipid Panel",
"dataType": "LAB",
"ageOnDateOfService": 76.9041,
"results": [
{
"unit": "mg/dL",
"labTestNameId": 160,
"labTestName": "HDL",
"sourceName": "HDL",
"resultAsNumber": 46.0,
"resultAsText": "46",
"id": 2150284
},
{
"unit": "mg/dL",
"labTestNameId": 158,
"labTestName": "LDL",
"sourceName": "LDL",
"resultAsNumber": 144.0,
"resultAsText": "144.00",
"id": 2150286
}
],
"id": "9ab9ba84-580b-f2d2-4d32-25658ea5f1bf",
"sourceId": 2150278,
"personId": "10003783.000000",
"encounterId": "39617217.000000",
"processingLogId": 51,
"date": "2021-11-08T00:00:00"
}
],
"lastModified": "2022-03-24T10:21:29.8682784-05:00"
}
Example Query
POST localhost:9200/testpipeline/_search
{
"fields": [
"personId",
"processingLogId",
"id",
"gender",
"ageAtFirstDOS",
"dateOfBirth"
],
"from": 0,
"query": {
"bool": {
"should": [
{
"constant_score": {
"boost": 200,
"filter": {
"bool": {
"_name": "CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,SoftScore:200",
"should": [
{
"bool": {
"must": [
{
"nested": {
"path": "labs",
"inner_hits": {
"size": 3,
"name": "labs,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:8b41f346-2861-4099-b3c0-fcd6393c367b"
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"labs.labPanelSourceName": {
"_name": "CriteriaFilterId:2068,Pipeline.Labs.LabPanelSourceName,es_match_phrase=>'Lipid Panel' found in text",
"query": "Lipid Panel",
"slop": 100
}
}
},
{
"nested": {
"path": "labs.results",
"inner_hits": {
"size": 3,
"name": "labs.results,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:3564e83f-958b-4fe8-848e-f9edb5d7f3b2"
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"lte": 46
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 160
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"gte": 140.0
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 158
}
}
}
]
}
}
],
"minimum_should_match": 2
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
],
"minimum_should_match": 1,
"filter": [
]
}
},
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"processingLogId": {
"order": "asc"
}
},
{
"personId": {
"order": "asc"
}
}
],
"_source": false
}

Nested object retrieval in ElasticSearch query

I'm new in ElasticSearch and I have a few questions regarding nested object retrieval when a specific condition is matched.
I have a tree-like structure as follow:
{
"id": 4,
"sora": [
{
"pContext": {
"context": {
"sT": "D3",
"uT": "ST"
},
"entities": [
{
"name": "premium",
"bName": "premium",
"fT": "site",
"eT": "F_P",
"children": [
{
"name": "capa",
"bName": "capa",
"fT": "site",
"eT": "FFT",
"children": []
},
{
"name": "code",
"bName": "Codes",
"fT": "site",
"eT": "FFT",
"children": []
},
{
"name": "selection A",
"fT": "site",
"eT": "SELECTION_A",
"children": [
{
"name": "A1",
"fT": "site",
"eT": "ADD",
"children": []
},
{
"name": "A2",
"fT": "site",
"eT": "ADD",
"children": []
}
]
}
]
}
]
}
},
{
"pContext": {
"context": {
"sT": "D2",
"uT": "ST"
},
"entities": [
{
"name": "112",
"bName": "112",
"eT": "D_TYPE",
"children": []
}
]
}
}
]
}
My structure can have more levels.
I have many documents as described above. In order to filter my documents I can use the simple query syntax:
{
"_source": {
"excludes": [
"*.context"
]
},
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.context.sT": "D3"
},
"match": {
"sora.pContext.entities.name": "premium"
},
"match": {
"sora.pContext.entities.fT": "site"
}
}
]
}
}
}
What I would like to know is, how can I get the nested object that
matches my query and their children. I need the object that matched
the must inclusive filter. Is that possible?
How can I search for a field without specifying the path?
Thanks
# EDIT
My mapping:
{
"mappings": {
"abc": {
"properties": {
"id": {
"type": "integer"
},
"sora": {
"type": "nested",
"properties": {
"pContext": {
"type": "nested",
"properties": {
"context": {
"type": "nested",
"properties": {
"sT": {
"type": "text"
},
"uT": {
"type": "text"
}
}
},
"entities": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"bName": {
"type": "text"
},
"fT": {
"type": "text"
},
"eT": {
"type": "text"
},
"children": {
"type": "object"
}
}
}
}
}
}
}
}
}
}
}
Yes, you can get the matching objects by using inner_hits together with a nested query, instead of the query you added to the question.
Your query will look as below:
{
"_source": {
"excludes": [
"*.context"
]
},
"query": {
"bool": {
"filter": [
{
"nested": {
"inner_hits": {},
"path": "sora.pContext",
"query": {
"bool": {
"must": [
{
"nested": {
"path": "sora.pContext.context",
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.context.sT": "D3"
}
}
]
}
}
}
},
{
"nested": {
"path": "sora.pContext.entities",
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.entities.name": "premium"
}
},
{
"match": {
"sora.pContext.entities.fT": "site"
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
}
I have added a link to the inner_hits documentation, where you can see what the results will look like.
Well, in case someone else is facing the same issue: my solution was to add all children at the same path/level as the parent, while keeping the mapping between the parent and its children. With that, I'm able to search and retrieve the parts of the parent as wanted.

Elastic Search: Bool Query in nested properties

Let's assume I have data structured like this:
{ "id": "120400871755634330808993320",
"name": "Metaalschroef binnenzeskant, DIN 912 RVS A4-80",
"description": "m16x70 cilinderschroef bzk a4-80 din912 klasse 80",
"fullDescription": "Metaalschroef met een binnenzeskant cilinderkop",
"synonyms": [],
"properties": [
{
"name": "draad",
"value": "16",
"sort": 99
},
{
"name": "lengte",
"value": "70",
"sort": 99
},
{
"name": "materiaal",
"value": "roestvaststaal",
"sort": 99
},
{
"name": "kwaliteit (materiaal)",
"value": "A4",
"sort": 99
},
{
"name": "DIN",
"value": "912",
"sort": 99
},
{
"name": "AISI",
"value": "316",
"sort": 99
},
{
"name": "draadsoort",
"value": "metrisch",
"sort": 99
},
{
"name": "Merk",
"value": "Elcee Holland",
"sort": 1
}
]
}
How do I write a boolean query where I select all documents that have a property with name "draad" and value "16" and a property with name "lengte" and value "70"?
Right now I have this but it returns 0 results:
"query" : {
"nested" : {
"path" : "properties",
"query" : {
"bool" : {
"must" : [{
"bool" : {
"must" : [{
"term" : {
"properties.name" : "Merk"
}
}, {
"term" : {
"properties.value" : "Facom"
}
}
]
}
}, {
"bool" : {
"must" : [{
"term" : {
"properties.name" : "materiaal"
}
}, {
"term" : {
"properties.value" : "kunststof"
}
}
]
}
}
]
}
}
}
}
Replacing the highest level "must" with "should" returns too many results, which makes sense as it translates to an "or".
When using must, the engine is trying to search for nested documents with name:Merk and value:Facom. But also with name:materiaal and value:kunststof - which is impossible to happen in the same nested document at once.
When using should, as you mentioned, it translates to or - which is indeed possible.
The problem is that you are also getting the entire parent document with all its nested documents.
In my own answer I'm showing the steps to create an index with nested documents (you should mark the field properties as nested type).
After completing those steps, you'll be able to get results with the following query:
{
"_source": [
"id",
"name",
"description"
],
"query": {
"bool": {
"must": [
{
"nested": {
"path": "properties",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"properties.name": "Merk"
}
},
{
"term": {
"properties.value": "Facom"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"properties.name": "materiaal"
}
},
{
"term": {
"properties.value": "kunststof"
}
}
]
}
}
]
}
},
"inner_hits":{
"size": 10
}
}
}
]
}
}
}
I found a solution that is working very well!
My property object now looks like this:
{
"name": "breedte(mm)",
"value": "1000",
"unit": "mm",
"sort": 99,
"nameSlug": "breedte-mm",
"slug": "breedte-mm-1000"
},
I added a slug (containing a normalized string for key + value) and a nameslug which is a normalized string for the name.
My index is mapped like this:
"properties": {
"type": "nested",
"include_in_parent": true,
"properties": {
"name": {
"type": "keyword"
},
"nameSlug": {
"type": "keyword"
},
"slug": {
"type": "keyword"
},
"sort": {
"type": "long"
},
"unit": {
"type": "text",
"index": false
},
"value": {
"type": "keyword"
}
}
}
The "include_in_parent" is important here. It allows me to do the query below:
"query": {
"bool": {
"must": [
{
"terms": {
"properties.slug": [
"merk-orbis",
"merk-bahco"
]
}
},
{
"terms": {
"properties.slug": [
"materiaal-staal",
"materiaal-kunststof"
]
}
}
]
}
},
This query searches for all documents where "merk" is "Orbis" or "Bahco" and where "materiaal" is "staal" or "kunststof".
My aggregations look like this:
"merk_query": {
"filter": {
"bool": {
"must": [
{
"terms": {
"properties.slug": [
"materiaal-staal",
"materiaal-kunststof"
]
}
}
]
}
},
"aggs": {
"merk_facets": {
"nested": {
"path": "properties"
},
"aggs": {
"merk_only": {
"filter": {
"term": {
"properties.nameSlug": {
"value": "merk"
}
}
},
"aggs": {
"facets": {
"terms": {
"field": "properties.name",
"size": 1
},
"aggs": {
"facetvalues": {
"terms": {
"field": "properties.value",
"size": 10
}
}
}
}
}
}
}
}
}
},
I run a filter aggregation which filters all documents that match a facet (but not the current one I am building).
The result of this aggregation is something like this:
"merk_query": {
"doc_count": 7686,
"merk_facets": {
"doc_count": 68658,
"merk_only": {
"doc_count": 7659,
"facets": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Merk",
"doc_count": 7659,
"facetvalues": {
"doc_count_error_upper_bound": 10,
"sum_other_doc_count": 438,
"buckets": [
{
"key": "Orbis",
"doc_count": 6295
},
{
"key": "DX",
"doc_count": 344
},
{
"key": "AXA",
"doc_count": 176
},
{
"key": "Talen Tools",
"doc_count": 127
},
{
"key": "Nemef",
"doc_count": 73
},
{
"key": "bonfix",
"doc_count": 67
},
{
"key": "Bahco",
"doc_count": 64
},
{
"key": "Henderson",
"doc_count": 27
},
{
"key": "Maasland Groep",
"doc_count": 25
},
{
"key": "SYSTEC",
"doc_count": 23
}
]
}
}
]
}
}
}
}
},
And this is the end result in the browser:

ElasticSearch - Getting paged result in a nested list (nested pagination)

I have the following Json that describes a country-city (1:n) relation
{
"country": [
{
"id": 1,
"name": "Country1",
"city": [
{"id": 1, "name": "City1"},
{"id": 2,"name": "City2"}
]
}, {
"id": 2,
"name": "Country2",
"city": [
{"id": 3,"name": "City3"},
{"id": 4,"name": "City4"}
]
}, {
"id": 3,
"name": "Country3",
"city": [
{"id": 5,"name": "City5"},
{"id": 6,"name": "City6"}
]
}
]
}
I have loaded it into an ES map with 3 documents of the three countries.
I have added nested property in the city index
...
"city": {
"type": "nested",
...
I want to query all cities and get a paged result.
For instance 3 hits will return city1, city2, city3
I want to filter by country name
I tried
GET /127.0.0.1:9200/country_city/_search
{
"from": 0,
"size": 2,
"fields": [
"city.id", "city.name"
]
}
and
GET /127.0.0.1:9200/country_city/country/_search?_source=false
{
"query": {
"nested": {
"path": "city",
"query": {
"match_all": {}
},
"inner_hits": {
"sort": "city.id",
"from": 0,
"size": 3
}
}
},
"fields": [
"name",
"city.id",
"city.name"
]
}
But the first returned 4 cities instead of 2.
(2 countries have 2 cities each)
The second returned all documents (although size is 2 in the request) and in an inner element returned the first 3 cities of each country.
How can I get a page size of the nested object?
And then progress to the next page?
This should work
Mappings
{
"mappings": {
"type": {
"properties": {
"country": {
"type": "nested",
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "text"
},
"city": {
"type": "nested",
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
Query
{
"query": {
"nested": {
"path": "country",
"inner_hits": {},
"query": {
"nested": {
"path": "country.city",
"query": {
"match_all": {}
},
"inner_hits": {
"from": 0,
"size": 1,
"_source": {
"includes": ["country.city.name", "country.city.id"]
}
}
}
}
}
}
}
github bug
source filtering
Thanks

Elasticsearch how to sort with condition

On my ElasticSearch (2.x) I have documents like this:
{
"title": "A good title",
"formats": [{
"name": "pdf",
"prices": [{
"price": 11.99,
"currency": "EUR"
}, {
"price": 18.99,
"currency": "AUD"
}]
}]
}
I'd like to sort documents by formats.prices.price but only where the formats.prices.currency === 'EUR'
I tried to do a nested field on formats.prices and then run this query:
{
"query": {
"filtered": {
"query": {
"and": [
{
"match_all": {}
}
]
}
}
},
"sort": {
"formats.prices.price": {
"order": "desc",
"nested_path": "formats.prices",
"nested_filter": {
"term": {
"currency": "EUR"
}
}
}
}
}
But unfortunately I cannot get the right order.
UPDATE:
Relevant part of mapping:
"formats": {
"properties": {
"name": {
"type": "string"
},
"prices": {
"type": "nested",
"include_in_parent": true,
"properties": {
"currency": {
"type": "string"
},
"price": {
"type": "double"
}
}
}
}
},
I hope this will solve your problem:
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "formats.prices",
"filter": {
"match": {
"formats.prices.currency": "EUR"
}
}
}
}
]
}
},
"from": 0,
"size": 50,
"sort": [
{
"formats.prices.price": {
"order": "asc",
"nested_path": "formats.prices",
"nested_filter": {
"match": {
"formats.prices.currency": "EUR"
}
}
}
}
]
}

Resources