Elasticsearch update a new mapping on index with default values - elasticsearch

I am updating my index with new properties in Elasticsearch and trying to add default values for the newly created properties. I have tried the approach below, but the update query is failing with the following error message: 'failed to create query: [nested] nested object under path [summaryTableColumns] is not of nested type'
New Mapping
{
"properties": {
"subjectPropertyFields": {
"type": "nested",
"properties": {
"key": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"summaryTableColumns": {
"type": "nested",
"properties": {
"key": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
}
}
}
Update query using Painless that I have tried
{
"script": {
"source": "ctx._source.summaryTableColumns= params.summaryTableColumns",
"lang": "painless",
"params": {
"summaryTableColumns": [
{
"key": "Property Name",
"value": "name"
},
{
"key": "City",
"value": "city"
},
{
"key": "Distance",
"value": "propertyAddress"
},
{
"key": "Units",
"value": "units"
},
{
"key": "Built",
"value": "yearBuilt"
},
{
"key": "Occupancy",
"value": "occupancyAsOfDate"
},
{
"key": "Avg SF",
"value": "avgSf"
},
{
"key": "Avg Rent",
"value": "avgMarketRentSf"
},
{
"key": "Avg Rent/SF",
"value": "avgRentSf"
},
{
"key": "NA",
"value": "NA"
}
]
}
},
"query": {
"bool": {
"must_not": [
{
"nested": {
"path": "summaryTableColumns",
"query": {
"bool": {
"filter": {
"exists": {
"field": "summaryTableColumns"
}
}
}
}
}
}
],
"should": {
"bool": {
"must_not": {
"match": {
"templateName": "salesComps"
}
}
}
}
}
}
}
Error I am facing
'failed to create query: [nested] nested object under path [summaryTableColumns] is not of nested type'

Related

How to aggregate matched terms in a query_string search?

I wish to search for wildcard terms in a nested list of dicts and then obtain a list of the matching terms and their uuids, grouped by the wildcard they matched.
I have the following mapping in my index:
"mappings": {
"properties": {
"uuid": {
"type": "keyword"
},
"urls": {
"type": "nested",
"properties": {
"url": {
"type": "keyword"
},
"is_visited": {
"type": "boolean"
}
}
}
}
}
and a lot of data such as this:
{
"uuid":"afa9ac03-0723-4d66-ae18-08a51e2973bd"
"urls": [
{
"is_visited": true,
"url": "https://www.google.com"
},
{
"is_visited": false,
"url": "https://www.facebook.com"
},
{
"is_visited": true,
"url": "https://www.twitter.com"
},
]
},
{
"uuid":"4a1c695d-756b-4d9d-b3a0-cf524d955884"
"urls": [
{
"is_visited": true,
"url": "https://www.stackoverflow.com"
},
{
"is_visited": false,
"url": "https://www.facebook.com"
},
{
"is_visited": false,
"url": "https://drive.google.com"
},
{
"is_visited": false,
"url": "https://maps.google.com"
},
]
}
...
I wish to search via wildcard "*google.com OR *twitter.com" and obtain something like this:
"hits": [
"*google.com": [
{
"uuid": "4a1c695d-756b-4d9d-b3a0-cf524d955884",
"_source": {
"is_visited": false,
"url": "https://drive.google.com"
}
},
{
"id": "4a1c695d-756b-4d9d-b3a0-cf524d955884",
"_source": {
"is_visited": false,
"url": "https://maps.google.com"
}
},
{
"uuid":"afa9ac03-0723-4d66-ae18-08a51e2973bd",
"_source": {
"is_visited": true,
"url": "https://www.google.com"
}
}
]
"*twitter.com": [
{
"uuid":"afa9ac03-0723-4d66-ae18-08a51e2973bd",
"_source": {
"is_visited": true,
"url": "https://www.twitter.com"
},
},
]
]
This is my (python) search query:
body = {
#"_source": False,
"size": 100,
"query": {
"nested": {
"path": "urls",
"query":{
"query_string":{
"query": f"urls.url:{urlToSearch}",
}
}
,"inner_hits": {
"size":100 # returns top 100 results
}
}
}
}
but it returns a hit for each matched term instead of aggregating them into a list similar to what I would like to get.
EDIT
This is my setting and mapping:
{
"settings": {
"analysis": {
"char_filter": {
"my_filter": {
"type": "mapping",
"mappings": [
"- => _",
]
},
},
"analyzer": {
"my_analyzer": {
"tokenizer": "standard",
"char_filter": [
"my_filter"
],
"filter": [
"lowercase",
]
}
}
}
},
"mappings": {
"properties": {
"uuid": {
"type": "keyword"
},
"urls": {
"type": "nested",
"properties": {
"url": {
"type": "keyword"
},
"is_visited": {
"type": "boolean"
}
}
}
}
}
}
Elasticsearch will not provide the output you want the way you set up the query.
This scenario calls for an aggregation. My suggestion is to apply the nested query and then run an aggregation on the results.
A point of attention regarding the wildcard query:
Avoid beginning patterns with * or ?. This can increase the iterations
needed to find matching terms and slow search performance.
{
"size": 0,
"query": {
"nested": {
"path": "urls",
"query": {
"bool": {
"should": [
{
"wildcard": {
"urls.url": {
"value": "*google.com"
}
}
},
{
"wildcard": {
"urls.url": {
"value": "*twitter.com"
}
}
}
]
}
}
}
},
"aggs": {
"agg_providers": {
"nested": {
"path": "urls"
},
"aggs": {
"google.com": {
"terms": {
"field": "urls.url",
"include": ".*google.com",
"size": 10
}
},
"twitter.com": {
"terms": {
"field": "urls.url",
"include": ".*twitter.com",
"size": 10
}
}
}
}
}
}
Results:
"aggregations": {
"agg_providers": {
"doc_count": 7,
"twitter.com": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "https://www.twitter.com",
"doc_count": 1
}
]
},
"google.com": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "https://drive.google.com",
"doc_count": 1
},
{
"key": "https://maps.google.com",
"doc_count": 1
},
{
"key": "https://www.google.com",
"doc_count": 1
}
]
}
}
}

Elasticsearch Querying Double Nested Object, Match Multiple Rows in Query Within Parent

My data model is related to patient records. At the highest level is the Patient, then their information such as Lab Panels and the individual rows of the results of the panel. So it looks like this: {Patient:{Labs:[{Results:[{}]}]}}
I am able to successfully create the two nested objects Labs nested in Patient and Results nested in Labs, populate it, and query it. What I am unable to successfully do is create a query that constrains the results to a single Lab, and then match by more than one row in the Results object.
An example is attached, where I only want labs that are "Lipid Panel" and the results are HDL <= 46 and LDL >= 140.
Any suggestions?
Example Index
PUT localhost:9200/testpipeline
{
"aliases": {},
"mappings": {
"dynamic": "false",
"properties": {
"ageAtFirstEncounter": {
"type": "float"
},
"dateOfBirth": {
"type": "date"
},
"gender": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labs": {
"type": "nested",
"properties": {
"ageOnDateOfService": {
"type": "float"
},
"date": {
"type": "date"
},
"encounterId": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"isEdVisit": {
"type": "boolean"
},
"labPanelName": {
"type": "keyword"
},
"labPanelNameId": {
"type": "float"
},
"labPanelSourceName": {
"type": "text",
"store": true
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"results": {
"type": "nested",
"properties": {
"dataType": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labTestName": {
"type": "keyword"
},
"labTestNameId": {
"type": "float"
},
"resultAsNumber": {
"type": "float"
},
"resultAsText": {
"type": "keyword"
},
"sourceName": {
"type": "text",
"store": true
},
"unit": {
"type": "keyword"
}
}
}
}
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"race": {
"type": "keyword"
}
}
}
}
Example Document
PUT localhost:9200/testpipeline/_doc/274746
{
"id": 274746,
"personId": "10005786.000000",
"processingLogId": 51,
"gender": "Female",
"dateOfBirth": "1945-01-01T00:00:00",
"ageAtFirstEncounter": 76,
"labs": [
{
"isEdVisit": false,
"labPanelSourceName": "Lipid Panel",
"dataType": "LAB",
"ageOnDateOfService": 76.9041,
"results": [
{
"unit": "mg/dL",
"labTestNameId": 160,
"labTestName": "HDL",
"sourceName": "HDL",
"resultAsNumber": 46.0,
"resultAsText": "46",
"id": 2150284
},
{
"unit": "mg/dL",
"labTestNameId": 158,
"labTestName": "LDL",
"sourceName": "LDL",
"resultAsNumber": 144.0,
"resultAsText": "144.00",
"id": 2150286
}
],
"id": "9ab9ba84-580b-f2d2-4d32-25658ea5f1bf",
"sourceId": 2150278,
"personId": "10003783.000000",
"encounterId": "39617217.000000",
"processingLogId": 51,
"date": "2021-11-08T00:00:00"
}
],
"lastModified": "2022-03-24T10:21:29.8682784-05:00"
}
Example Query
POST localhost:9200/testpipeline/_search
{
"fields": [
"personId",
"processingLogId",
"id",
"gender",
"ageAtFirstDOS",
"dateOfBirth"
],
"from": 0,
"query": {
"bool": {
"should": [
{
"constant_score": {
"boost": 200,
"filter": {
"bool": {
"_name": "CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,SoftScore:200",
"should": [
{
"bool": {
"must": [
{
"nested": {
"path": "labs",
"inner_hits": {
"size": 3,
"name": "labs,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:8b41f346-2861-4099-b3c0-fcd6393c367b"
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"labs.labPanelSourceName": {
"_name": "CriteriaFilterId:2068,Pipeline.Labs.LabPanelSourceName,es_match_phrase=>'Lipid Panel' found in text",
"query": "Lipid Panel",
"slop": 100
}
}
},
{
"nested": {
"path": "labs.results",
"inner_hits": {
"size": 3,
"name": "labs.results,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:3564e83f-958b-4fe8-848e-f9edb5d7f3b2"
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"lte": 46
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 160
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"gte": 140.0
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 158
}
}
}
]
}
}
],
"minimum_should_match": 2
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
],
"minimum_should_match": 1,
"filter": [
]
}
},
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"processingLogId": {
"order": "asc"
}
},
{
"personId": {
"order": "asc"
}
}
],
"_source": false
}

Nested object retrieval in ElasticSearch query

I'm new to Elasticsearch and I have a few questions regarding nested object retrieval when a specific condition is matched.
I have a tree-like structure as follows:
{
"id": 4,
"sora": [
{
"pContext": {
"context": {
"sT": "D3",
"uT": "ST"
},
"entities": [
{
"name": "premium",
"bName": "premium",
"fT": "site",
"eT": "F_P",
"children": [
{
"name": "capa",
"bName": "capa",
"fT": "site",
"eT": "FFT",
"children": []
},
{
"name": "code",
"bName": "Codes",
"fT": "site",
"eT": "FFT",
"children": []
},
{
"name": "selection A",
"fT": "site",
"eT": "SELECTION_A",
"children": [
{
"name": "A1",
"fT": "site",
"eT": "ADD",
"children": []
},
{
"name": "A2",
"fT": "site",
"eT": "ADD",
"children": []
}
]
}
]
}
]
}
},
{
"pContext": {
"context": {
"sT": "D2",
"uT": "ST"
},
"entities": [
{
"name": "112",
"bName": "112",
"eT": "D_TYPE",
"children": []
}
]
}
}
]
}
My structure can have more levels.
I have many documents as described above. In order to filter my documents I can use the simple query syntax:
{
"_source": {
"excludes": [
"*.context"
]
},
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.context.sT": "D3"
},
"match": {
"sora.pContext.entities.name": "premium"
},
"match": {
"sora.pContext.entities.fT": "site"
}
}
]
}
}
}
What I would like to know is: how can I get the nested object that
matches my query, together with its children? I need the object that matched
the must inclusive filter. Is that possible?
How can I search for a field without specifying the path?
Thanks
# EDIT
My mapping:
{
"mappings": {
"abc": {
"properties": {
"id": {
"type": "integer"
},
"sora": {
"type": "nested",
"properties": {
"pContext": {
"type": "nested",
"properties": {
"context": {
"type": "nested",
"properties": {
"sT": {
"type": "text"
},
"uT": {
"type": "text"
}
}
},
"entities": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"bName": {
"type": "text"
},
"fT": {
"type": "text"
},
"eT": {
"type": "text"
},
"children": {
"type": "object"
}
}
}
}
}
}
}
}
}
}
}
Yes, you can get the matching objects by using inner_hits along with a nested query, rather than the query you added to the question.
Your query will look as below:
{
"_source": {
"excludes": [
"*.context"
]
},
"query": {
"bool": {
"filter": [
{
"nested": {
"inner_hits": {},
"path": "sora.pContext",
"query": {
"bool": {
"must": [
{
"nested": {
"path": "sora.pContext.context",
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.context.sT": "D3"
}
}
]
}
}
}
},
{
"nested": {
"path": "sora.pContext.entities",
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.entities.name": "premium"
}
},
{
"match": {
"sora.pContext.entities.fT": "site"
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
}
I have added a link to the inner_hits documentation, where you can see what the results will look like.
Well, if someone else is facing the same issue: my solution was to add all children at the same path/level as the parent, while keeping the mapping between the parent and its children. With that, I'm able to search and retrieve the parts of the parent as wanted.

How to do aggregation on nested objects - Elasticsearch

I'm pretty new to Elasticsearch so please bear with me.
This is part of my document in ES.
{
"source": {
"detail": {
"attribute": {
"Size": ["32 Gb",4],
"Type": ["Tools",4],
"Brand": ["Sandisk",4],
"Color": ["Black",4],
"Model": ["Sdcz36-032g-b35",4],
"Manufacturer": ["Sandisk",4]
}
},
"title": {
"list": [
"Sandisk Cruzer 32gb Usb 32 Gb Flash Drive , Black - Sdcz36-032g"
]
}
}
}
So what I want to achieve is to find the best three or top three hits of the attribute object. For example, if I do a search for "sandisk", I want to get three attributes like ["Size", "Color", "Model"] or whatever attributes based on the top hits aggregation.
So I did a query like this:
{
"size": 0,
"aggs": {
"categoryList": {
"filter": {
"bool": {
"filter": [
{
"term": {
"title.list": "sandisk"
}
}
]
}
},
"aggs": {
"results": {
"terms": {
"field": "detail.attribute",
"size": 3
}
}
}
}
}
}
But it seems to be not working. How do I fix this? Any hints would be much appreciated.
This is the _mappings. It is not the complete one, but I guess this would suffice.
{
"catalog2_0": {
"mappings": {
"product": {
"dynamic": "strict",
"dynamic_templates": [
{
"attributes": {
"path_match": "detail.attribute.*",
"mapping": {
"type": "text"
}
}
}
],
"properties": {
"detail": {
"properties": {
"attMaxScore": {
"type": "scaled_float",
"scaling_factor": 100
},
"attribute": {
"dynamic": "true",
"properties": {
"Brand": {
"type": "text"
},
"Color": {
"type": "text"
},
"MPN": {
"type": "text"
},
"Manufacturer": {
"type": "text"
},
"Model": {
"type": "text"
},
"Operating System": {
"type": "text"
},
"Size": {
"type": "text"
},
"Type": {
"type": "text"
}
}
},
"description": {
"type": "text"
},
"feature": {
"type": "text"
},
"tag": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
}
}
},
"title": {
"properties": {
"en": {
"type": "text"
}
}
}
}
}
}
}
}
According to the documentation, you can't run an aggregation on a field that has the text datatype. It must have the keyword datatype.
Also, you can't aggregate on the detail.attribute field in that way: the detail.attribute field doesn't store any value. It is an object datatype - not a nested one as you have written in the question - which means that it is a container for other fields like Size, Brand, etc. So you should aggregate against the detail.attribute.Size field, for example - if it were a keyword datatype.
Another probable error is that you are trying to run a term query on a text datatype - what is the datatype of the title.list field? The term query is meant for fields that have the keyword datatype, while the match query is used to query against the text datatype.
Here is what I have used for a nested aggs query, minus the actual value names.
The actual field is a keyword, which as already mentioned is required, that is part of a nested JSON object:
"STATUS_ID": {
"type": "keyword",
"index": "not_analyzed",
"doc_values": true
},
Query
GET index name/_search?size=200
{
"aggs": {
"panels": {
"nested": {
"path": "nested path"
},
"aggs": {
"statusCodes": {
"terms": {
"field": "nested path.STATUS.STATUS_ID",
"size": 50
}
}
}
}
}
}
Result
"aggregations": {
"status": {
"doc_count": 12108963,
"statusCodes": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "O",
"doc_count": 5912218
},
{
"key": "C",
"doc_count": 401586
},
{
"key": "E",
"doc_count": 135628
},
{
"key": "Y",
"doc_count": 3742
},
{
"key": "N",
"doc_count": 1012
},
{
"key": "L",
"doc_count": 719
},
{
"key": "R",
"doc_count": 243
},
{
"key": "H",
"doc_count": 86
}
]
}
}

Elastic Search: Bool Query in nested properties

Let's assume I have data structured like this:
{ "id": "120400871755634330808993320",
"name": "Metaalschroef binnenzeskant, DIN 912 RVS A4-80",
"description": "m16x70 cilinderschroef bzk a4-80 din912 klasse 80",
"fullDescription": "Metaalschroef met een binnenzeskant cilinderkop",
"synonyms": [],
"properties": [
{
"name": "draad",
"value": "16",
"sort": 99
},
{
"name": "lengte",
"value": "70",
"sort": 99
},
{
"name": "materiaal",
"value": "roestvaststaal",
"sort": 99
},
{
"name": "kwaliteit (materiaal)",
"value": "A4",
"sort": 99
},
{
"name": "DIN",
"value": "912",
"sort": 99
},
{
"name": "AISI",
"value": "316",
"sort": 99
},
{
"name": "draadsoort",
"value": "metrisch",
"sort": 99
},
{
"name": "Merk",
"value": "Elcee Holland",
"sort": 1
}
]
}
How do I write a boolean query where I select all documents that have a property with name "draad" and value "16" and a property with name "lengte" and value "70"?
Right now I have this but it returns 0 results:
"query" : {
"nested" : {
"path" : "properties",
"query" : {
"bool" : {
"must" : [{
"bool" : {
"must" : [{
"term" : {
"properties.name" : "Merk"
}
}, {
"term" : {
"properties.value" : "Facom"
}
}
]
}
}, {
"bool" : {
"must" : [{
"term" : {
"properties.name" : "materiaal"
}
}, {
"term" : {
"properties.value" : "kunststof"
}
}
]
}
}
]
}
}
}
}
Replacing the highest level "must" with "should" returns too many results, which makes sense as it translates to an "or".
When using must, the engine tries to find nested documents with name:Merk and value:Facom, but also with name:materiaal and value:kunststof - which cannot both be true in the same nested document.
When using should, as you mentioned, it translates to or - which is indeed possible.
The problem is that you also get the entire parent document with all its nested documents.
In my own answer I'm showing the steps to create an index with nested documents (you should mark the field properties as nested type).
After completing those steps, you'll be able to get results with the following query:
{
"_source": [
"id",
"name",
"description"
],
"query": {
"bool": {
"must": [
{
"nested": {
"path": "properties",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"properties.name": "Merk"
}
},
{
"term": {
"properties.value": "Facom"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"properties.name": "materiaal"
}
},
{
"term": {
"properties.value": "kunststof"
}
}
]
}
}
]
}
},
"inner_hits":{
"size": 10
}
}
}
]
}
}
}
I found a solution that is working very well!
My property object now looks like this:
{
"name": "breedte(mm)",
"value": "1000",
"unit": "mm",
"sort": 99,
"nameSlug": "breedte-mm",
"slug": "breedte-mm-1000"
},
I added a slug (containing a normalized string for key + value) and a nameSlug, which is a normalized string for the name.
My index is mapped like this:
"properties": {
"type": "nested",
"include_in_parent": true,
"properties": {
"name": {
"type": "keyword"
},
"nameSlug": {
"type": "keyword"
},
"slug": {
"type": "keyword"
},
"sort": {
"type": "long"
},
"unit": {
"type": "text",
"index": false
},
"value": {
"type": "keyword"
}
}
}
The "include_in_parent" is important here. It allows me to do the query below:
"query": {
"bool": {
"must": [
{
"terms": {
"properties.slug": [
"merk-orbis",
"merk-bahco"
]
}
},
{
"terms": {
"properties.slug": [
"materiaal-staal",
"materiaal-kunststof"
]
}
}
]
}
},
This query searches for all documents where "merk" is "Orbis" or "Bahco" and where "materiaal" is "staal" or "kunststof".
My aggregations look like this:
"merk_query": {
"filter": {
"bool": {
"must": [
{
"terms": {
"properties.slug": [
"materiaal-staal",
"materiaal-kunststof"
]
}
}
]
}
},
"aggs": {
"merk_facets": {
"nested": {
"path": "properties"
},
"aggs": {
"merk_only": {
"filter": {
"term": {
"properties.nameSlug": {
"value": "merk"
}
}
},
"aggs": {
"facets": {
"terms": {
"field": "properties.name",
"size": 1
},
"aggs": {
"facetvalues": {
"terms": {
"field": "properties.value",
"size": 10
}
}
}
}
}
}
}
}
}
},
I run a filter aggregation which filters all documents that match a facet (but not the current one I am building).
The result of this aggregation is something like this:
"merk_query": {
"doc_count": 7686,
"merk_facets": {
"doc_count": 68658,
"merk_only": {
"doc_count": 7659,
"facets": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Merk",
"doc_count": 7659,
"facetvalues": {
"doc_count_error_upper_bound": 10,
"sum_other_doc_count": 438,
"buckets": [
{
"key": "Orbis",
"doc_count": 6295
},
{
"key": "DX",
"doc_count": 344
},
{
"key": "AXA",
"doc_count": 176
},
{
"key": "Talen Tools",
"doc_count": 127
},
{
"key": "Nemef",
"doc_count": 73
},
{
"key": "bonfix",
"doc_count": 67
},
{
"key": "Bahco",
"doc_count": 64
},
{
"key": "Henderson",
"doc_count": 27
},
{
"key": "Maasland Groep",
"doc_count": 25
},
{
"key": "SYSTEC",
"doc_count": 23
}
]
}
}
]
}
}
}
}
},
And this is the end result in the browser:

Resources