ElasticSearch DSL: query on custom tags too - elasticsearch

I'm new to ES, but already have a basic query that I need to extend.
The query is currently doing a search based on keywords and also on geo-distance.
Now, I have added a custom tag into my index, and I wish to take it into account too.
I wish to filter and score my results based on the imageTags (tag + score)!
PS: I have seen other similar posts, but I can't figure out how to adapt my query (none of my attempts work).
Here is my query:
GET /posts_index/_search
{
"track_total_hits": true,
"from":0,
"size":10,
"_source": [
"id",
"tags",
"taxonomies",
"storesNames",
"geoLocations",
"storesIds",
"imageUri"
],
"query": {
"function_score": {
"query": {
"bool": {
"should": [
{
"distance_feature": {
"field": "geoLocations",
"pivot": "10km",
"origin": [
-71.3,
41.15
]
}
},
{
"distance_feature": {
"field": "creationTime",
"pivot": "14d",
"origin": "now"
}
},
{
"query_string": {
"fields": [
"tags^3",
"taxonomies^5"
],
"query": "",
"fuzziness": "auto"
}
}
]
}
},
"functions": [
{
"script_score": {
"script": {
"source": "Math.sqrt(doc['commentsCount'].value)"
}
}
},
{
"script_score": {
"script": {
"source": "Math.log(2 + doc['likesCount'].value)"
}
}
},
{
"script_score": {
"script": {
"source": "Math.log(2 + doc['viewsCount'].value)"
}
}
}
],
"score_mode": "avg"
}
}
}
Here is one of my attempts to modify it:
{
"query": {
"nested": {
"path": "imageTags",
"score_mode": "sum",
"query": {
"function_score": {
"query": {
"match": {
"imageTags.tag.keyword": "tripod"
}
},
"field_value_factor": {
"field": "imageTags.score",
"factor": 1,
"missing": 0
}
}
}
}
}
}
For example, here is a sample document from the index:
{
"id": "4a9afd93-62bc-e8b2-29b4-39f5b073336d",
"tags": [
"fashion",
"mode",
"summer"
],
"imageTags": [
{
"score": 0.95150965,
"tag": "four-poster"
},
{
"score": 0.014835004,
"tag": "window"
},
{
"score": 0.014835004,
"tag": "shade"
},
{
"score": 0.009375425,
"tag": "sliding"
},
{
"score": 0.009375425,
"tag": "door"
}
],
"taxonomies": [],
"categories": [],
"qualityScore": 0.0,
"geoLocations": [
{
"lat": 50.4651156,
"lon": 4.865208
}
],
"storesIds": [
"ba9b3f59-50aa-8774-11a7-39f5ad58ae1a"
],
"storesNames": [
"Zara Namur"
],
"creationTime": "2020-06-10T12:48:30.5710000Z",
"updateTime": "2020-06-10T12:48:30.5710000Z",
"imageUri": "https://localhost:44359/cdn/e_76372856-f7a0-49cc-d3d9-39f5ad58ad6d/653d147084637b2af68b39f5b0733359.jpg",
"description": "",
"likesCount": 0,
"viewsCount": 0,
"commentsCount": 0,
"ImageTags": [
{
"score": 0.95150965,
"tag": "four-poster"
},
{
"score": 0.014835004,
"tag": "window"
},
{
"score": 0.014835004,
"tag": "shade"
},
{
"score": 0.009375425,
"tag": "sliding"
},
{
"score": 0.009375425,
"tag": "door"
}
]
}

Related

Modify elasticsearch score based on nested field value with filter

Elasticsearch version: 7.11
I have the following structure of index:
PUT /my-test-index
{
"mappings": {
"properties": {
"brand": {
"type": "text"
},
"id": {
"type": "keyword"
},
"availabilityId": {
"type": "integer"
},
"priorities": {
"type": "nested",
"properties": {
"typeId": {
"type": "keyword"
},
"value": {
"type": "integer"
}
}
},
"title": {
"type": "text"
}
}
}
}
For instance, using this data:
POST /my-test-index/_doc
{
"id": 1,
"brand": "Milk One",
"availabilityId": 1,
"title": "Great Value 2% Reduced-Fat Milk, 0.5 Gallon, 64 Fl. Oz",
"priorities": [
{
"typeId": 1,
"value": 3000
},
{
"typeId": 2,
"value": 4000
}
]
}
POST /my-test-index/_doc
{
"id": 2,
"brand": "Milky Two",
"availabilityId": 2,
"title": "Great Value 3.5% Milk Fat Whole Milk, 64 Fl Oz",
"priorities": [
{
"typeId": 1,
"value": 1000
},
{
"typeId": 2,
"value": 3000
}
]
}
POST /my-test-index/_doc
{
"id": 3,
"brand": "Nesty",
"availabilityId": 3,
"title": "Great Value 1% Low-Fat Milk, 128 Fl Oz"
}
POST /my-test-index/_doc
{
"id": 4,
"brand": "No milk",
"availabilityId": 4,
"title": "Almond Breeze Vanilla Almondmilk",
"priorities": [
{
"typeId": 1,
"value": 6000
},
{
"typeId": 2,
"value": 2000
}
]
}
I want:
filtered documents by availabilityId (1, 2, 3)
multi match by title, brand
modify the score by multiplying it by log1p of priorities.value, but only for the priorities entry with typeId = 1. If priorities is null, use 6000 as the value
sort by score
My query:
POST /my-test-index/_search?typed_keys=true
{
"query": {
"bool": {
"filter": [
{
"bool": {
"filter": [
{
"terms": {
"availabilityId": [
"1",
"2",
"3"
]
}
}
]
}
}
],
"must": [
{
"bool": {
"should": [
{
"multi_match": {
"fields": [
"title^100",
"brand^15"
],
"fuzziness": 0,
"minimum_should_match": "2<80%",
"query": "milk",
"type": "most_fields"
}
}
]
}
}
],
"should": [
{
"nested": {
"path": "priorities",
"query": {
"function_score": {
"query": {
"match": {
"priorities.typeId": "1"
}
},
"functions": [
{
"field_value_factor": {
"field": "priorities.value",
"modifier": "log1p",
"missing": 6000
}
}
]
}
}
}
}
]
}
},
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"_id": {
"order": "asc"
}
}
]
}
Problem:
the final score is not multiplied, only summed
documents without priorities are ignored
I solved my problem by using the function script_score:
POST /my-test-index/_search?typed_keys=true
{
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script" : """double priority1 = 6000; if(params._source["priorities"] != null){for (int i = 0; i < params._source["priorities"].length; ++i){ if(params._source["priorities"][i].typeId == 1) priority1 = params._source["priorities"][i].value;}} return Math.log10(priority1 + 1);"""
}
}
],
"query": {
"bool": {
"filter": [
{
"bool": {
"filter": [
{
"terms": {
"availabilityId": [
"1",
"2",
"3"
]
}
}
]
}
}
],
"must": [
{
"bool": {
"should": [
{
"multi_match": {
"fields": [
"title^100",
"brand^15"
],
"fuzziness": 0,
"minimum_should_match": "2<80%",
"query": "milk",
"type": "most_fields"
}
}
]
}
}
]
}
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"_id": {
"order": "asc"
}
}
]
}

Highlight nested object in Elasticsearch

Here is my sample dataset,
{
"parent":[
{
"name":"John Doe 1",
"age":"100 year",
"sex":"male",
"child":[
{
"name":"Jane Doe 1",
"height":100.00,
"width":100.00
},
{
"name":"Jane Doe 2",
"height":100.00,
"width":100.00
}
]
},
{
"name":"John Doe 2",
"age":"100 year",
"sex":"male",
"child":[
{
"name":"Jane Doe 3",
"height":100.00,
"width":100.00
},
{
"name":"Jane Doe 4",
"height":100.00,
"width":100.00
}
]
}
]
}
And my definition:
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"default": {
"type": "simple"
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"parent": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"age": {
"type": "text"
},
"sex": {
"type": "text"
},
"child": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"height": {
"type": "float"
},
"width": {
"type": "float"
}
}
}
}
}
}
}
}
}
I'm using the following query to look for matches in the parent.name property and can get highlights.
{
"query": {
"bool": {
"should": [
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent",
"query": {
"bool": {
"must": [
{
"match": {
"parent.name": {
"query": "John",
"fuzziness": "AUTO:3,6",
"prefix_length": "0"
}
}
}
]
}
}
}
}
]
}
},
"_source": ["parent"],
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"createdOn": {
"order": "desc"
}
}
]
}
Is there a way to get inline highlights for the matches in the child.name properties also so that it would be easy to find exactly which element of that corresponding array got matched?
For example, for the given sample data, if I search by "Doe", I'm expecting to get 6 hits, whereas if I search by "Jane", I would get only 4.
You can simply add another nested query clause inside your top-level should.
Here's how your query should look:
{
"query": {
"bool": {
"should": [
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent",
"query": {
"bool": {
"must": [
{
"match": {
"parent.name": {
"query": "John Doe 1"
}
}
}
]
}
}
}
},
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.child.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent.child",
"query": {
"bool": {
"must": [
{
"match": {
"parent.child.name": {
"query": "Jane Doe 1"
}
}
}
]
}
}
}
}
],
"minimum_should_match": 1
}
},
"_source": ["parent"],
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"createdOn": {
"order": "desc"
}
}
]
}

Filter not working for weighted search

I am pretty new to Elasticsearch and have not really gotten the hang of it yet. I have a search whose results are weighted according to the weight of their tags, which works absolutely fine, but when I later introduced a filter, the search always gives me empty results. Here is what I have tried:
{
"nested": {
"path": "tags",
"score_mode": "sum",
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"match_phrase_prefix": {
"tags.tag": "big"
}
}
],
"filter": {
"term": {
"type.primary": "audio"
}
}
}
},
"field_value_factor": {
"field": "tags.weight"
},
"boost_mode": "multiply",
"boost": 10
}
}
}
}
The example result with the filter should be something like this:
{
"_index": "assets",
"_type": "Asset",
"_id": "5a1dc3c0848662ee49e36f43s",
"_score": 886.8744,
"_source": {
"name": "And Action Breakbeat",
"meta_data": {
"type": "audio/mp3",
"file_name": "music_zapsplat_and_action_breakbeat.mp3"
},
"file_key": "music_zapsplat_and_action_breakbeat.mp3",
"src": {
"url": "https://exapmle.com/music_zapsplat_and_action_breakbeat.mp3"
},
"type": {
"primary": "AUDIO",
"secondary": "mp3"
},
"thumbnail_url": "https://example.com/thumbnail/audio.jpg",
"tags": [
{
"tag": "big",
"weight": 10
},
{
"tag": "beat",
"weight": 5
},
{
"tag": "music",
"weight": 3.3333333333333335
}
],
"isDeleted": false
}
}
Thank you!
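You cannot match type.primary inside a nested query for tags, because type.primary is a field of the root document, not of the nested tags objects. Move the filter outside the nested query. Try this query instead: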
{
"query": {
"bool": {
"filter": {
"term": {
"type.primary": "audio"
}
},
"must": [
{
"nested": {
"path": "tags",
"query": {
"function_score": {
"query": {
"match_phrase_prefix": {
"tags.tag": "big"
}
},
"field_value_factor": {
"field": "tags.weight"
},
"score_mode": "sum",
"boost_mode": "multiply",
"boost": 10
}
}
}
}
]
}
}
}

Post filters and double nested aggregation with multiple filters in ElasticSearch

This is a sample of the data that I have in my index:
[{
"filters": [
{
"group": "color",
"attribute": "red"
},
{
"group": "category",
"attribute": "office"
},
{
"group": "vendor",
"attribute": "some vendor"
},
{
"group": "sub category",
"attribute": "tables"
},
{
"group": "material",
"attribute": "wood"
}
],
"image": "img",
"itemId": "id"
},
{
"filters": [
{
"group": "color",
"attribute": "green"
},
{
"group": "category",
"attribute": "office"
},
{
"group": "vendor",
"attribute": "some vendor"
},
{
"group": "sub category",
"attribute": "tables"
}
],
"image": "img",
"itemId": "id"
},
{
"filters": [
{
"group": "color",
"attribute": "brown"
},
{
"group": "category",
"attribute": "office"
},
{
"group": "vendor",
"attribute": "some vendor"
},
{
"group": "sub category",
"attribute": "chairs"
},
{
"group": "style",
"attribute": "modern"
}
],
"image": "img",
"itemId": "id"
}]
Example of my query:
{
"size": 48,
"sort": [
{
"sequence": {
"order": "asc"
}
}
],
"aggs": {
"price": {
"range": {
"field": "salePrice",
"ranges": [
{
"to": 50.0
},
{
"from": 50.0,
"to": 100.0
},
{
"from": 100.0,
"to": 250.0
},
{
"from": 250.0,
"to": 500.0
},
{
"from": 500.0,
"to": 750.0
},
{
"from": 750.0,
"to": 1000.0
},
{
"from": 1000.0,
"to": 1500.0
},
{
"from": 1500.0,
"to": 2000.0
},
{
"from": 2000.0,
"to": 2500.0
},
{
"from": 2500.0,
"to": 3000.0
},
{
"from": 3000.0,
"to": 3500.0
},
{
"from": 3500.0
}
]
}
},
"filters": {
"nested": {
"path": "filters"
},
"aggs": {
"groups": {
"terms": {
"field": "filters.group"
},
"aggs": {
"attributes": {
"terms": {
"field": "filters.attribute"
}
}
}
}
}
}
},
"query": {
"bool": {
"must": [
{
"match": {
"searchPattern": {
"query": "chairs",
"operator": "and"
}
}
}
],
"filter": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"filters.group": {
"value": "sub category"
}
}
},
{
"term": {
"filters.attribute": {
"value": "tables"
}
}
}
]
}
},
"path": "filters",
"_name": "filters"
}
}
]
}
}
}
As you can see, here I do a double aggregation on the nested "filters" array and apply a filter, and I receive data like this:
category:
office(3)
color:
red(1)
green(1)
brown(1)
sub category:
tables(2)
........
This is logical, because the filtering is applied first and only then the aggregation. But I want to get the other sub categories too and show their counts. So, if the user selects several attributes in one group, I want to do OR filtering. If the user selects an attribute in another group, I want to do AND filtering. Like so:
{
"size": 48,
"sort": [
{
"sequence": {
"order": "asc"
}
}
],
"aggs": {
"price": {
"range": {
"field": "salePrice",
"ranges": [
{
"to": 50.0
},
{
"from": 50.0,
"to": 100.0
},
{
"from": 100.0,
"to": 250.0
},
{
"from": 250.0,
"to": 500.0
},
{
"from": 500.0,
"to": 750.0
},
{
"from": 750.0,
"to": 1000.0
},
{
"from": 1000.0,
"to": 1500.0
},
{
"from": 1500.0,
"to": 2000.0
},
{
"from": 2000.0,
"to": 2500.0
},
{
"from": 2500.0,
"to": 3000.0
},
{
"from": 3000.0,
"to": 3500.0
},
{
"from": 3500.0
}
]
}
},
"filters": {
"nested": {
"path": "filters"
},
"aggs": {
"groups": {
"terms": {
"field": "filters.group"
},
"aggs": {
"attributes": {
"terms": {
"field": "filters.attribute"
}
}
}
}
}
}
},
"query": {
"bool": {
"must": [
{
"match": {
"searchPattern": {
"query": "chairs",
"operator": "and"
}
}
}
],
"filter": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"filters.group": {
"value": "Category"
}
}
},
{
"bool": {
"should": [
{
"term": {
"filters.attribute": {
"value": "Decor"
}
}
},
{
"term": {
"filters.attribute": {
"value": "Patio Dining"
}
}
}
]
}
}
]
}
},
"path": "filters",
"_name": "filters"
}
},
{
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"filters.group": {
"value": "Type"
}
}
},
{
"term": {
"filters.attribute": {
"value": "Coverlets"
}
}
}
]
}
},
"path": "filters",
"_name": "filters"
}
}
]
}
}
}
But, of course, this query has the same problem. So, is it possible to solve this problem in Elasticsearch? I found a few mentions of post_filter, but I have no idea how to use it or how it can help me, because I need to recalculate the group attributes each time. Is it possible, or do I need to "store" the group attributes somewhere and show them after each aggregation?

Elasticsearch how to sort with condition

On my ElasticSearch (2.x) I have documents like this:
{
"title": "A good title",
"formats": [{
"name": "pdf",
"prices": [{
"price": 11.99,
"currency": "EUR"
}, {
"price": 18.99,
"currency": "AUD"
}]
}]
}
I'd like to sort documents by formats.prices.price but only where the formats.prices.currency === 'EUR'
I tried to do a nested field on formats.prices and then run this query:
{
"query": {
"filtered": {
"query": {
"and": [
{
"match_all": {}
}
]
}
}
},
"sort": {
"formats.prices.price": {
"order": "desc",
"nested_path": "formats.prices",
"nested_filter": {
"term": {
"currency": "EUR"
}
}
}
}
}
But unfortunately I cannot get the right order.
UPDATE:
Relevant part of mapping:
"formats": {
"properties": {
"name": {
"type": "string"
},
"prices": {
"type": "nested",
"include_in_parent": true,
"properties": {
"currency": {
"type": "string"
},
"price": {
"type": "double"
}
}
}
}
},
I hope this will solve your problem:
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "formats.prices",
"filter": {
"match": {
"formats.prices.currency": "EUR"
}
}
}
}
]
}
},
"from": 0,
"size": 50,
"sort": [
{
"formats.prices.price": {
"order": "asc",
"nested_path": "formats.prices",
"nested_filter": {
"match": {
"formats.prices.currency": "EUR"
}
}
}
}
]
}

Resources