Elasticsearch Filter - AND/OR behaviour - elasticsearch

I have this query that searches for all documents matching type: location and then filters the result using an exact match on postalCode and countryCode and a prefix match on address.
The filter works fine and behaves as an AND condition, i.e. all 3 clauses must match. How can I achieve an OR condition in the filter? With the OR condition, it should return results even if only one filter clause matches.
Elasticsearch version - 7.9
GET index/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"type": "location"
}
}
],
"filter": [
{
"term": {
"postalCode": "12345"
}
},
{
"prefix": {
"address": "555"
}
},
{
"term": {
"countryCode": "US"
}
}
]
}
}
}

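You can nest a bool query with a should clause inside the filter clause. With minimum_should_match set to 1, a document passes the filter if at least one of the should clauses matches.
Adding a working example with index data, search query, and search result: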
Index Data:
{
"postalCode": "12345",
"address": "555",
"countryCode": "US",
"type":"location"
}
{
"postalCode": "9",
"address": "555",
"countryCode": "US",
"type":"location"
}
{
"postalCode": "9",
"address": "4",
"countryCode": "US",
"type":"location"
}
{
"postalCode": "9",
"address": "4",
"countryCode": "AK",
"type":"location"
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"match": {
"type": "location"
}
}
],
"filter": [
{
"bool": {
"should": [
{
"term": {
"postalCode": "12345"
}
},
{
"prefix": {
"address": "555"
}
},
{
"term": {
"countryCode.keyword": "US"
}
}
],
"minimum_should_match":1
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "65192559",
"_type": "_doc",
"_id": "2",
"_score": 0.10536051,
"_source": {
"postalCode": "9",
"address": "555",
"countryCode": "US",
"type": "location"
}
},
{
"_index": "65192559",
"_type": "_doc",
"_id": "1",
"_score": 0.10536051,
"_source": {
"postalCode": "12345",
"address": "555",
"countryCode": "US",
"type": "location"
}
},
{
"_index": "65192559",
"_type": "_doc",
"_id": "3",
"_score": 0.10536051,
"_source": {
"postalCode": "9",
"address": "4",
"countryCode": "US",
"type": "location"
}
}
]

Related

ElasticSearch wildcard highlighting with hyphen

I am having trouble with a wildcard query. When the text contains a hyphen, nothing after the hyphen is highlighted. I played with the highlight settings but have not found a solution yet. Is this normal behavior?
I create an index:
PUT testhighlight
PUT testhighlight/_mapping/_doc
{
"properties": {
"title": {
"type": "text",
"term_vector": "with_positions_offsets"
},
"content": {
"type": "text",
"term_vector": "with_positions_offsets"
}
}
}
Then I create documents:
PUT testhighlight/_doc/1
{
"title": "1",
"content": "test-input"
}
PUT testhighlight/_doc/2
{
"title": "2",
"content": "test input"
}
PUT testhighlight/_doc/3
{
"title": "3",
"content": "testinput"
}
Then I execute this search request:
GET testhighlight/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"title",
"content"
],
"query": "test*"
}
}
]
}
},
"highlight": {
"fields": {
"content": {
"boundary_max_scan": 10,
"fragment_offset": 5,
"fragment_size": 250,
"type": "fvh",
"number_of_fragments": 5,
"order": "score",
"boundary_scanner": "word",
"post_tags": [
"</span>"
],
"pre_tags": [
"""<span class="highlight-search">"""
]
}
}
}
}
It returns these hits:
"hits": [
{
"_index": "testhighlight",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"title": "2",
"content": "test input"
},
"highlight": {
"content": [
"""<span class="highlight-search">test</span> input"""
]
}
},
{
"_index": "testhighlight",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"title": "1",
"content": "test-input"
},
"highlight": {
"content": [
"""<span class="highlight-search">test</span>-input"""
]
}
},
{
"_index": "testhighlight",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"title": "3",
"content": "testinput"
},
"highlight": {
"content": [
"""<span class="highlight-search">testinput</span>"""
]
}
}
]
It looks alright, but it didn't highlight the whole "test-input" in the document with ID 1. Is there any way to do so?

ElasticSearch compound queries

My index data is
{
"first_name":"Kevin",
"last_name":"John",
"job": "IT"
}
{
"first_name":"John",
"last_name":"Thimothy",
"job": "Accountant"
}
{
"first_name":"Eric",
"last_name":"Villa",
"job": "Driver"
}
{
"first_name":"John",
"last_name":"Villa",
"job": "Student"
}
Could anyone help me build a query to get the documents that have first_name or last_name equal to John and have a job of IT or Student?
You need to use a combination of bool/must/should clauses: one should group for the name fields and one for the job, both wrapped in must.
Search Query:
{
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"match": {
"first_name": "John"
}
},
{
"match": {
"last_name": "John"
}
}
]
}
},
{
"bool": {
"should": [
{
"match": {
"job": "IT"
}
},
{
"match": {
"job": "student"
}
}
]
}
}
]
}
}
}
Search Result will be
"hits": [
{
"_index": "66982646",
"_type": "_doc",
"_id": "1",
"_score": 2.4079456,
"_source": {
"first_name": "Kevin",
"last_name": "John",
"job": "IT"
}
},
{
"_index": "66982646",
"_type": "_doc",
"_id": "4",
"_score": 1.89712,
"_source": {
"first_name": "John",
"last_name": "Villa",
"job": "Student"
}
}
]

elasticsearch filter on nested array

Let's say records have a cities field that is an array of city names.
Example records:
record 1:
{
cities : [
{name: city1},
{name : city2},
{name : city3}
]
}
record 2:
{
cities : [
{name: city2},
{name : city3},
{name : city4}
]
}
record 3:
{
cities : [
{name: city3},
{name : city4},
{name : city5}
]
}
requirement:
My filter criterion is to fetch the records that match city1, city2, or city3. Since record 1 matches all 3, it should come first; record 2 matches 2, so it should come second; and record 3 matches only one, so it should come last.
You don't have to use the nested data type, as you don't have nested properties or complex objects; this is very simple and easy to achieve.
Working example
Index mapping
{
"mappings": {
"properties": {
"cities": {
"type": "text"
}
}
}
}
Index sample docs
{
"cities": [
"tel-aviv", "bangalore", "sf"
]
}
{
"cities": [
"tel-aviv"
]
}
{
"cities": [
"sf"
]
}
Search query
{
"query": {
"bool": {
"should": [
{
"match": {
"cities": "tel-aviv"
}
},
{
"match": {
"cities": "bangalore"
}
},
{
"match": {
"cities": "sf"
}
}
]
}
}
}
And the search result, with the expected order and scores:
"hits": [
{
"_index": "cities",
"_type": "_doc",
"_id": "1",
"_score": 1.850198,
"_source": {
"cities": [
"tel-aviv",
"bangalore",
"sf"
]
}
},
{
"_index": "cities",
"_type": "_doc",
"_id": "2",
"_score": 0.9983525,
"_source": {
"cities": [
"tel-aviv"
]
}
},
{
"_index": "cities",
"_type": "_doc",
"_id": "3",
"_score": 0.6133945,
"_source": {
"cities": [
"sf"
]
}
}
]
Adding another answer with nested bool queries:
Index Mapping:
{
"mappings": {
"properties":{
"Cities": {
"type": "nested",
"dynamic": "true"
}
}}
}
Index Data:
{
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "Hyderabad"
},
{
"id": 3,
"city": "Delhi"
}
]
}
{
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "abc"
},
{
"id": 3,
"city": "Def"
}
]
}
Search Query:
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "Cities",
"query": {
"bool": {
"must": [
{
"match": {
"Cities.city": "Bangalore"
}
}
]
}
}
}
},
{
"nested": {
"path": "Cities",
"query": {
"bool": {
"must": [
{
"match": {
"Cities.city": "Hyderabad"
}
}
]
}
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "nested-63806067",
"_type": "_doc",
"_id": "1",
"_score": 3.297317, <-- note this
"_source": {
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "Hyderabad"
},
{
"id": 3,
"city": "Delhi"
}
]
}
},
{
"_index": "nested-63806067",
"_type": "_doc",
"_id": "2",
"_score": 1.6486585, <-- note this
"_source": {
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "abc"
},
{
"id": 3,
"city": "Def"
}
]
}
}
]

Elasticsearch Similar Text Query

Given the following documents in an index (let's call it addresses):
{
ADDRESS: {
ID: 1,
LINE1: "steet 1",
CITY: "kuala lumpur",
COUNTRY: "MALAYSIA",
...
}
}
{
ADDRESS: {
ID: 2,
LINE1: "steet 1",
CITY: "kualalumpur city",
COUNTRY: "MALAYSIA",
...
}
}
{
ADDRESS: {
ID: 3,
LINE1: "steet 1",
CITY: "kualalumpur",
COUNTRY: "MALAYSIA",
...
}
}
{
ADDRESS: {
ID: 4,
LINE1: "steet 1",
CITY: "kuala lumpur city",
COUNTRY: "MALAYSIA",
...
}
}
So far, I have a query that returns "kualalumpur", "kuala lumpur", and "kualalumpur city" for the search text "kualalumpur".
But "kuala lumpur city" is missing from the result despite its close similarity to "kualalumpur city".
Here is my query so far:
{
"query": {
"bool": {
"should": [
{"match": {"ADDRESS.STREET": {"query": "street 1", "fuzziness": 1, "operator": "AND"}}},
{
"bool": {
"should": [
{"match": {"ADDRESS.CITY": {"query": "kualalumpur", "fuzziness": 1, "operator": "OR"}}},
{"match": {"ADDRESS.CITY.keyword": {"query": "kualalumpur", "fuzziness": 1, "operator": "OR"}}}
]
}
}
],
"filter": {
"bool": {
"must": [
{"term": {"ADDRESS.COUNTRY.keyword": "MALAYSIA"}}
]
}
},
"minimum_should_match": 2
}
}
}
Given the condition, is it possible at all for Elasticsearch to return all four documents with search text "kualalumpur"?
You can use an edge n-gram tokenizer on the country field to get all four docs. I tried it locally and am adding a working example below.
Create a custom analyzer and apply it to your field:
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "edgeNGramTokenizer"
}
},
"tokenizer": {
"edgeNGramTokenizer": {
"token_chars": [
"letter",
"digit"
],
"min_gram": "1",
"type": "edgeNGram",
"max_gram": "40"
}
}
},
"max_ngram_diff": "50"
}
},
"mappings": {
"properties": {
"country": {
"type": "text",
"analyzer" : "ngram_analyzer"
}
}
}
}
Index all four of your sample docs, like below:
{
"country" : "kuala lumpur"
}
A search query with the term kualalumpur matches all four docs:
{
"query": {
"match" : {
"country" : "kualalumpur"
}
}
}
"hits": [
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "3",
"_score": 5.0003963,
"_source": {
"country": "kualalumpur"
}
},
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "2",
"_score": 4.4082437,
"_source": {
"country": "kualalumpur city"
}
},
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "1",
"_score": 0.5621849,
"_source": {
"country": "kuala lumpur"
}
},
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "4",
"_score": 0.4956103,
"_source": {
"country": "kuala lumpur city"
}
}
]

How to aggregate on nested objects in elasticsearch

I have the following mapping in ES:
"mappings": {
"products": {
"properties": {
"product": {
"type" : "nested",
"properties": {
"features": {
"type": "nested"
},
"sitedetails": {
"type": "nested"
}
}
}
}
}
}
and then 3 products like this:
"hits": [
{
"_index": "catalog",
"_type": "products",
"_id": "AVNE8F4mFYOWvB4rMqdO",
"_score": 1,
"_source": {
"product": {
"ean": "abc",
"features": {
"productType": "DVD player"
},
"color": "Black",
"manufacturer": "Sony",
"sitedetails": [
{
"name": "amazon.com",
"sku": "zzz",
"url": "http://www.amazon.com/dp/zzz"
}
],
"category": "Portable DVD Players"
}
}
},
{
"_index": "catalog",
"_type": "products",
"_id": "AVNE8XkXFYOWvB4rMqdQ",
"_score": 1,
"_source": {
"product": {
"ean": "def",
"features": {
"ProductType": "MP3 player"
},
"color": "Black",
"manufacturer": "LG",
"sitedetails": [
{
"name": "amazon.com",
"sku": "aaa",
"url": "http://www.amazon.com/dp/aaa"
}
],
"category": "MP3 Players"
}
}
},
{
"_index": "catalog",
"_type": "products",
"_id": "AVNIh-xVWwxj6Cz_r8AT",
"_score": 1,
"_source": {
"product": {
"ean": "abc",
"features": {
"productType": "DVD player"
},
"color": "White",
"manufacturer": "Sony",
"sitedetails": [
{
"name": "amazon.com",
"sku": "ggg",
"url": "http://www.amazon.com/dp/ggg"
}
],
"category": "Portable DVD Players"
}
}
}
]
I need to display on the UI side 2 filters, one for Manufacturer and one for website.
How can I aggregate on product.manufacturer and product.sitedetails.name?
Thanks!
Figured it out:
GET /catalog/products/_search
{
"aggs": {
"byManufacturer": {
"nested": {
"path": "product"
},
"aggs": {
"byManufacturer": {
"terms": {
"field": "product.manufacturer"
}
}
}
},
"bySeller": {
"nested": {
"path": "product.sitedetails"
},
"aggs": {
"bySeller": {
"terms": {
"field": "product.sitedetails.name"
}
}
}
}
}
}

Resources