Not able to search elasticsearch document - elasticsearch

I am a newbie at Elasticsearch. I am using Elasticsearch 7.8.1 for some custom search in my application.
Here is the sample dataset.
The search that needs to happen is something like this:
-- Exact match on vinNumber and organizationId, combined with a partial
-- match on at least one of the text columns. The OR branches are grouped in
-- parentheses so that the two exact-match filters apply to every branch.
select * from maintenance_logs
where vinNumber = "xyz"
and organizationId = 1
and (dtcCode like "%p101%"
or subSystem like "%p101%"
or description like "%p101%");
Here is the document stored:
GET /maintenance_logs/_search
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "maintenance_logs",
"_type" : "_doc",
"_id" : "41a47230-02d1-11ed-a8f8-813988188fd2",
"_score" : 1.0,
"_source" : {
"_class" : "com.domain.search.MaintenanceLog",
"id" : "41a47230-02d1-11ed-a8f8-813988188fd2",
"maintenanceActivity" : "test103",
"vinNumber" : "DH34ASD7SDFF84742",
"organizationId" : 1,
"partitionYear" : "2022",
"dtcCode" : "",
"subSystem" : "",
"description" : "",
"odometer" : 91000,
"statsDate" : "2022-07-13"
}
},
{
"_index" : "maintenance_logs",
"_type" : "_doc",
"_id" : "5fac7720-033d-11ed-97e1-a3441dab3d6a",
"_score" : 1.0,
"_source" : {
"_class" : "com.search.MaintenanceLog",
"id" : "5fac7720-033d-11ed-97e1-a3441dab3d6a",
"maintenanceActivity" : "test103",
"vinNumber" : "DH34ASD7SDFF84742",
"organizationId" : 1,
"partitionYear" : "2022",
"dtcCode" : "D101",
"subSystem" : "ac vent",
"description" : "ac vent replaced",
"odometer" : 91000,
"statsDate" : "2022-07-14"
}
}
]
}
}
This is how my Document looks:
#Document(indexName = "maintenance_logs", createIndex = true)
public class MaintenanceLog {
#Id
private String id;
private String maintenanceActivity;
private String vinNumber;
private Integer organizationId;
private String partitionYear;
private String dtcCode;
private String subSystem;
private String description;
private Integer odometer;
}
Here is my query. The intention is: I have a search bar where, let's say, I typed p101. It should then look through all the documents,
do an exact match on vinNumber and organizationId, and then return whatever matches partially (like a MySQL LIKE query) on any one of these attributes: dtcCode, subSystem, maintenanceActivity or description.
GET /maintenance_logs/_search
{
"query": {
"bool" : {
"must" : [
{ "term" : { "vinNumber" : "DH34ASD7SDFF84742" } },
{ "term" : { "organizationId" : 1 } }
],
"should" : [
{ "term" : { "dtcCode": "p101*" } },
{ "term" : { "subSystem" : "p101*" }},
{ "term" : { "maintenanceActivity" : "p101*" }},
{ "term" : { "description" : "p101*" }}
],
"minimum_should_match" : 1,
"boost" : 1.0
}
}
}

You have two issues with your query:
First, you are using a term query with a wildcard pattern instead of a wildcard query; a term query treats "p101*" as a literal value.
Second, you are running a term query against vinNumber, which is a text field. A text field is analyzed (lowercased by the default standard analyzer), so the exact upper-case value is not found by a term query.
To resolve this, you need to use a wildcard query instead of a term query, and you need to use vinNumber.keyword instead of vinNumber (assuming vinNumber is mapped as a multi-field with both text and keyword types). Please check the query below:
{
"query": {
"bool": {
"must": [
{
"term": {
"vinNumber.keyword": "DH34ASD7SDFF84742"
}
},
{
"term": {
"organizationId": 1
}
}
],
"minimum_should_match": 1,
"should": [
{
"wildcard": {
"dtcCode": {
"value": "d10*"
}
}
},
{
"wildcard": {
"subSystem": {
"value": "p101*"
}
}
},
{
"wildcard": {
"maintenanceActivity": {
"value": "p101*"
}
}
},
{
"wildcard": {
"description": {
"value": "p101*"
}
}
}
]
}
}
}
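Also, you need to set "minimum_should_match": 1 because your query has an AND condition on vinNumber and organizationId: when a bool query contains a must clause, the should clauses are optional by default, so without this setting documents that match none of them would still be returned.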

Term queries are used for exact matching. If you need a partial search, you can use a Wildcard query, a Regexp query or a Query String query.
If you are using the default mapping, then you need to modify your query as follows:
{
"query": {
"bool": {
"must": [
{
"term": {
"vinNumber.keyword": "DH34ASD7SDFF84742"
}
},
{
"term": {
"organizationId": 1
}
}
],
"should": [
{
"query_string": {
"query": "*p101*",
"fields": [
"dtcCode"
]
}
},
{
"query_string": {
"query": "*p101*",
"fields": [
"subSystem"
]
}
},
{
"query_string": {
"query": "*p101*",
"fields": [
"maintenanceActivity"
]
}
},
{
"query_string": {
"query": "*p101*",
"fields": [
"description"
]
}
}
],
"minimum_should_match": 1
}
}
}
Note :
If you need to perform a partial search in such a way that, the text matches from the beginning of the value of the fields dtcCode,subSystem, etc. then you can simply go with Prefix Query as well.

Related

elasticsearch filter nested object

I have an index with a nested object containing two attributes namely scopeId and categoryName. Following is the mappings part of the index
"mappedCategories" : {
"type" : "nested",
"properties": {
"scopeId": {"type":"long"},
"categoryName": {"type":"text",
"analyzer" : "productSearchAnalyzer",
"search_analyzer" : "productSearchQueryAnalyzer"}
}
}
A sample document containing the nested mappedCategories object is as follows:
POST productsearchna_2/_doc/1
{
"categoryName" : "Operating Systems",
"contexts" : [
0
],
"countryCode" : "US",
"id" : "10076327-1",
"languageCode" : "EN",
"localeId" : 1,
"mfgpartno" : "test123",
"manufacturerName" : "Hewlett Packard Enterprise",
"productDescription" : "HPE Microsoft Windows 2000 Datacenter Server - Complete Product - Complete Product - 1 Server - Standard",
"productId" : 10076327,
"skus" : [
{"sku": "43233004",
"skuName": "UNSPSC"},
{"sku": "43233049",
"skuName": "SP Richards"},
{"sku": "43234949",
"skuName": "Ingram Micro"}
],
"mappedCategories" : [
{"scopeId": 3228552,
"categoryName": "Laminate Bookcases"},
{"scopeId": 3228553,
"categoryName": "Bookcases"},
{"scopeId": 3228554,
"categoryName": "Laptop"}
]
}
I want to filter categoryName "lap" on scopeId: 3228553 i.e. my query should return 0 hits since Laptop is mapped to scopeId 3228554. But my following query is returning 1 hit with scopeId : 3228554
POST productsearchna_2/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "mappedCategories",
"query": {
"term": {
"mappedCategories.categoryName": "lap"
}
},
"inner_hits": {}
}
}
],
"filter": [
{
"nested": {
"path": "mappedCategories",
"query": {
"term": {
"mappedCategories.scopeId": {
"value": 3228552
}
}
}
}
}
]
}
},
"_source": ["mappedCategories.categoryName", "productId"]
}
Following is part of the result of the query:
"inner_hits" : {
"mappedCategories" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.5586993,
"hits" : [
{
"_index" : "productsearchna_2",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "mappedCategories",
"offset" : 2
},
"_score" : 1.5586993,
"_source" : {
"scopeId" : 3228554,
"categoryName" : "Laptop"
}
}
]
}
}
I want my query to return zero hits, and in case I search for "book" with scopeId: 3228552, I want my query to return 2 hits, 1 for Bookcases and another for Laminate Bookcases categoryNames. Please help.
This query solves part of the problem, but when searching for "book" with scopeId: 3228552 it will only return 1 result.
GET idx_test/_search?filter_path=hits.hits.inner_hits
{
"query": {
"nested": {
"path": "mappedCategories",
"query": {
"bool": {
"filter": [
{
"term": {
"mappedCategories.scopeId": {
"value": 3228553
}
}
}
],
"must": [
{
"match": {
"mappedCategories.categoryName": "laptop"
}
}
]
}
},
"inner_hits": {}
}
}
}

Elasticsearch filter by multiple fields in an object which is in an array field

The goal is to filter products with multiple prices.
The data looks like this:
{
"name":"a",
"price":[
{
"membershipLevel":"Gold",
"price":"5"
},
{
"membershipLevel":"Silver",
"price":"50"
},
{
"membershipLevel":"Bronze",
"price":"100"
}
]
}
I would like to filter by membershipLevel and price. For example, if I am a silver member and query price range 0-10, the product should not appear, but if I am a gold member, the product "a" should appear. Is this kind of query supported by Elasticsearch?
You need to make use of nested datatype for price and make use of nested query for your use case.
Please see the below mapping, sample document, query and response:
Mapping:
PUT my_price_index
{
"mappings": {
"properties": {
"name":{
"type":"text"
},
"price":{
"type":"nested",
"properties": {
"membershipLevel":{
"type":"keyword"
},
"price":{
"type":"double"
}
}
}
}
}
}
Sample Document:
POST my_price_index/_doc/1
{
"name":"a",
"price":[
{
"membershipLevel":"Gold",
"price":"5"
},
{
"membershipLevel":"Silver",
"price":"50"
},
{
"membershipLevel":"Bronze",
"price":"100"
}
]
}
Query:
POST my_price_index/_search
{
"query": {
"nested": {
"path": "price",
"query": {
"bool": {
"must": [
{
"term": {
"price.membershipLevel": "Gold"
}
},
{
"range": {
"price.price": {
"gte": 0,
"lte": 10
}
}
}
]
}
},
"inner_hits": {} <---- Do note this.
}
}
}
The above query means, I want to return all the documents having price.price range from 0 to 10 and price.membershipLevel as Gold.
Notice that I've made use of inner_hits. The reason is that, even though price is a nested field, ES returns the entire parent document in the response rather than only the nested document(s) to which the query clause applies.
In order to find the exact nested doc that has been matched, you would need to make use of inner_hits.
Below is how the response would return.
Response:
{
"took" : 128,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.9808291,
"hits" : [
{
"_index" : "my_price_index",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.9808291,
"_source" : {
"name" : "a",
"price" : [
{
"membershipLevel" : "Gold",
"price" : "5"
},
{
"membershipLevel" : "Silver",
"price" : "50"
},
{
"membershipLevel" : "Bronze",
"price" : "100"
}
]
},
"inner_hits" : {
"price" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.9808291,
"hits" : [
{
"_index" : "my_price_index",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "price",
"offset" : 0
},
"_score" : 1.9808291,
"_source" : {
"membershipLevel" : "Gold",
"price" : "5"
}
}
]
}
}
}
}
]
}
}
Hope this helps!
Let me show you how to do it using nested fields together with query and filter context. I will take your example to show you how to define the index mapping, index sample documents, and write the search query.
It's important to note the include_in_parent param in the Elasticsearch mapping, which allows us to query these nested fields without using the nested query.
Please refer to Elasticsearch documentation about it.
If true, all fields in the nested object are also added to the parent
document as standard (flat) fields. Defaults to false.
Index Def
{
"mappings": {
"properties": {
"product": {
"type": "nested",
"include_in_parent": true
}
}
}
}
Index sample docs
{
"product": {
"price" : 5,
"membershipLevel" : "Gold"
}
}
{
"product": {
"price" : 50,
"membershipLevel" : "Silver"
}
}
{
"product": {
"price" : 100,
"membershipLevel" : "Bronze"
}
}
Search query to show Gold with price range 0-10
{
"query": {
"bool": {
"must": [
{
"match": {
"product.membershipLevel": "Gold"
}
}
],
"filter": [
{
"range": {
"product.price": {
"gte": 0,
"lte" : 10
}
}
}
]
}
}
}
Result
"hits": [
{
"_index": "so-60620921-nested",
"_type": "_doc",
"_id": "1",
"_score": 1.0296195,
"_source": {
"product": {
"price": 5,
"membershipLevel": "Gold"
}
}
}
]
Search query to exclude Silver, with same price range
{
"query": {
"bool": {
"must": [
{
"match": {
"product.membershipLevel": "Silver"
}
}
],
"filter": [
{
"range": {
"product.price": {
"gte": 0,
"lte" : 10
}
}
}
]
}
}
}
Above query doesn't return any result as there isn't any matching result.
P.S :- this SO answer might help you to understand nested fields and query on them in detail.
You have to use nested fields and a nested query to achieve this: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-nested-query.html
Define your price property with type "nested" and then you will be able to filter by every property of the nested object.

Combining nested query get illegal_state_exception failed to find nested object under path

I'm creating a query on Elasticsearch to find documents across all indices.
I need to combine should, must and nested queries in Elasticsearch. I get the right result, but I also get an error inside the result.
This is the query I'm using
GET _all/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{ "term": { "trimmed_final_url": "https://www.repubblica.it/t.../" } }
],
"must": [
{
"nested": {
"path": "entities",
"query": {
"bool": {
"must": [
{ "term": { "entities.id": "138511" } }
]
}
}
}
},
{
"term": {
"language": { "value": "it" }
}
}
]
}
}
}
And this is the result
{
"_shards" : {
"total" : 38,
"successful" : 14,
"skipped" : 0,
"failed" : 24,
"failures" : [
{
"shard" : 0,
"index" : ".kibana_1",
"node" : "7twsq85TSK60LkY0UiuWzA",
"reason" : {
"type" : "query_shard_exception",
"reason" : """
failed to create query: {
...
"index_uuid" : "HoHi97QFSaSCp09iSKY1DQ",
"index" : ".reporting-2019.06.02",
"caused_by" : {
"type" : "illegal_state_exception",
"reason" : "[nested] failed to find nested object under path [entities]"
}
}
},
...
"hits" : {
"total" : {
"value" : 50,
"relation" : "eq"
},
"max_score" : 16.90015,
"hits" : [
{
"_index" : "i_201906_v1",
"_type" : "_doc",
"_id" : "MugcbmsBAzi8a0oJt96Q",
"_score" : 16.90015,
"_source" : {
"language" : "it",
"entities" : [
{
"id" : 101580,
},
{
"id" : 156822,
},
...
I didn't write some fields because the code is too long
I am new to StackOverFlow (made this account to answer this question :D) so if this answer is out of line bear with me. I have been dabbling in nested fields in Elasticsearch recently so I have some ideas as to how this error could be appearing.
Have you defined a mapping for your document type? I don't believe Elasticsearch will recognize the field as nested if you do not tell it to do so in the mapping:
PUT INDEX_NAME
{
"mappings": {
"DOC_TYPE": {
"properties": {
"entities": {"type": "nested"}
}
}
}
}
You may have to specify this mapping for each index and document type. Not sure if there is a way to do that all with one request.
I also noticed you have a "should" clause with minimum matches set to 1. I believe this is exactly the same as a "must" clause so I am not sure what purpose this achieves (correct me if I'm wrong). If your mapping is specified, the query should look something like this:
GET /_all/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "entities",
"query": {
"term": {
"entities.id": {
"value": "138511"
}
}
}
}
},
{
"term": {
"language": {
"value": "it"
}
}
},
{
"term": {
"trimmed_final_url": {
"value": "https://www.repubblica.it/t.../"
}
}
}
]
}
}
}

access query value from function_score to compute new score

I need to customize ES score. The score function I need to implement is:
score = len(document_term) - len(query_term)
For instance, one of my document in the ES index is :
{
"name": "foobar"
}
And the search query
{
"query": {
"function_score": {
"query": {
"match": {
"name": {
"query": "foo"
}
}
},
"functions": [
{
"script_score": {
"script": {
"source": "doc['name'].value.length() - ?LEN(query_tem)?"
}
}
}
],
"boost_mode": "replace"
}
}
}
The above search should provide a score of 6 - 3 = 3. But I didn't find a solution to get access the value of the query term.
Is it possible to access the value of the query term in a function_score context ?
There is no direct way to do this, however you can achieve that in the below way where you would need to add the query parameters in two different parts of the query.
Before that, one important note: you cannot use doc['myfield'].value if the field is of type text. Instead, you need a sibling sub-field of type keyword and must refer to that in the script, as shown below:
Mapping:
PUT myindex
{
"mappings" : {
"properties" : {
"myfield" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
Sample Document:
POST myindex/_doc/1
{
"myfield": "I've become comfortably numb"
}
Query:
POST <your_index_name>/_search
{
"query": {
"function_score": {
"query": {
"match": {
"myfield": "numb"
}
},
"functions": [
{
"script_score": {
"script": {
"source": "return doc['myfield.keyword'].value.length() - params.myquery.length()",
"params": {
"myquery": "numb" <---- Add the query string here as well
}
}
}
}
],
"boost_mode": "replace"
}
}
}
Response:
{
"took" : 558,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 24.0,
"hits" : [
{
"_index" : "myindex",
"_type" : "_doc",
"_id" : "1",
"_score" : 24.0,
"_source" : {
"myfield" : "I've become comfortably numb"
}
}
]
}
}
Hope this helps!

Elastic search: exact match query on string array

Given this document:
{"name": "Perfect Sunny-Side Up Eggs","ingredientList": ["canola oil","eggs"]}
How can I build a query in Elasticsearch to return exact matches on a string array given the query term "oil eggs"? So far this is what I have, but it returns other irrelevant documents:
POST /recipes/recipe/_search
{
"query": {
"match": {
"ingredientList": {
"query": [
"oil",
"eggs"
],
"operator": "and"
}
}
}
}
for instance, this document is returned but it doesn't contain "oil". Results should only contain "oil" and "eggs":
{"name": "Quick Baked French Toast","ingredientList": ["butter","cinnamon raisin bread","eggs"]}
Your query will look like this:
{
"query": {
"bool": {
"must": [
{
"term": {
"ingredientList": "oil"
}
},
{
"term": {
"ingredientList": "eggs"
}
}
]
}
}
}
Gives me the results:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.0,
"hits" : [ {
"_index" : "ingredients",
"_type" : "recipe",
"_id" : "AVeprXFrNutW6yNguPqp",
"_score" : 1.0,
"_source" : {
"name" : "Perfect Sunny-Side Up Eggs",
"ingredientList" : [ "canola oil", "eggs" ]
}
} ]
}
}
Elasticsearch doesn't have an API for exact-matching an array, but the same can be achieved using two methods:
Using multiple must blocks (not preferred)
Using terms set query and script
"query": {
"bool": {
"must": [
{
"terms_set": {
"ingredientList": {
"terms": ingredients,
"minimum_should_match_script": {
"source": "Math.min(params.num_terms, {})".format(len(ingredients))
}
}
}
},
{
"script": {
"script": {
"inline": "doc['ingredientList'].length == params.list_length",
"lang": "painless",
"params": {
"list_length": len(ingredients)
}
}
}
}
]
}
}

Resources