How to query elasticsearch nested object using fuzziness and wildcard query - elasticsearch

{
"took":1,
"timed_out":false,
"_shards":{
"total":5,
"successful":5,
"skipped":0,
"failed":0
},
"hits":{
"total":1,
"max_score":1,
"hits":[
{
"_index":"event_11",
"_type":"_doc",
"_id":"1",
"_score":1,
"_source":{
"title":"Event One",
"comments":{
"author":"Alvin",
"author_id":1
}
},
"inner_hits":{
"comments":{
"hits":{
"total":1,
"max_score":1,
"hits":[
{
"_index":"event_11",
"_type":"_doc",
"_id":"1",
"_nested":{
"field":"comments",
"offset":0
},
"_score":1,
"_source":{
"author":"Alvin",
"author_id":1
}
}
]
}
}
}
}
]
}
}
I am trying to query the above data with the below wildcard query:
GET event_11/_search
{
"query": {
"nested": {
"path": "comments",
"query": {
"wildcard": {
"comments.author": "Al*"
}
}
}
}
}
The above query returns an empty result set. Can someone help me fix the search query so that it uses both wildcard and fuzziness? I am using ElasticSearch 6 and Kibana to create my queries. The PHP SDK is used to write the queries from a PHP application.

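You can try this: it combines a wildcard clause and a fuzzy match clause in a bool/should inside the nested query (replace the sample values with your own search terms). Note that wildcard is a term-level query whose pattern is not analyzed, so if comments.author is an analyzed text field the indexed terms are lowercased and the pattern should be lowercase as well (for example "al*" rather than "Al*").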
{
"query": {
"nested": {
"path": "comments",
"query": {
"bool": {
"should": [
{
"wildcard": {
"comments.author": "real*"
}
},
{
"match": {
"comments.author": {
"query": "reaa",
"fuzziness": "AUTO"
}
}
}
]
}
}
}
}
}

Related

ElasticSearch filter by nested boolean type fields

I need to query on multiple nested fields of boolean type.
Structure of mapping:
"mappings" : {
"properties" : {
"leaders" : {
"type" : "nested",
"properties" : {
"except_1" : {
"type" : "boolean"
},
"except_2" : {
"type" : "boolean"
},
"counter" : {
"type" : "integer"
}
}
}
}
}
I am trying to query for documents where both except_1 and except_2 are false.
Below is my attempt; unfortunately, it returns both true and false values for both fields, and I cannot fix it.
"query": {
"nested": {
"path": "leaders",
"query": {
"bool": {
"must": [
{
"term": {
"leaders.except_1": False
}
},
{
"term": {
"leaders.except_2": False
}
}
]
}
}
}
}
What you're probably looking for is the inner_hits option -- showing only the matched nested subdocuments.
PUT leaders
{"mappings":{"properties":{"leaders":{"type":"nested","properties":{"except_1":{"type":"boolean"},"except_2":{"type":"boolean"},"counter":{"type":"integer"}}}}}}
POST leaders/_doc
{
"leaders": [
{
"except_1": true,
"except_2": false
},
{
"except_1": false,
"except_2": false
}
]
}
GET leaders/_search
{
"query": {
"nested": {
"path": "leaders",
"inner_hits": {},
"query": {
"bool": {
"must": [
{
"term": {
"leaders.except_1": false
}
},
{
"term": {
"leaders.except_2": false
}
}
]
}
}
}
}
}
then
GET leaders/_search
{
"query": {
"nested": {
"path": "leaders",
"inner_hits": {},
"query": {
"bool": {
"must": [
{
"term": {
"leaders.except_1": false
}
},
{
"term": {
"leaders.except_2": false
}
}
]
}
}
}
}
}
yielding
{
"hits":[
{
"_index":"leaders",
"_type":"_doc",
"_id":"u-he8HEBG_KW3EFn-gMz",
"_score":0.87546873,
"_source":{ <-- default behavior
"leaders":[
{
"except_1":true,
"except_2":false
},
{
"except_1":false,
"except_2":false
}
]
},
"inner_hits":{
"leaders":{
"hits":{
"total":{
"value":1,
"relation":"eq"
},
"max_score":0.87546873,
"hits":[ <------- only the matching nested subdocument
{
"_index":"leaders",
"_type":"_doc",
"_id":"u-he8HEBG_KW3EFn-gMz",
"_nested":{
"field":"leaders",
"offset":1
},
"_score":0.87546873,
"_source":{
"except_1":false,
"except_2":false
}
}
]
}
}
}
}
]
}
Furthermore, you can force the system to only return the inner_hits by saying "_source": "inner_hits" on the top-level of your search query.

Combine multiple individual queries into one to get aggregated result in Elasticsearch

I have built two queries in ElasticSearch to get the count for each error message. For example, the first query gets the number of error messages related to the "was not found" error.
GET /logstash*/_search
{
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"match": {
"kubernetes.pod_name": "api"
}
},
{
"match": {
"log": "error"
}
},
{
"match": {
"log": {
"query": "was not found",
"operator": "and"
}
}
},
{
"range": {"@timestamp": {
"time_zone": "CET",
"gt": "now-7d",
"lte": "now"}}
}
]
}
}
}
},
"aggs" : {
"type_count" : {
"value_count" : {
"script" : {
"source" : "doc['log.keyword'].value"
}
}
}
}
}
The second query is to get the count of error messages related to "Duplicate Entry" error
GET /logstash*/_search
{
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"match": {
"kubernetes.pod_name": "api"
}
},
{
"match": {
"log": "error"
}
},
{
"match": {
"log": {
"query": "Duplicate entry",
"operator": "and"
}
}
},
{
"range": {"@timestamp": {
"time_zone": "CET",
"gt": "now-7d",
"lte": "now"}}
}
]
}
}
}
},
"aggs" : {
"type_count" : {
"value_count" : {
"script" : {
"source" : "doc['log.keyword'].value"
}
}
}
}
}
My boss really wants me to combine these individual queries into one big query and get the list of counts for each error message in a single output. We have a lot of error messages, which currently means we have to write a separate query for each error message and then run each query to get its count. Is there a way to get the whole list of counts with a single run?
I have been trying to use a query string query and looking for solutions on both Stack Overflow and in the documentation. However, I have had no luck.
You can use filter aggregation along with the value_count aggregation to combine these two queries. In both the queries, out of the 4 queries inside must clause only one differs. You can take this out and combine them with the two filter aggregations as below:
{
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"match": {
"kubernetes.pod_name": "api"
}
},
{
"match": {
"log": "error"
}
},
{
"range": {
"@timestamp": {
"time_zone": "CET",
"gt": "now-7d",
"lte": "now"
}
}
}
]
}
}
}
},
"aggs": {
"not_found_count": {
"filter": {
"match": {
"log": {
"query": "was not found",
"operator": "and"
}
}
},
"aggs": {
"count": {
"value_count": {
"script": {
"source": "doc['log.keyword'].value"
}
}
}
}
},
"duplicate_entry_count": {
"filter": {
"match": {
"log": {
"query": "Duplicate entry",
"operator": "and"
}
}
},
"aggs": {
"count": {
"value_count": {
"script": {
"source": "doc['log.keyword'].value"
}
}
}
}
}
}
}

ElasticSearch aggs with function_score

I'm trying to exclude duplicated documents that have the same slug parameter. To do this, I use aggs in ElasticSearch (version 2.4). I use this query:
{
"fields":[
"id",
"score"],
"size":0,
"query":{
"function_score":{
"query":{
"bool":{
"should":[
{
"match":{
"main_headline.en":{
"query":"headline_for_search"
}
}
},
{
"match":{
"body.en":"body for search"
}
}],
"must_not":{
"term":{
"id":75333
}
},
"filter":[
{
"term":{
"status":3
}
},
[
{
"term":{
"sites":6
}
}]]
}
},
"functions":[
{
"gauss":{
"published_at":{
"scale":"140w",
"decay":0.3
}
}
}
]
},
"aggs":{
"postslug":{
"terms":{
"field":"slug",
"order":{
"top_score":"desc"
}
},
"aggs":{
"grouppost":{
"top_hits": {
"_source": {
"include": [
"id",
"slug",
]
},
"size" : 10
}
}
}
}
}
}
}
When I run it I get error
failed to parse search source. expected field name but got [START_OBJECT]
I can't figure out where the mistake is.
Without the aggs section everything works fine (except that duplicates are present).
I see one issue which relates to the fact that in the source filtering section include should read includes. Also, the aggs section is not at the right location, you have it in the query section, and it should be at the top-level:
{
"fields": [
"id",
"score"
],
"size": 0,
"query": {
"function_score": {
"query": {
"bool": {
"should": [
{
"match": {
"main_headline.en": {
"query": "headline_for_search"
}
}
},
{
"match": {
"body.en": "body for search"
}
}
],
"must_not": {
"term": {
"id": 75333
}
},
"filter": [
{
"term": {
"status": 3
}
},
[
{
"term": {
"sites": 6
}
}
]
]
}
},
"functions": [
{
"gauss": {
"published_at": {
"scale": "140w",
"decay": 0.3
}
}
}
]
}
},
"aggs": {
"postslug": {
"terms": {
"field": "slug",
"order": {
"top_score": "desc"
}
},
"aggs": {
"grouppost": {
"top_hits": {
"_source": {
"includes": [
"id",
"slug"
]
},
"size": 10
}
}
}
}
}
}

Multiple query_strings (nested and not nested)

I have got the following index:
{
"thread":{
"properties":{
"members":{
"type":"nested",
"properties":{
"memberId":{
"type":"keyword"
},
"firstName":{
"type":"keyword",
"copy_to":[
"members.fullName"
]
},
"fullName":{
"type":"text"
},
"lastName":{
"type":"keyword",
"copy_to":[
"members.fullName"
]
}
}
},
"name":{
"type":"text"
}
}
}
}
I want to implement a search that finds all threads that match either a member's name or the thread name, as long as the user id matches.
My current query looks like this:
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "members",
"score_mode": "none",
"query": {
"bool": {
"filter": [
{ "match": { "members.id": "123456789" } }
]
}
}
}
},
{
"nested": {
"path": "members",
"query": {
"bool": {
"must": {
"simple_query_string": {
"query": "Rhymen",
"fields": ["members.fullName"]
}
}
}
}
}
}
]
}
}
}
Can I filter the members and thread names in one query or do I have to merge two separate queries? I tried adding a "should" with "minimum_should_match: 1" so I could add a second not nested "query_string". But that didn't work as expected (scores were pretty screwed).
Yes, I think this should work.
You have to keep the memberId filter in both places. The nested query needs it in order to match the user by both memberId and name.
{
"query": {
"bool": {
"must": [{
"nested": {
"path": "members",
"query": {
"term": {
"members.memberId": {
"value": 1
}
}
}
}
},
{
"bool": {
"should": [{
"term": {
"name": {
"value": "thread_name"
}
}
},
{
"nested": {
"path": "members",
"query": {
"bool": {
"should": [{
"term": {
"members.fullName": {
"value": "trump"
}
}
},
{
"term": {
"members.memberId": {
"value": 1
}
}
}
]
}
}
}
}
]
}
}
]
}
}
}

Nested ElasticSearch query results in too many items

The nested ElasticSearch query below returns some results it should not hit. A lot of results do not contain the requested order number but are listed nevertheless. I'm not getting all documents though so the query is definitely reducing the result set on some level.
{
"query": {
"nested": {
"path": "orders",
"query": {
"match": {
"orderNumber": "242347"
}
}
}
}
}
The query result (truncated):
{
"took":0,
"timed_out":false,
"_shards": {
"total":1,
"successful":1,
"failed":0
},
"hits": {
"total":60,
"max_score":9.656103,
"hits":[
{
"_index": "index1",
"_type":"documenttype1",
"_id":"mUmudQrVSC6rn68ujDJ8iA",
"_score":9.656103,
"_source" : {
"documentId": 12093894,
"orders": [
{
"customerId": 129048669,
"orderNumber": "242347", // <-- CORRECT HIT ON ORDER
},
{
"customerId": 229405848,
"orderNumber": "431962"
}
]
}
},
{
"_index":"index1",
"_type":"documenttype1",
"_id":"9iO5QBCpT_6kmH3CoBTdWw",
"_score":9.656103,
"_source" : {
"documentId": 43390283,
// <-- ORDER ISN'T HERE BUT THE DOCUMENT IS HIT NEVERTHELESS!
"orders": [
{
"customerId": 229405848,
"orderNumber": "431962"
},
{
"customerId": 129408979,
"orderNumber": "142701"
}
]
}
}
// Left out 58 more results most of which do not contain
// the requested order number.
]
}
}
As you can see, there is a hit (actually, there are quite a few of them) that shouldn't be there because none of the orders contain the requested order number.
This is the mapping for documenttype1:
{
"index1":{
"properties":{
"documentId":{
"type":"integer"
},
"orders":{
"type":"nested",
"properties":{
"customerId":{
"type":"integer"
},
"orderNumber":{
"type":"string",
"analyzer":"custom_internal_code"
}
}
}
}
}
}
Finally, here are the settings to clarify the custom_internal_code analyzer as referred to in the mapping shown above:
{
"index1":{
"settings":{
"index.analysis.analyzer.custom_internal_code.filter.1":"asciifolding",
"index.analysis.analyzer.custom_internal_code.type":"custom",
"index.analysis.analyzer.custom_internal_code.filter.0":"lowercase",
"index.analysis.analyzer.custom_internal_code.tokenizer":"keyword",
}
}
}
For an exact search, use a term query [1] and make orderNumber not_analyzed [2].
[1]
http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-term-query.html#query-dsl-term-query
[2]
http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/mapping-intro.html#_literal_index_literal
It seems that you should use a bool query instead of match.
However, if you just want to filter your records, you should use a nested filter instead of a query. It is faster because no scores have to be calculated.
http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-nested-filter.html
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "orders",
"filter": {
"bool": {
"must": [
{
"term": {
"orderNumber": "242347"
}
}
]
}
},
"_cache": true
}
}
}
}
}

Resources