Limit the size per index when searching multiple indices in Elastic - elasticsearch

I have been following the guidelines from this post. I can get the desired output, but how can I limit the size of the results for each index within the same DSL query?
Full text Search with Multiple index in Elastic Search using NEST C#
POST http://localhost:9200/componenttypeindex%2Cprojecttypeindex/Componenttype%2CProjecttype/_search?pretty=true&typed_keys=true
{
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"term": {
"_index": {
"value": "componenttypeindex"
}
}
}
],
"must": [
{
"multi_match": {
"fields": [
"Componentname",
"Summary^1.1"
],
"operator": "or",
"query": "test"
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"_index": {
"value": "projecttypeindex"
}
}
}
],
"must": [
{
"multi_match": {
"fields": [
"Projectname",
"Summary^0.3"
],
"operator": "or",
"query": "test"
}
}
]
}
}
]
}
}
}

With your given query, you could use aggregations to group the hits by index and limit the number of hits per index (in this case, to 5):
{
"size": 0,
"query": {
... Same query as above ...
},
"aggs": {
"index_agg": {
"terms": {
"field": "_index",
"size": 20
},
"aggs": {
"hits_per_index": {
"top_hits": {
"size": 5
}
}
}
}
}
}

Related

ElasticSearch: Query nested array for empty and specific value in single query

Documents structure -
{
"hits": [
{
"_type": "_doc",
"_id": "ef0a2c44179a513476b080cc2a585d95",
"_source": {
"DIVISION_NUMBER": 44,
"MATCHES": [
{
"MATCH_STATUS": "APPROVED",
"UPDATED_ON": 1599171303000
}
]
}
},
{
"_type": "_doc",
"_id": "ef0a2c44179a513476b080cc2a585d95",
"_source": {
"DIVISION_NUMBER": 44,
"MATCHES": [ ]
}
}
]
}
Question - MATCHES is a nested array; inside it there is a text field MATCH_STATUS that can have any value, say "APPROVED" or "REJECTED".
I am looking to search for ALL documents that contain MATCH_STATUS with values such as "APPROVED" or "RECOMMENDED", as well as documents where there is no data in MATCHES (empty array "MATCHES": [ ]). Please note that I want this in a single query.
I am able to do this in two separate queries like this -
GET all matches with status = RECOMMENDED, APPROVED
"must": [
{
"nested": {
"path": "MATCHES",
"query": {
"terms": {
"MATCHES.MATCH_STATUS.keyword": [
"APPROVED",
"RECOMMENDED"
]
}
}
}
}
]
GET all matches having empty array "MATCHES" : [ ]
{
"size": 5000,
"query": {
"bool": {
"filter": [],
"must_not": [
{
"nested": {
"path": "MATCHES",
"query": {
"exists": {
"field": "MATCHES"
}
}
}
}
]
}
},
"from": 0
}
You can combine both queries using a should clause.
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"nested": {
"path": "MATCHES",
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"terms": {
"MATCHES.MATCH_STATUS.keyword": [
"APPROVED",
"RECOMMENDED"
]
}
}
]
}
}
}
},
{
"bool": {
"must_not": [
{
"nested": {
"path": "MATCHES",
"query": {
"bool": {
"filter": {
"exists": {
"field": "MATCHES"
}
}
}
}
}
}
]
}
}
]
}
}
}
Update: To answer your comment.
The missing aggregation does not support nested fields for now; there is an open issue for this.
To get the count of empty matches, you can use a filter aggregation with the nested query wrapped in the must_not clause of a bool query.
{
"aggs": {
"missing_matches_agg": {
"filter": {
"bool": {
"must_not": {
"nested": {
"query": {
"match_all": {}
},
"path": "MATCHES"
}
}
}
}
}
}
}

Elasticsearch multiple fields wildcard bool query

I am currently using a bool query that searches the field "Name" for a combination of both input words or for either one of the input words. How can I search on multiple fields using wildcards?
POST inventory_dev/_search
{"from":0,"query":{"bool":{"must":[{"bool":{"should":[{"term":{"Name":{"value":"dove"}}},{"term":{"Name":{"value":"3.75oz"}}},{"bool":{"must":[{"wildcard":{"Name":{"value":"*dove*"}}},{"wildcard":{"Name":{"value":"*3.75oz*"}}}]}}]}}]}},"size":10,"sort":[{"_score":{"order":"desc"}}]}
You can use query_string in place of the wildcard query to search on multiple fields:
{
"from": 0,
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"term": {
"Name": {
"value": "dove"
}
}
},
{
"term": {
"Name": {
"value": "3.75oz"
}
}
},
{
"bool": {
"must": [
{
"query_string": {
"query": "*dove*",
"fields": [
"field1",
"Name"
]
}
},
{
"query_string": {
"query": "*3.75oz*",
"fields": [
"field1",
"Name"
]
}
}
]
}
}
]
}
}
]
}
},
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
}
]
}

ElasticSearch should with nested and bool must_not exists

With the following mapping:
"categories": {
"type": "nested",
"properties": {
"category": {
"type": "integer"
},
"score": {
"type": "float"
}
}
},
I want to use the categories field to return documents that either:
have a score above a threshold in a given category, or
do not have the categories field
This is my query:
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "categories",
"query": {
"bool": {
"must": [
{
"terms": {
"categories.category": [
<id>
]
}
},
{
"range": {
"categories.score": {
"gte": 0.5
}
}
}
]
}
}
}
},
{
"bool": {
"must_not": [
{
"exists": {
"field": "categories"
}
}
]
}
}
],
"minimum_should_match": 1
}
}
}
It correctly returns documents both with and without the categories field, and orders the results so the ones I want are first, but it doesn't filter out the results that have a score below the 0.5 threshold.
Great question.
That is because, from Elasticsearch's point of view, categories is not exactly a field (that is, a field on which an inverted index is created and used for querying/searching), whereas categories.category and categories.score are.
As a result, categories is not found in any document - which is actually true for all the documents - so the must_not exists clause matches every document, and that is why you observe the results you see.
Modify the query to the below and you'd see your use-case working correctly.
POST <your_index_name>/_search
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "categories",
"query": {
"bool": {
"must": [
{
"terms": {
"categories.category": [
"100"
]
}
},
{
"range": {
"categories.score": {
"gte": 0.5
}
}
}
]
}
}
}
},
{
"bool": {
"must_not": [ <----- Note this
{
"nested": {
"path": "categories",
"query": {
"bool": {
"must": [
{
"exists": {
"field": "categories.category"
}
},
{
"exists": {
"field": "categories.score"
}
}
]
}
}
}
}
]
}
}
],
"minimum_should_match": 1
}
}
}

Elasticsearch specifying index filter in aggregation

I have an Elasticsearch query with an aggregation that I need to filter on the basis of the index name. The query section actually works on multiple indexes, but I want to restrict the aggregation to a particular index. Please help me understand how to pass an index filter in the aggregation -
{
"query": {
"bool": {
"filter": [
{
"bool": {
"should": [
{
"query_string": {
"fields": [
"productDesc",
"productDescription"
],
"default_operator": "AND",
"query": "machine"
}
}
]
}
}
],
"must": [ ],
"must_not": [ ]
}
},
"size": 0,
"aggs": {
"RelatedKeywords": { //here I want to add filter of index
"sampler": {
"shard_size": 20
},
"aggregations": {
"keywords": {
"significant_text": {
"field": "productDesc",
"size": 100,
"filter_duplicate_text": true
}
}
}
}
}
}
You can do it like this:
{
"aggs": {
"index": {
"filter": {
"term": {
"_index": "index-name"
}
},
"aggs": {
"RelatedKeywords": {
"sampler": {
"shard_size": 20
},
"aggregations": {
"keywords": {
"significant_text": {
"field": "productDesc",
"size": 100,
"filter_duplicate_text": true
}
}
}
}
}
}
}
}

Elasticsearch Query for getting field with 'AND' relation

I have an Elasticsearch document as below
I want a search query satisfying this condition:
How do I get those OPERATIONS and CATEGORY values that have both AREA=Mumbai and AREA=Chennai?
So the output should be CATEGORY: Consulting1, OPERATIONS: Regulatory Operations
Use a terms query:
{
"query": {
"terms": {
"AREA": [
"Mumbai",
"Chennai"
]
}
}
}
Maybe this works:
{
"query": {
"bool": {
"must": [
{"term": { "AREA" : "Mumbai" }},
{"term": { "AREA" : "Chennai" }}
]
}
}
}
Try this and let me know:
{
"size": 0,
"query": {
"bool": {
"should": [
{
"term": {
"AREA": "mumbai"
}
},
{
"term": {
"AREA": "chennai"
}
}
]
}
},
"aggs": {
"unique_operations": {
"terms": {
"field": "OPERATIONS",
"size": 10
},
"aggs": {
"count_areas": {
"cardinality": {
"field": "AREA"
}
},
"top": {
"top_hits": {
"size": 2,
"_source": {
"include": ["CATEGORY"]
}
}
},
"areas_bucket_filter": {
"bucket_selector": {
"buckets_path": {
"areasCount": "count_areas"
},
"script": "areasCount == 2"
}
}
}
}
}
}
LATER EDIT: added top_hits aggregation to get back sample documents covering the request for the categories.
Please try this one.
{
"query": {
"bool": {
"should": [
{
"query_string": {
"default_field": "AREA",
"query": "mumbai"
}
},
{
"query_string": {
"default_field": "AREA",
"query": "chennai"
}
}
]
}
}
}[![result][1]][1]

Resources