ElasticSearch high level client search fails occasionally - elasticsearch

When using the ElasticSearch High Level Client to submit an asyncSearch, I occasionally get a wrong response: the shard total is > 0, but successful and failed are both 0, and I can't find any log about this search. For example, the searchBuilder log is as follows:
{
"size": 0,
"query": {...},
"aggregations": {
"term0": {
"filter": {
"match_all": {
"boost": 1
}
},
"aggregations": {
"countCOUNT_DISTINCTdid": {
"cardinality": {
"field": "did",
"precision_threshold": 40000
}
}
}
}
}
}
Then I get this wrong response content:
{
"took": 1002,
"timed_out": false,
"terminated_early": false,
"num_reduce_phases": 0,
"_shards": {
"total": 20,
"successful": 0,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 0,
"relation": "gte"
},
"max_score": null,
"hits": []
}
}
But when I run the same query in Kibana, I get the correct result:
{
"took" : 231,
"timed_out" : false,
"_shards" : {
"total" : 20,
"successful" : 20,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"term0" : {
"doc_count" : 8526098,
"countCOUNT_DISTINCTdid" : {
"value" : 3929368
}
}
}
}
By the way, other search requests sent at the same time using the same client are OK.
Why does this happen, and how can I avoid it? Thanks a lot for any hints.

Related

What is the performance impact of using multiple queries in search rather than msearch in Elasticsearch

I want to correlate queries and responses. For example, 10 responses should be returned for 10 queries.
Msearch (_msearch) satisfies this need, as it returns empty results even if a query doesn't match. But I believe Msearch performs worse than a single search (_search) request, which doesn't return one response per query.
Questions:
Is there any performance difference between Msearch and search (with bool must query as below)?
How can I make the number of responses equal the number of requests with a search query?
Multiple queries using search with bool should.
GET /index1/_search
{
"from": 0,
"size": 10,
"sort": [
{
"created_date": {
"order": "desc"
}
}
],
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"title": {
"value": "Title 1"
}
}
},
{
"exists": {
"field": "first_name"
}
},
{
"term": {
"field_name": {
"value": "Sample title 1"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"title": {
"value": "Title 2"
}
}
},
{
"exists": {
"field": "last_name"
}
},
{
"term": {
"field_name": {
"value": "Sample title 2"
}
}
}
]
}
}
]
}
}
}
Response:
{
"took" : 15,
"timed_out" : false,
"_shards" : {
"total" : 3,
"successful" : 3,
"skipped" : 2,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
Multiple queries using Msearch
GET index1/_msearch
{}
{"from":0,"size":10,"sort":[{"created_date":{"order":"desc"}}],"query":{"bool":{"must":[{"term":{"title":{"value":"Title 1"}}},{"exists":{"field":"first_name"}},{"term":{"field_name":{"value":"Sample title 1"}}}]}}}
{}
{"from":0,"size":10,"sort":[{"created_date":{"order":"desc"}}],"query":{"bool":{"must":[{"term":{"title":{"value":"Title 2"}}},{"exists":{"field":"last_name"}},{"term":{"field_name":{"value":"Sample title 2"}}}]}}}
Response:
{
"took" : 23,
"responses" : [
{
"took" : 21,
"timed_out" : false,
"_shards" : {
"total" : 3,
"successful" : 3,
"skipped" : 2,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"status" : 200
},
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 3,
"successful" : 3,
"skipped" : 2,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"status" : 200
}
]
}

elasticsearch - only return specific fields without _source?

I've found some answer like
Make elasticsearch only return certain fields?
But they all need _source field.
In my system, disk and network are both scarce resources.
I can't store _source field and I don't need _index, _score field.
ElasticSearch Version: 5.5
The index mapping looks like this:
{
"index_2020-04-08": {
"mappings": {
"type1": {
"_all": {
"enabled": false
},
"_source": {
"enabled": false
},
"properties": {
"rank_score": {
"type": "float"
},
"first_id": {
"type": "keyword"
},
"second_id": {
"type": "keyword"
}
}
}
}
}
}
My query:
GET index_2020-04-08/type1/_search
{
"query": {
"bool": {
"filter": {
"term": {
"first_id": "hello"
}
}
}
},
"size": 1000,
"sort": [
{
"rank_score": {
"order": "desc"
}
}
]
}
The search results I got :
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": null,
"hits": [
{
"_index": "index_2020-04-08",
"_type": "type1",
"_id": "id_1",
"_score": null,
"sort": [
0.06621722
]
},
{
"_index": "index_2020-04-08",
"_type": "type1",
"_id": "id_2",
"_score": null,
"sort": [
0.07864579
]
}
]
}
}
The results I want:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": null,
"hits": [
{
"_id": "id_1"
},
{
"_id": "id_2"
}
]
}
}
Can I implement it?
To return specific fields in the document, you must do one of the two:
Include the _source field in your documents, which is enabled by default.
Store specific fields with the stored fields feature which must be enabled manually
Because you want pretty much the document Ids and some metadata, you can use the filter_path feature.
Here's an example that's close to what you want (just change the field list):
$ curl -X GET "localhost:9200/metricbeat-7.6.1-2020.04.02-000002/_search?filter_path=took,timed_out,_shards,hits.total,hits.max_score,hits.hits._id&pretty"
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : 1.0,
"hits" : [
{
"_id" : "8SEGSHEBzNscjCyQ18cg"
},
{
"_id" : "8iEGSHEBzNscjCyQ18cg"
},
{
"_id" : "8yEGSHEBzNscjCyQ18cg"
},
{
"_id" : "9CEGSHEBzNscjCyQ18cg"
},
{
"_id" : "9SEGSHEBzNscjCyQ18cg"
},
{
"_id" : "9iEGSHEBzNscjCyQ18cg"
},
{
"_id" : "9yEGSHEBzNscjCyQ18cg"
},
{
"_id" : "-CEGSHEBzNscjCyQ18cg"
},
{
"_id" : "-SEGSHEBzNscjCyQ18cg"
},
{
"_id" : "-iEGSHEBzNscjCyQ18cg"
}
]
}
}
Just to clarify based on the SO question you linked -- you're not storing the _source, you're requesting it from ES. It's usually used to limit what you want to have retrieved, i.e.
...
"_source": ["only", "fields", "I", "need"]
...
_score, _index etc. are meta fields that are going to be retrieved no matter what. You can "hack" it a bit by setting the size to 0 and aggregating, i.e.
{
"size": 0,
"aggs": {
"by_ids": {
"terms": {
"field": "_id"
}
}
}
}
which will save you a few bytes
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"by_ids" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Ac76WXEBnteqn982smh_",
"doc_count" : 1
},
{
"key" : "As77WXEBnteqn982EGgq",
"doc_count" : 1
}
]
}
}
}
but performing aggregations has a cost of its own.

Elasticsearch bool must query

I am trying to write an Elasticsearch bool query. I am having an issue querying a field (DATE) using a bool must query.
The Elasticsearch data looks like this:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 15,
"successful": 15,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 11.519888,
"hits": [
{
"_index": "test-2019.06.27",
"_type": "test",
"_id": "pa6gmGsByDlvLvAyiRF-",
"_score": 11.519888,
"_source": {
"DATE": "01/06/19"
}
}
]
}
}
The Elasticsearch query looks like this:
{
"query":
{
"bool" : {
"must" : [
{
"match" : {
"DATE" : {
"query" : "01/06/19",
"operator" : "AND",
"prefix_length" : 0,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"auto_generate_synonyms_phrase_query" : true,
"boost" : 1.0
}
}
}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
The query is not working.
Any idea please?
For date-typed fields, I usually write a range query.
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-range-query.html
Try the below code,
{
"query": {
"range" : {
"DATE" : {
"gte" : "now-1d/d",
"lt" : "now/d"
}
}
}
}

filtering on 2 values of same field

I have a status field, which can have one of the following values,
I can filter for data which have status completed. I can also see data which has ongoing.
But I want to display the data which have status completed and ongoing at the same time.
But I don't know how to add filters for 2 values on a single field.
How can I achieve what I want ?
EDIT - Thanks for the answers, but that is not what I wanted.
Like here, where I have filtered for status:completed, I want to filter for 2 values in this exact way.
I know I can edit this filter and use your queries, but I need a simple way to do this (the query way is complex), as I have to show it to my marketing team and they don't have any idea about queries. I need to convince them.
If I understand your question correctly, you want to perform an aggregation on 2 values of a field.
This should be possible with a query similar to this one with a terms query:
{
"size" : 0,
"query" : {
"bool" : {
"must" : [ {
"terms" : {
"status" : [ "completed", "unpaid" ]
}
} ]
}
},
"aggs" : {
"freqs" : {
"terms" : {
"field" : "status"
}
}
}
}
This will give a result like this one:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 3,
"successful" : 3,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"freqs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ {
"key" : "unpaid",
"doc_count" : 4
}, {
"key" : "completed",
"doc_count" : 1
} ]
}
}
}
Here is my toy mapping definition:
{
"bookings" : {
"properties" : {
"status" : {
"type" : "keyword"
}
}
}
}
You need a filter in aggregation.
{
"size": 0,
"aggs": {
"agg_name": {
"filter": {
"bool": {
"should": [
{
"terms": {
"status": [
"completed",
"ongoing"
]
}
}
]
}
}
}
}
}
Use the above query to get results like this:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 8,
"max_score": 0,
"hits": []
},
"aggregations": {
"agg_name": {
"doc_count": 6
}
}
}
The result you want is the doc_count.
For your reference, in an Elasticsearch bool query, should works like OR conditions:
{
"query":{
"bool":{
"should":[
{"term":{"status":"completed"}},
{"term":{"status":"ongoing"}}
]
}
},
"aggs" : {
"booking_status" : {
"terms" : {
"field" : "status"
}
}
}
}

Using the returned value of sum aggregation - elasticsearch

I made this query to sum all my "practiceValue" that are doubles.
{
"size" : 0,
"query" : {
"bool" : {
"must_not" : [
{
"missing" : { "field" : "practiceObj.practiceValue" }
}
],
"must" : [
{
"match" : { "entityObj.description" : "FIRST" }
}
]
}
},
"aggs" : {
"total" : {
"sum" : { "script" : "(doc['practiceObj.practiceValue'].value)"
}
}
}
}
My query returned the following:
{
"took": 32,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 11477,
"max_score": 0,
"hits": []
},
"aggregations": {
"total": {
"value": 1593598.7499999984
}
}
}
How can I use that "total" value in order to round it?
"value" equals to 1593598.7499999984 and I want to make it 1593598.75
Thanks!

Resources