I am observing high disk read I/O on my Elasticsearch nodes.
Environment:
Elasticsearch 2.3.1
Disk: SSD
Cores: 16
RAM: 64 GB
Segments and merging could be one of the issues, but as mentioned in this link I don't see any INFO log stating "now throttling indexing".
Can someone let me know what the problem could be and how I can debug it?
The node stats look like this:
"indices" : {
"docs" : {
"count" : 42096315,
"deleted" : 16809358
},
"store" : {
"size_in_bytes" : 188061514556,
"throttle_time_in_millis" : 0
},
"indexing" : {
"index_total" : 4971184,
"index_time_in_millis" : 24161898,
"index_current" : 0,
"index_failed" : 0,
"delete_total" : 10317957,
"delete_time_in_millis" : 3323977,
"delete_current" : 0,
"noop_update_total" : 165455,
"is_throttled" : false,
"throttle_time_in_millis" : 0
},
"get" : {
"total" : 3078536,
"time_in_millis" : 4166218,
"exists_total" : 3076266,
"exists_time_in_millis" : 4165295,
"missing_total" : 2270,
"missing_time_in_millis" : 923,
"current" : 0
},
"search" : {
"open_contexts" : 0,
"query_total" : 811510,
"query_time_in_millis" : 49063191,
"query_current" : 0,
"fetch_total" : 55590,
"fetch_time_in_millis" : 2561106,
"fetch_current" : 0,
"scroll_total" : 205,
"scroll_time_in_millis" : 68748,
"scroll_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 37047,
"total_time_in_millis" : 29575123,
"total_docs" : 48646732,
"total_size_in_bytes" : 189196722890,
"total_stopped_time_in_millis" : 0,
"total_throttled_time_in_millis" : 7243267,
"total_auto_throttle_in_bytes" : 186360552
},
"refresh" : {
"total" : 253329,
"total_time_in_millis" : 14111583
},
"flush" : {
"total" : 824,
"total_time_in_millis" : 197608
},
"warmer" : {
"current" : 0,
"total" : 479781,
"total_time_in_millis" : 440805
},
"query_cache" : {
"memory_size_in_bytes" : 523777408,
"total_count" : 144964341,
"hit_count" : 5901881,
"miss_count" : 139062460,
"cache_size" : 7773,
"cache_count" : 442280,
"evictions" : 434507
},
"fielddata" : {
"memory_size_in_bytes" : 0,
"evictions" : 0
},
"segments" : {
"count" : 263,
"memory_in_bytes" : 273031904,
"terms_memory_in_bytes" : 203860300,
"stored_fields_memory_in_bytes" : 34899016,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 5229120,
"doc_values_memory_in_bytes" : 29043468,
"index_writer_memory_in_bytes" : 0,
"index_writer_max_memory_in_bytes" : 527069180,
"version_map_memory_in_bytes" : 14761,
"fixed_bit_set_memory_in_bytes" : 7048640
},
"translog" : {
"operations" : 137655,
"size_in_bytes" : 122949018
},
"request_cache" : {
"memory_size_in_bytes" : 0,
"evictions" : 0,
"hit_count" : 0,
"miss_count" : 0
}
},
"os" : {
"timestamp" : 1508238172920,
"cpu_percent" : 91,
"load_average" : 22.31,
"mem" : {
"total_in_bytes" : 67543134208,
"free_in_bytes" : 912490496,
"used_in_bytes" : 66630643712,
"free_percent" : 1,
"used_percent" : 99
},
"swap" : {
"total_in_bytes" : 4093636608,
"free_in_bytes" : 1753239552,
"used_in_bytes" : 2340397056
}
},
"process" : {
"timestamp" : 1508238172920,
"open_file_descriptors" : 915,
"max_file_descriptors" : 65535,
"cpu" : {
"percent" : 2,
"total_in_millis" : 99746040
},
"mem" : {
"total_virtual_in_bytes" : 87529877504
}
},
"jvm" : {
"timestamp" : 1508238172921,
"uptime_in_millis" : 292500150,
"mem" : {
"heap_used_in_bytes" : 1868708912,
"heap_used_percent" : 35,
"heap_committed_in_bytes" : 5255331840,
"heap_max_in_bytes" : 5255331840,
"non_heap_used_in_bytes" : 103936064,
"non_heap_committed_in_bytes" : 106307584,
"pools" : {
"young" : {
"used_in_bytes" : 153647352,
"max_in_bytes" : 907345920,
"peak_used_in_bytes" : 907345920,
"peak_max_in_bytes" : 907345920
},
"survivor" : {
"used_in_bytes" : 35321888,
"max_in_bytes" : 113377280,
"peak_used_in_bytes" : 113377280,
"peak_max_in_bytes" : 113377280
},
"old" : {
"used_in_bytes" : 1679739672,
"max_in_bytes" : 4234608640,
"peak_used_in_bytes" : 3660407304,
"peak_max_in_bytes" : 4234608640
}
}
},
"threads" : {
"count" : 199,
"peak_count" : 204
},
"gc" : {
"collectors" : {
"young" : {
"collection_count" : 32655,
"collection_time_in_millis" : 1844356
},
"old" : {
"collection_count" : 611,
"collection_time_in_millis" : 176197
}
}
},
"buffer_pools" : {
"direct" : {
"count" : 258,
"used_in_bytes" : 269582255,
"total_capacity_in_bytes" : 269582255
},
"mapped" : {
"count" : 146,
"used_in_bytes" : 71612833894,
"total_capacity_in_bytes" : 71612833894
}
},
"classes" : {
"current_loaded_count" : 8483,
"total_loaded_count" : 8499,
"total_unloaded_count" : 16
}
},
"thread_pool" : {
"bulk" : {
"threads" : 16,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 16,
"completed" : 44913
},
"fetch_shard_started" : {
"threads" : 1,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 9,
"completed" : 13
},
"fetch_shard_store" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"flush" : {
"threads" : 5,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 5,
"completed" : 9011
},
"force_merge" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"generic" : {
"threads" : 1,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 6,
"completed" : 29268
},
"get" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"index" : {
"threads" : 16,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 16,
"completed" : 9460079
},
"listener" : {
"threads" : 8,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 8,
"completed" : 1237173
},
"management" : {
"threads" : 4,
"queue" : 0,
"active" : 1,
"rejected" : 0,
"largest" : 4,
"completed" : 44128
},
"percolate" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"refresh" : {
"threads" : 8,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 8,
"completed" : 253330
},
"search" : {
"threads" : 25,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 25,
"completed" : 992032
},
"snapshot" : {
"threads" : 0,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 0,
"completed" : 0
},
"warmer" : {
"threads" : 5,
"queue" : 0,
"active" : 0,
"rejected" : 0,
"largest" : 5,
"completed" : 518307
}
},
"fs" : {
"timestamp" : 1508238172922,
"total" : {
"total_in_bytes" : 1847237029888,
"free_in_bytes" : 921102319616,
"available_in_bytes" : 839458172928
},
"data" : [ {
"path" : "/var/lib/elasticsearch/elasticsearch/nodes/0",
"mount" : "/var (/dev/mapper/vag-var)",
"type" : "ext4",
"total_in_bytes" : 1847237029888,
"free_in_bytes" : 921102319616,
"available_in_bytes" : 839458172928,
"spins" : "false"
} ]
},
"transport" : {
"server_open" : 140,
"rx_count" : 7926335,
"rx_size_in_bytes" : 15511144109,
"tx_count" : 7667433,
"tx_size_in_bytes" : 47171921335
},
"http" : {
"current_open" : 1,
"total_opened" : 63123
},
"breakers" : {
"request" : {
"limit_size_in_bytes" : 2102132736,
"limit_size" : "1.9gb",
"estimated_size_in_bytes" : 0,
"estimated_size" : "0b",
"overhead" : 1.0,
"tripped" : 0
},
"fielddata" : {
"limit_size_in_bytes" : 3153199104,
"limit_size" : "2.9gb",
"estimated_size_in_bytes" : 0,
"estimated_size" : "0b",
"overhead" : 1.03,
"tripped" : 0
},
"parent" : {
"limit_size_in_bytes" : 3678732288,
"limit_size" : "3.4gb",
"estimated_size_in_bytes" : 0,
"estimated_size" : "0b",
"overhead" : 1.0,
"tripped" : 0
}
}
}
}
}
refresh_interval & translog.flush_threshold_size are both set to their default values.
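If merge and refresh churn turns out to be the driver of the read I/O, one common mitigation during heavy indexing is to lengthen the refresh interval so that fewer (but larger) segments are created and less merging is needed. A minimal sketch, assuming the index is denorm as suggested by the hot-threads output below, with 30s only as an example value:
PUT /denorm/_settings
{
  "index": {
    "refresh_interval": "30s"
  }
}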
The hot-threads report (from _nodes/hot_threads?pretty) is as follows:
Hot threads at 2017-10-17T12:45:39.517Z, interval=500ms, busiestThreads=3, ignoreIdleThreads=true:
71.6% (357.8ms out of 500ms) cpu usage by thread 'elasticsearch[Axum][[denorm][1]: Lucene Merge Thread #6011]'
3/10 snapshots sharing following 13 elements
org.apache.lucene.index.MultiTermsEnum.pushTop(MultiTermsEnum.java:275)
org.apache.lucene.index.MultiTermsEnum.next(MultiTermsEnum.java:301)
org.apache.lucene.index.FilterLeafReader$FilterTermsEnum.next(FilterLeafReader.java:195)
org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter.write(BlockTreeTermsWriter.java:438)
org.apache.lucene.codecs.perfield.PerFieldPostingsFormat$FieldsWriter.write(PerFieldPostingsFormat.java:198)
org.apache.lucene.codecs.FieldsConsumer.merge(FieldsConsumer.java:105)
org.apache.lucene.index.SegmentMerger.mergeTerms(SegmentMerger.java:193)
org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:95)
org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:4075)
org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:3655)
org.apache.lucene.index.ConcurrentMergeScheduler.doMerge(ConcurrentMergeScheduler.java:588)
org.elasticsearch.index.engine.ElasticsearchConcurrentMergeScheduler.doMerge(ElasticsearchConcurrentMergeScheduler.java:94)
org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:626)
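Since the busiest thread is a Lucene merge thread, it can also help to watch merge and segment activity while the read I/O is high. A sketch of what one might poll, again using the denorm index from the trace above:
# node-level merge and segment stats
GET /_nodes/stats/indices/merges,segments?human
# per-segment view of the index
GET /_cat/segments/denorm?v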
Related
I am using the Elasticsearch profile API to check query performance, but I found that the two values, took and profile -> shards -> searches -> query -> time_in_nanos, are very different.
Below is an output example:
{
"took" : 1139,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 238957,
"relation" : "eq"
},
"max_score" : 0.0,
...
"profile" : {
"shards" : [
{
"id" : "[-PQSJU3MQViBXwOaQN-IOg][mp-transaction-green][0]",
"searches" : [
{
"query" : [
{
"type" : "BoostQuery",
"description" : "(ConstantScore(+timestampUtc:[1640959200000 TO 9223372036854775807] +entityUuid:1b1404d7-5c2b-4a14-bf9e-8bdc494e7234))^0.0",
"time_in_nanos" : 103557672,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 1,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 43104397,
"match" : 1133540,
"next_doc_count" : 238966,
"score_count" : 238957,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 8247036,
"advance_count" : 18,
"score" : 6004966,
"build_scorer_count" : 43,
"create_weight" : 38801,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 45028932
},
"children" : [
{
"type" : "BooleanQuery",
"description" : "+timestampUtc:[1640959200000 TO 9223372036854775807] +entityUuid:1b1404d7-5c2b-4a14-bf9e-8bdc494e7234",
"time_in_nanos" : 83178924,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 1,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 29133549,
"match" : 1132067,
"next_doc_count" : 238966,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 8243103,
"advance_count" : 18,
"score" : 0,
"build_scorer_count" : 43,
"create_weight" : 29040,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 44641165
},
"children" : [
{
"type" : "IndexOrDocValuesQuery",
"description" : "timestampUtc:[1640959200000 TO 9223372036854775807]",
"time_in_nanos" : 31267154,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 1,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 8401867,
"match" : 1123004,
"next_doc_count" : 238817,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 294443,
"advance_count" : 9346,
"score" : 0,
"build_scorer_count" : 61,
"create_weight" : 5182,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 21442658
}
},
{
"type" : "TermQuery",
"description" : "entityUuid:1b1404d7-5c2b-4a14-bf9e-8bdc494e7234",
"time_in_nanos" : 21386796,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 5486,
"match" : 0,
"next_doc_count" : 149,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 12360641,
"advance_count" : 245078,
"score" : 0,
"build_scorer_count" : 61,
"create_weight" : 10808,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 9009861
}
}
]
}
]
}
],
"rewrite_time" : 10711,
"collector" : [
{
"name" : "SimpleTopScoreDocCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 10057341
}
]
}
],
"aggregations" : [ ]
}
]
}
As you can see, the value of took is 1139, which is a little more than 1 second. But if I sum up all the "time_in_nanos" values, the total is only 248 milliseconds. Why are they so different? Is it because of network latency?
First things first: took in an Elasticsearch response doesn't include the network latency between the Elasticsearch client and the server; it measures only the time spent on the Elasticsearch server side.
Second, I am not sure whether you have provided the complete output of the profile API. I can see the children of your query, and if you sum all of the components the total should be close to took. It would be better if you could provide your complete search query and profile output to confirm this.
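As a side note, running the profiled search with ?human=true makes the per-component times easier to compare, because each profile entry then also carries a readable "time" field next to "time_in_nanos". A sketch of such a request, with the index and field names reconstructed only from the query descriptions in the profile above, so treat it as approximate:
GET /mp-transaction-green/_search?human=true
{
  "profile": true,
  "query": {
    "bool": {
      "filter": [
        { "range": { "timestampUtc": { "gte": 1640959200000 } } },
        { "term": { "entityUuid": "1b1404d7-5c2b-4a14-bf9e-8bdc494e7234" } }
      ]
    }
  }
}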
ES version: 7.3.2
The total query time is much longer than the time reported for any single shard.
This problem only occurs when the same piece of data has not been requested for a long time and is then requested again.
The problem also disappears when data is no longer written to the index in real time.
I would like to know how to troubleshoot this.
Thanks!
request:
GET friend_relation_realtime_v2/_search?human=true
{
"query": {
"bool": {
"filter": {
"term": {
"user_id": "544799000"
}
}
}
}
}
result:
{
"took" : 1277,
"timed_out" : false,
"_shards" : {
"total" : 10,
"successful" : 10,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 233,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
.........
]
},
"profile" : {
"shards" : [
{
"id" : "[2mYeMFE1RO2Uu2pi63sMNQ][friend_relation_realtime_v2][3]",
"searches" : [
{
"query" : [
{
"type" : "BoostQuery",
"description" : "(ConstantScore(user_id:544799000))^0.0",
"time" : "315.2micros",
"time_in_nanos" : 315291,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 4262,
"match" : 0,
"next_doc_count" : 19,
"score_count" : 19,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 10573,
"advance_count" : 3,
"score" : 1339,
"build_scorer_count" : 26,
"create_weight" : 5623,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 293426
},
"children" : [
{
"type" : "TermQuery",
"description" : "user_id:544799000",
"time" : "301.4micros",
"time_in_nanos" : 301455,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 1582,
"match" : 0,
"next_doc_count" : 19,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 9888,
"advance_count" : 3,
"score" : 0,
"build_scorer_count" : 26,
"create_weight" : 2994,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 286942
}
}
]
}
],
"rewrite_time" : 2381,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time" : "19micros",
"time_in_nanos" : 19029,
"children" : [
{
"name" : "SimpleTopScoreDocCollector",
"reason" : "search_top_hits",
"time" : "9.1micros",
"time_in_nanos" : 9134
}
]
}
]
}
],
"aggregations" : [ ]
},
{
"id" : "[2mYeMFE1RO2Uu2pi63sMNQ][friend_relation_realtime_v2][4]",
"searches" : [
{
"query" : [
{
"type" : "BoostQuery",
"description" : "(ConstantScore(user_id:544799000))^0.0",
"time" : "320.9micros",
"time_in_nanos" : 320910,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 4158,
"match" : 0,
"next_doc_count" : 24,
"score_count" : 24,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 9283,
"advance_count" : 2,
"score" : 1345,
"build_scorer_count" : 31,
"create_weight" : 10394,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 295648
},
"children" : [
{
"type" : "TermQuery",
"description" : "user_id:544799000",
"time" : "298.3micros",
"time_in_nanos" : 298395,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 1811,
"match" : 0,
"next_doc_count" : 24,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 8764,
"advance_count" : 2,
"score" : 0,
"build_scorer_count" : 31,
"create_weight" : 3754,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 284008
}
}
]
}
],
"rewrite_time" : 4100,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time" : "33.7micros",
"time_in_nanos" : 33781,
"children" : [
{
"name" : "SimpleTopScoreDocCollector",
"reason" : "search_top_hits",
"time" : "10.2micros",
"time_in_nanos" : 10214
}
]
}
]
}
],
"aggregations" : [ ]
},
{
"id" : "[I0cNQW50Q3C_kt28KVSVsQ][friend_relation_realtime_v2][1]",
"searches" : [
{
"query" : [
{
"type" : "BoostQuery",
"description" : "(ConstantScore(user_id:544799000))^0.0",
"time" : "384.6micros",
"time_in_nanos" : 384608,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 5840,
"match" : 0,
"next_doc_count" : 33,
"score_count" : 31,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 27664,
"advance_count" : 4,
"score" : 1749,
"build_scorer_count" : 26,
"create_weight" : 19208,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 330052
},
"children" : [
{
"type" : "TermQuery",
"description" : "user_id:544799000",
"time" : "338.5micros",
"time_in_nanos" : 338550,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 2227,
"match" : 0,
"next_doc_count" : 33,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 24780,
"advance_count" : 4,
"score" : 0,
"build_scorer_count" : 26,
"create_weight" : 3957,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 307522
}
}
]
}
],
"rewrite_time" : 7897,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time" : "45.1micros",
"time_in_nanos" : 45124,
"children" : [
{
"name" : "SimpleTopScoreDocCollector",
"reason" : "search_top_hits",
"time" : "22.1micros",
"time_in_nanos" : 22110
}
]
}
]
}
],
"aggregations" : [ ]
},
.............
]
}
}
When you run the profiler as part of a search, the total time of your query will be much higher than a normal run. The best way to check the total time is to run the query without the profiler and compare the time on each node.
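A sketch of that comparison, reusing the query from the question: run it once without profiling and note "took", then run it again with "profile": true and compare (keeping in mind that the second run also benefits from warmed caches):
# plain run, note "took"
GET friend_relation_realtime_v2/_search
{
  "query": { "bool": { "filter": { "term": { "user_id": "544799000" } } } }
}
# same query with profiling enabled
GET friend_relation_realtime_v2/_search?human=true
{
  "profile": true,
  "query": { "bool": { "filter": { "term": { "user_id": "544799000" } } } }
}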
A few minutes after the Elasticsearch cluster starts, all nodes give this error:
[ERROR][o.e.x.m.c.n.NodeStatsCollector] [node-1] collector [node_stats] timed out when collecting data
[ERROR][o.e.x.m.c.n.NodeStatsCollector] [node-1] collector [node_stats] timed out when collecting data
All nodes give this error. The indexer, a Java program, indexes data via the bulk API without problems, and there are no errors in its logs, yet the Elasticsearch nodes keep reporting this error.
I am running 4 Elasticsearch nodes and Kibana on one machine with Windows Server 2019 Datacenter.
The Elasticsearch and Kibana version is 7.10.0, and the cluster stats are:
{
"_nodes" : {
"total" : 4,
"successful" : 4,
"failed" : 0
},
"cluster_name" : "es-cluster",
"cluster_uuid" : "RRhGhaElfh5lUxGfsKg",
"timestamp" : 1245375859907,
"status" : "green",
"indices" : {
"count" : 1,
"shards" : {
"total" : 9,
"primaries" : 3,
"replication" : 2.0,
"index" : {
"shards" : {
"min" : 9,
"max" : 9,
"avg" : 9.0
},
"primaries" : {
"min" : 3,
"max" : 3,
"avg" : 3.0
},
"replication" : {
"min" : 2.0,
"max" : 2.0,
"avg" : 2.0
}
}
},
"docs" : {
"count" : 0,
"deleted" : 0
},
"store" : {
"size" : "36.1mb",
"size_in_bytes" : 37936718,
"reserved" : "0b",
"reserved_in_bytes" : 0
},
"fielddata" : {
"memory_size" : "0b",
"memory_size_in_bytes" : 0,
"evictions" : 0
},
"query_cache" : {
"memory_size" : "0b",
"memory_size_in_bytes" : 0,
"total_count" : 0,
"hit_count" : 0,
"miss_count" : 0,
"cache_size" : 0,
"cache_count" : 0,
"evictions" : 0
},
"completion" : {
"size" : "0b",
"size_in_bytes" : 0
},
"segments" : {
"count" : 0,
"memory" : "0b",
"memory_in_bytes" : 0,
"terms_memory" : "0b",
"terms_memory_in_bytes" : 0,
"stored_fields_memory" : "0b",
"stored_fields_memory_in_bytes" : 0,
"term_vectors_memory" : "0b",
"term_vectors_memory_in_bytes" : 0,
"norms_memory" : "0b",
"norms_memory_in_bytes" : 0,
"points_memory" : "0b",
"points_memory_in_bytes" : 0,
"doc_values_memory" : "0b",
"doc_values_memory_in_bytes" : 0,
"index_writer_memory" : "233.1mb",
"index_writer_memory_in_bytes" : 244509188,
"version_map_memory" : "0b",
"version_map_memory_in_bytes" : 0,
"fixed_bit_set" : "0b",
"fixed_bit_set_memory_in_bytes" : 0,
"max_unsafe_auto_id_timestamp" : -1,
"file_sizes" : { }
},
"mappings" : {
"field_types" : [
{
"name" : "boolean",
"count" : 3,
"index_count" : 1
},
{
"name" : "date",
"count" : 4,
"index_count" : 1
},
{
"name" : "geo_point",
"count" : 1,
"index_count" : 1
},
{
"name" : "integer",
"count" : 8,
"index_count" : 1
},
{
"name" : "ip",
"count" : 2,
"index_count" : 1
},
{
"name" : "keyword",
"count" : 12,
"index_count" : 1
},
{
"name" : "object",
"count" : 1,
"index_count" : 1
},
{
"name" : "text",
"count" : 17,
"index_count" : 1
}
]
},
"analysis" : {
"char_filter_types" : [ ],
"tokenizer_types" : [ ],
"filter_types" : [
{
"name" : "stop",
"count" : 3,
"index_count" : 1
}
],
"analyzer_types" : [
{
"name" : "custom",
"count" : 1,
"index_count" : 1
}
],
"built_in_char_filters" : [ ],
"built_in_tokenizers" : [
{
"name" : "standard",
"count" : 1,
"index_count" : 1
}
],
"built_in_filters" : [
{
"name" : "arabic_normalization",
"count" : 1,
"index_count" : 1
},
{
"name" : "decimal_digit",
"count" : 1,
"index_count" : 1
},
{
"name" : "lowercase",
"count" : 1,
"index_count" : 1
},
{
"name" : "persian_normalization",
"count" : 1,
"index_count" : 1
}
],
"built_in_analyzers" : [ ]
}
},
"nodes" : {
"count" : {
"total" : 4,
"coordinating_only" : 0,
"data" : 3,
"data_cold" : 3,
"data_content" : 3,
"data_hot" : 3,
"data_warm" : 3,
"ingest" : 3,
"master" : 3,
"ml" : 0,
"remote_cluster_client" : 4,
"transform" : 3,
"voting_only" : 1
},
"versions" : [
"7.10.0"
],
"os" : {
"available_processors" : 48,
"allocated_processors" : 24,
"names" : [
{
"name" : "Windows Server 2019",
"count" : 4
}
],
"pretty_names" : [
{
"pretty_name" : "Windows Server 2019",
"count" : 4
}
],
"mem" : {
"total" : "383.4gb",
"total_in_bytes" : 411772076032,
"free" : "127.2gb",
"free_in_bytes" : 136611741696,
"used" : "256.2gb",
"used_in_bytes" : 275160334336,
"free_percent" : 33,
"used_percent" : 67
}
},
"process" : {
"cpu" : {
"percent" : 0
},
"open_file_descriptors" : {
"min" : -1,
"max" : -1,
"avg" : 0
}
},
"jvm" : {
"max_uptime" : "38.2m",
"max_uptime_in_millis" : 2297261,
"versions" : [
{
"version" : "14.0.2",
"vm_name" : "Java HotSpot(TM) 64-Bit Server VM",
"vm_version" : "14.0.2+12-46",
"vm_vendor" : "Oracle Corporation",
"bundled_jdk" : true,
"using_bundled_jdk" : false,
"count" : 4
}
],
"mem" : {
"heap_used" : "2.2gb",
"heap_used_in_bytes" : 2433056080,
"heap_max" : "50gb",
"heap_max_in_bytes" : 53687091200
},
"threads" : 153
},
"fs" : {
"total" : "6.5tb",
"total_in_bytes" : 7196607758336,
"free" : "6.2tb",
"free_in_bytes" : 6888031485952,
"available" : "6.2tb",
"available_in_bytes" : 6888031469568
},
"plugins" : [ ],
"network_types" : {
"transport_types" : {
"security4" : 4
},
"http_types" : {
"security4" : 4
}
},
"discovery_types" : {
"zen" : 4
},
"packaging_types" : [
{
"flavor" : "default",
"type" : "zip",
"count" : 4
}
],
"ingest" : {
"number_of_pipelines" : 2,
"processor_stats" : {
"gsub" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
},
"script" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
}
}
}
}
}
Any idea how to solve this error?
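If the nodes are simply slow to respond to the internal node-stats request, one thing that may be worth trying (a sketch, not a confirmed fix) is raising the monitoring collector's timeout above its default through the cluster settings API:
PUT _cluster/settings
{
  "transient": {
    "xpack.monitoring.collection.node.stats.timeout": "30s"
  }
}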
I'm seeing some unexpectedly slow queries, so I ran the profile API (Elasticsearch 7.4). However, it doesn't explain the slowness: all components take at most a couple of milliseconds, whereas the query "took" more than 1 second ("took" : 1254).
Here's the query, which uses applicationId for routing. (It uses query_string rather than a typical must clause for unrelated reasons, but that doesn't affect query performance. Note that the index has a configured default sort on timestamp, descending.)
POST indexname/_search?routing=cbcd0350-ba63-11e9-a4af-ed719166c0ae
{
"profile": true,
"query": {
"bool": {
"must": {
"query_string": {
"query": "action:foo"
}
},
"filter": [
{
"terms": {
"applicationId": [
"cbcd0350-ba63-11e9-a4af-ed719166c0ae"
]
}
},
{
"range": {
"timestamp": {
"gte": "1601142184297",
"lte": "1601143384297"
}
}
}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}
Below is the profile result:
"profile" : {
"shards" : [
{
"id" : "[9pyht_PVS0mTX_qoJMGhqg][indexname][12]",
"searches" : [
{
"query" : [
{
"type" : "BooleanQuery",
"description" : "+action:foo #ConstantScore(applicationId:cbcd0350-ba63-11e9-a4af-ed719166c0ae) #timestamp:[1601142184297 TO 1601143384297]",
"time_in_nanos" : 9193115,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 4475919,
"match" : 0,
"next_doc_count" : 5994,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 243183,
"advance_count" : 18,
"score" : 0,
"build_scorer_count" : 38,
"create_weight" : 75323,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 4392639
},
"children" : [
{
"type" : "TermQuery",
"description" : "action:foo",
"time_in_nanos" : 818107,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 0,
"match" : 0,
"next_doc_count" : 0,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 605683,
"advance_count" : 6012,
"score" : 0,
"build_scorer_count" : 56,
"create_weight" : 24653,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 181702
}
},
{
"type" : "ConstantScoreQuery",
"description" : "ConstantScore(applicationId:cbcd0350-ba63-11e9-a4af-ed719166c0ae)",
"time_in_nanos" : 1548337,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 0,
"match" : 0,
"next_doc_count" : 0,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 1388326,
"advance_count" : 6012,
"score" : 0,
"build_scorer_count" : 54,
"create_weight" : 8210,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 145734
},
"children" : [
{
"type" : "TermQuery",
"description" : "applicationId:cbcd0350-ba63-11e9-a4af-ed719166c0ae",
"time_in_nanos" : 704814,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 0,
"match" : 0,
"next_doc_count" : 0,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 593783,
"advance_count" : 6012,
"score" : 0,
"build_scorer_count" : 54,
"create_weight" : 4011,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 100953
}
}
]
},
{
"type" : "IndexOrDocValuesQuery",
"description" : "timestamp:[1601142184297 TO 1601143384297]",
"time_in_nanos" : 4533095,
"breakdown" : {
"set_min_competitive_score_count" : 0,
"match_count" : 0,
"shallow_advance_count" : 0,
"set_min_competitive_score" : 0,
"next_doc" : 542974,
"match" : 0,
"next_doc_count" : 5994,
"score_count" : 0,
"compute_max_score_count" : 0,
"compute_max_score" : 0,
"advance" : 212511,
"advance_count" : 1996,
"score" : 0,
"build_scorer_count" : 54,
"create_weight" : 1122,
"shallow_advance" : 0,
"create_weight_count" : 1,
"build_scorer" : 3768443
}
}
]
}
],
"rewrite_time" : 50858,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time_in_nanos" : 2098312,
"children" : [
{
"name" : "SimpleFieldCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 812015
}
]
}
]
}
],
"aggregations" : [ ]
}
]
}
}
The issue (apart from the query being slow) is that the profile API reports 9193115 nanos, which is about 9 ms, plus 2 ms for collection. Which other stages can make it so much slower, given that only one shard is queried thanks to the routing?
Update: search is slow when there's heavy indexing (but CPU and memory are fine)
I have a query result like this:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 6,
"successful" : 6,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 15.0735855,
"hits" : [
{
"_index" : "khoso",
"_type" : "sim",
"_id" : "0964693123",
"_score" : 15.0735855,
"_source" : {
"id" : "0964693123",
"i" : "0964693123",
"ut" : 10,
"utP" : 1,
"utT" : 1,
"utC" : 1,
"f" : "09646.93.123",
"s" : [
{
"id" : 268,
"p" : 800000,
"pb" : 800000,
"pg" : 560000,
"l" : {
"sec" : 0,
"usec" : 0
},
"da" : {
"sec" : 0,
"usec" : 0
},
"d" : true,
"d3" : true,
"d2" : true
},
{
"id" : 2067,
"p" : 750000,
"pb" : 699000,
"pg" : 524250,
"l" : {
"sec" : 0,
"usec" : 0
},
"da" : {
"sec" : 0,
"usec" : 0
},
"d" : true,
"d3" : true,
"d2" : true
}
],
"s2" : [
268,
2067
],
"pt" : 4.5,
"m" : 10,
"p" : 0,
"pb" : 800000,
"pg" : 560000,
"c" : [
81,
111
],
"c2" : 81,
"t" : 1,
"d" : true,
"d2" : true,
"l" : {
"sec" : 0,
"usec" : 0
},
"d3" : true,
"h" : true,
"hg" : true,
"e" : "693123",
"pn" : 960000,
"s3" : [ ]
}
}
]
}
}
Now I want to remove an object from this array, for example the object with id == 268, so that the result looks like this:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 6,
"successful" : 6,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 15.0735855,
"hits" : [
{
"_index" : "khoso",
"_type" : "sim",
"_id" : "0964693123",
"_score" : 15.0735855,
"_source" : {
"id" : "0964693123",
"i" : "0964693123",
"ut" : 10,
"utP" : 1,
"utT" : 1,
"utC" : 1,
"f" : "09646.93.123",
"s" : [
{
"id" : 2067,
"p" : 750000,
"pb" : 699000,
"pg" : 524250,
"l" : {
"sec" : 0,
"usec" : 0
},
"da" : {
"sec" : 0,
"usec" : 0
},
"d" : true,
"d3" : true,
"d2" : true
}
],
"s2" : [
268,
2067
],
"pt" : 4.5,
"m" : 10,
"p" : 0,
"pb" : 800000,
"pg" : 560000,
"c" : [
81,
111
],
"c2" : 81,
"t" : 1,
"d" : true,
"d2" : true,
"l" : {
"sec" : 0,
"usec" : 0
},
"d3" : true,
"h" : true,
"hg" : true,
"e" : "693123",
"pn" : 960000,
"s3" : [ ]
}
}
]
}
}
How can I do it? Please give me a query to do this. Thank you!
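One way to do this, sketched under the assumption that Painless scripting is enabled and that a 6.x-style typed endpoint applies (the output above still shows a _type of sim; on 7.x the path would be POST khoso/_update/0964693123), is an update script that removes the matching entries from the s array:
POST khoso/sim/0964693123/_update
{
  "script": {
    "lang": "painless",
    "source": "ctx._source.s.removeIf(item -> item.id == params.remove_id)",
    "params": {
      "remove_id": 268
    }
  }
}
To apply the same change to many documents at once, the same script can be run through the _update_by_query API with a query selecting the documents to modify.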