We are using OpenSearch with Elasticsearch v7.10.2, and we have a large index into which we index data regularly and from which we also regularly delete data once it is a month old. Over time we have been experiencing a degradation in search performance for our queries, which mostly use has_child queries. I want to know whether the deleted docs are still residing in my index and still consuming resources and, if so, how I can get them removed. I have attached a few stats below.
GET _cat/segments?index=segment_index_570de84a4f0d925f98343571&s=docs.deleted
A few of my segments have reached 5 GB as well; I am attaching only a few example segments here for better understanding.
segment_index_570de84a4f0d925f98343571 83 r x.x.x.x _36u6 148830 4566709 5083016 4.2gb 292024 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 83 p x.x.x.x _36u6 148830 4566709 5083016 4.2gb 292024 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 84 p x.x.x.x _37hm 149674 4569844 5127644 4.4gb 290112 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 84 r x.x.x.x _37hm 149674 4569838 5127650 4.4gb 290112 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 40 p x.x.x.x _30oh 140849 3765550 5460181 4.7gb 292616 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 40 r x.x.x.x _30oh 140849 3765544 5460187 4.7gb 292616 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 29 r x.x.x.x _2ygs 137980 3359481 5519471 4.9gb 287504 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 29 p x.x.x.x _2ygs 137980 3359481 5519471 4.9gb 287504 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 13 p x.x.x.x _2u1h 132245 2731374 5565823 4gb 289480 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 13 r x.x.x.x _2u1h 132245 2731372 5565825 4gb 289480 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 57 p x.x.x.x _3bx3 155415 4144634 5615761 4.5gb 304728 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 57 r x.x.x.x _3bx3 155415 4144620 5615775 4.5gb 304728 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 10 r x.x.x.x _2yau 137766 3535782 5821903 4.6gb 290072 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 10 p x.x.x.x _2yau 137766 3535778 5821907 4.6gb 290072 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 5 r x.x.x.x _2vsz 134531 3052529 5940943 5gb 287288 true true 8.7.0 false
segment_index_570de84a4f0d925f98343571 5 p x.x.x.x _2vsz 134531 3052529 5940943 5gb 287288 true true 8.7.0 false
GET segment_index_570de84a4f0d925f98343571/_stats
{
"_shards" : {
"total" : 200,
"successful" : 200,
"failed" : 0
},
"_all" : {
"primaries" : {
"docs" : {
"count" : 2764210965,
"deleted" : 768121801
},
"store" : {
"size_in_bytes" : 1882036902899,
"reserved_in_bytes" : 0
},
"indexing" : {
"index_total" : 35049143,
"index_time_in_millis" : 93342006,
"index_current" : 1,
"index_failed" : 7,
"delete_total" : 28158400,
"delete_time_in_millis" : 2241164,
"delete_current" : 0,
"noop_update_total" : 108,
"is_throttled" : false,
"throttle_time_in_millis" : 0
},
"get" : {
"total" : 8272612,
"time_in_millis" : 9407739,
"exists_total" : 7714730,
"exists_time_in_millis" : 6864869,
"missing_total" : 557882,
"missing_time_in_millis" : 2542870,
"current" : 0
},
"search" : {
"open_contexts" : 99,
"query_total" : 781661,
"query_time_in_millis" : 46180985,
"query_current" : 0,
"fetch_total" : 25828,
"fetch_time_in_millis" : 31922549,
"fetch_current" : 0,
"scroll_total" : 150005,
"scroll_time_in_millis" : 3934488045,
"scroll_current" : 99,
"suggest_total" : 0,
"suggest_time_in_millis" : 0,
"suggest_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 5069,
"total_time_in_millis" : 15670663,
"total_docs" : 232287891,
"total_size_in_bytes" : 144734357228,
"total_stopped_time_in_millis" : 0,
"total_throttled_time_in_millis" : 3012035,
"total_auto_throttle_in_bytes" : 1635281344
},
"refresh" : {
"total" : 26258,
"total_time_in_millis" : 24294337,
"external_total" : 23116,
"external_total_time_in_millis" : 51934585,
"listeners" : 0
},
"flush" : {
"total" : 129,
"periodic" : 29,
"total_time_in_millis" : 83553
},
"warmer" : {
"current" : 0,
"total" : 23099,
"total_time_in_millis" : 28150896
},
"query_cache" : {
"memory_size_in_bytes" : 1971367836,
"total_count" : 5941141,
"hit_count" : 1281540,
"miss_count" : 4659601,
"cache_size" : 34136,
"cache_count" : 34655,
"evictions" : 519
},
"fielddata" : {
"memory_size_in_bytes" : 2270860360,
"evictions" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 3062,
"memory_in_bytes" : 686053834,
"terms_memory_in_bytes" : 583830952,
"stored_fields_memory_in_bytes" : 2159936,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 84022720,
"points_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 16040226,
"index_writer_memory_in_bytes" : 3083169874,
"version_map_memory_in_bytes" : 14212574,
"fixed_bit_set_memory_in_bytes" : 441678080,
"max_unsafe_auto_id_timestamp" : -1,
"file_sizes" : { }
},
"translog" : {
"operations" : 1145991,
"size_in_bytes" : 1862599220,
"uncommitted_operations" : 1145991,
"uncommitted_size_in_bytes" : 1862599220,
"earliest_last_modified_age" : 10
},
"request_cache" : {
"memory_size_in_bytes" : 0,
"evictions" : 0,
"hit_count" : 692,
"miss_count" : 7824
},
"recovery" : {
"current_as_source" : 0,
"current_as_target" : 0,
"throttle_time_in_millis" : 146589584
}
},
"total" : {
"docs" : {
"count" : 5528419715,
"deleted" : 1568758887
},
"store" : {
"size_in_bytes" : 3779599075512,
"reserved_in_bytes" : 0
},
"indexing" : {
"index_total" : 65246167,
"index_time_in_millis" : 116379853,
"index_current" : 2,
"index_failed" : 7,
"delete_total" : 56316800,
"delete_time_in_millis" : 4569453,
"delete_current" : 0,
"noop_update_total" : 108,
"is_throttled" : false,
"throttle_time_in_millis" : 0
},
"get" : {
"total" : 8279717,
"time_in_millis" : 9461541,
"exists_total" : 7721681,
"exists_time_in_millis" : 6917878,
"missing_total" : 558036,
"missing_time_in_millis" : 2543663,
"current" : 0
},
"search" : {
"open_contexts" : 200,
"query_total" : 1421264,
"query_time_in_millis" : 84711977,
"query_current" : 0,
"fetch_total" : 47121,
"fetch_time_in_millis" : 55494456,
"fetch_current" : 2,
"scroll_total" : 282690,
"scroll_time_in_millis" : 6909135621,
"scroll_current" : 200,
"suggest_total" : 0,
"suggest_time_in_millis" : 0,
"suggest_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 8563,
"total_time_in_millis" : 30676821,
"total_docs" : 452795172,
"total_size_in_bytes" : 273814327525,
"total_stopped_time_in_millis" : 0,
"total_throttled_time_in_millis" : 6337362,
"total_auto_throttle_in_bytes" : 3305840977
},
"refresh" : {
"total" : 47329,
"total_time_in_millis" : 46367778,
"external_total" : 43783,
"external_total_time_in_millis" : 98641382,
"listeners" : 0
},
"flush" : {
"total" : 298,
"periodic" : 98,
"total_time_in_millis" : 210368
},
"warmer" : {
"current" : 0,
"total" : 43760,
"total_time_in_millis" : 52941301
},
"query_cache" : {
"memory_size_in_bytes" : 3882183058,
"total_count" : 10826442,
"hit_count" : 2195511,
"miss_count" : 8630931,
"cache_size" : 66063,
"cache_count" : 66884,
"evictions" : 821
},
"fielddata" : {
"memory_size_in_bytes" : 4524309840,
"evictions" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 6121,
"memory_in_bytes" : 1359222728,
"terms_memory_in_bytes" : 1155693088,
"stored_fields_memory_in_bytes" : 4324024,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 166294144,
"points_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 32911472,
"index_writer_memory_in_bytes" : 5666776518,
"version_map_memory_in_bytes" : 26231773,
"fixed_bit_set_memory_in_bytes" : 887417576,
"max_unsafe_auto_id_timestamp" : -1,
"file_sizes" : { }
},
"translog" : {
"operations" : 31206542,
"size_in_bytes" : 28262050766,
"uncommitted_operations" : 31206542,
"uncommitted_size_in_bytes" : 28262050766,
"earliest_last_modified_age" : 10
},
"request_cache" : {
"memory_size_in_bytes" : 0,
"evictions" : 0,
"hit_count" : 1296,
"miss_count" : 13655
},
"recovery" : {
"current_as_source" : 0,
"current_as_target" : 0,
"throttle_time_in_millis" : 229545608
}
}
},
"indices" : {
"segment_index_570de84a4f0d925f98343571" : {
"uuid" : "fhZUqTwfSeum3hHlyFaILw",
"primaries" : {
"docs" : {
"count" : 2764210965,
"deleted" : 768121801
},
"store" : {
"size_in_bytes" : 1882036902899,
"reserved_in_bytes" : 0
},
"indexing" : {
"index_total" : 35049143,
"index_time_in_millis" : 93342006,
"index_current" : 1,
"index_failed" : 7,
"delete_total" : 28158400,
"delete_time_in_millis" : 2241164,
"delete_current" : 0,
"noop_update_total" : 108,
"is_throttled" : false,
"throttle_time_in_millis" : 0
},
"get" : {
"total" : 8272612,
"time_in_millis" : 9407739,
"exists_total" : 7714730,
"exists_time_in_millis" : 6864869,
"missing_total" : 557882,
"missing_time_in_millis" : 2542870,
"current" : 0
},
"search" : {
"open_contexts" : 99,
"query_total" : 781661,
"query_time_in_millis" : 46180985,
"query_current" : 0,
"fetch_total" : 25828,
"fetch_time_in_millis" : 31922549,
"fetch_current" : 0,
"scroll_total" : 150005,
"scroll_time_in_millis" : 3934488045,
"scroll_current" : 99,
"suggest_total" : 0,
"suggest_time_in_millis" : 0,
"suggest_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 5069,
"total_time_in_millis" : 15670663,
"total_docs" : 232287891,
"total_size_in_bytes" : 144734357228,
"total_stopped_time_in_millis" : 0,
"total_throttled_time_in_millis" : 3012035,
"total_auto_throttle_in_bytes" : 1635281344
},
"refresh" : {
"total" : 26258,
"total_time_in_millis" : 24294337,
"external_total" : 23116,
"external_total_time_in_millis" : 51934585,
"listeners" : 0
},
"flush" : {
"total" : 129,
"periodic" : 29,
"total_time_in_millis" : 83553
},
"warmer" : {
"current" : 0,
"total" : 23099,
"total_time_in_millis" : 28150896
},
"query_cache" : {
"memory_size_in_bytes" : 1971367836,
"total_count" : 5941141,
"hit_count" : 1281540,
"miss_count" : 4659601,
"cache_size" : 34136,
"cache_count" : 34655,
"evictions" : 519
},
"fielddata" : {
"memory_size_in_bytes" : 2270860360,
"evictions" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 3062,
"memory_in_bytes" : 686053834,
"terms_memory_in_bytes" : 583830952,
"stored_fields_memory_in_bytes" : 2159936,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 84022720,
"points_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 16040226,
"index_writer_memory_in_bytes" : 3083169874,
"version_map_memory_in_bytes" : 14212574,
"fixed_bit_set_memory_in_bytes" : 441678080,
"max_unsafe_auto_id_timestamp" : -1,
"file_sizes" : { }
},
"translog" : {
"operations" : 1145991,
"size_in_bytes" : 1862599220,
"uncommitted_operations" : 1145991,
"uncommitted_size_in_bytes" : 1862599220,
"earliest_last_modified_age" : 10
},
"request_cache" : {
"memory_size_in_bytes" : 0,
"evictions" : 0,
"hit_count" : 692,
"miss_count" : 7824
},
"recovery" : {
"current_as_source" : 0,
"current_as_target" : 0,
"throttle_time_in_millis" : 146589584
}
},
"total" : {
"docs" : {
"count" : 5528419715,
"deleted" : 1568758887
},
"store" : {
"size_in_bytes" : 3779599075512,
"reserved_in_bytes" : 0
},
"indexing" : {
"index_total" : 65246167,
"index_time_in_millis" : 116379853,
"index_current" : 2,
"index_failed" : 7,
"delete_total" : 56316800,
"delete_time_in_millis" : 4569453,
"delete_current" : 0,
"noop_update_total" : 108,
"is_throttled" : false,
"throttle_time_in_millis" : 0
},
"get" : {
"total" : 8279717,
"time_in_millis" : 9461541,
"exists_total" : 7721681,
"exists_time_in_millis" : 6917878,
"missing_total" : 558036,
"missing_time_in_millis" : 2543663,
"current" : 0
},
"search" : {
"open_contexts" : 200,
"query_total" : 1421264,
"query_time_in_millis" : 84711977,
"query_current" : 0,
"fetch_total" : 47121,
"fetch_time_in_millis" : 55494456,
"fetch_current" : 2,
"scroll_total" : 282690,
"scroll_time_in_millis" : 6909135621,
"scroll_current" : 200,
"suggest_total" : 0,
"suggest_time_in_millis" : 0,
"suggest_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 8563,
"total_time_in_millis" : 30676821,
"total_docs" : 452795172,
"total_size_in_bytes" : 273814327525,
"total_stopped_time_in_millis" : 0,
"total_throttled_time_in_millis" : 6337362,
"total_auto_throttle_in_bytes" : 3305840977
},
"refresh" : {
"total" : 47329,
"total_time_in_millis" : 46367778,
"external_total" : 43783,
"external_total_time_in_millis" : 98641382,
"listeners" : 0
},
"flush" : {
"total" : 298,
"periodic" : 98,
"total_time_in_millis" : 210368
},
"warmer" : {
"current" : 0,
"total" : 43760,
"total_time_in_millis" : 52941301
},
"query_cache" : {
"memory_size_in_bytes" : 3882183058,
"total_count" : 10826442,
"hit_count" : 2195511,
"miss_count" : 8630931,
"cache_size" : 66063,
"cache_count" : 66884,
"evictions" : 821
},
"fielddata" : {
"memory_size_in_bytes" : 4524309840,
"evictions" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 6121,
"memory_in_bytes" : 1359222728,
"terms_memory_in_bytes" : 1155693088,
"stored_fields_memory_in_bytes" : 4324024,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 166294144,
"points_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 32911472,
"index_writer_memory_in_bytes" : 5666776518,
"version_map_memory_in_bytes" : 26231773,
"fixed_bit_set_memory_in_bytes" : 887417576,
"max_unsafe_auto_id_timestamp" : -1,
"file_sizes" : { }
},
"translog" : {
"operations" : 31206542,
"size_in_bytes" : 28262050766,
"uncommitted_operations" : 31206542,
"uncommitted_size_in_bytes" : 28262050766,
"earliest_last_modified_age" : 10
},
"request_cache" : {
"memory_size_in_bytes" : 0,
"evictions" : 0,
"hit_count" : 1296,
"miss_count" : 13655
},
"recovery" : {
"current_as_source" : 0,
"current_as_target" : 0,
"throttle_time_in_millis" : 229545608
}
}
}
}
}
Please guide me on the best ways to deal with this and optimise the search performance.
What I usually do in such cases is run a force merge that only expunges deleted docs:
POST _forcemerge?only_expunge_deletes=true
Since the ratio of deleted/total docs is ~30% that should allow you to regain some space...
Try it out on a single index first. Record the size before and after and you should see some space gained.
I have the following query.explain(1) output. It is verbose, and my question is how to read it. What is the order of the operations? Does it start with GEO_NEAR_2DSPHERE or with LIMIT? What does the field "advanced" express?
And most importantly, where is this documented? I could not find it in the MongoDB manual :(
Query:
db.nodesWays.find(
{
geo:{
$nearSphere:{
$geometry:{
type: "Point",
coordinates: [lon, lat]
}
}
},
"amenity":"restaurant"
},
{name:1}
).limit(10).explain(1)
The output:
{
"cursor" : "S2NearCursor",
"isMultiKey" : false,
"n" : 10,
"nscannedObjects" : 69582,
"nscanned" : 69582,
"nscannedObjectsAllPlans" : 69582,
"nscannedAllPlans" : 69582,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 543,
"nChunkSkips" : 0,
"millis" : 606,
"indexBounds" : {
},
"allPlans" : [
{
"cursor" : "S2NearCursor",
"isMultiKey" : false,
"n" : 10,
"nscannedObjects" : 69582,
"nscanned" : 69582,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
}
}
],
"server" : "DBTest:27017",
"filterSet" : false,
"stats" : {
"type" : "LIMIT",
"works" : 69582,
"yields" : 543,
"unyields" : 543,
"invalidates" : 0,
"advanced" : 10,
"needTime" : 69572,
"needFetch" : 0,
"isEOF" : 1,
"children" : [
{
"type" : "PROJECTION",
"works" : 69582,
"yields" : 543,
"unyields" : 543,
"invalidates" : 0,
"advanced" : 10,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 0,
"children" : [
{
"type" : "FETCH",
"works" : 69582,
"yields" : 543,
"unyields" : 543,
"invalidates" : 0,
"advanced" : 10,
"needTime" : 69572,
"needFetch" : 0,
"isEOF" : 0,
"alreadyHasObj" : 4028,
"forcedFetches" : 0,
"matchTested" : 10,
"children" : [
{
"type" : "GEO_NEAR_2DSPHERE",
"works" : 69582,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 4028,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 0,
"children" : [ ]
}
]
}
]
}
]
}
}
Looking at the nested stats tree (read from the innermost child outwards), the sequence should be:
GEO_NEAR_2DSPHERE -> scans 69582 index objects.
Fetch and limit -> fetches the matched documents, up to the limited number of documents.
Projection -> Project to return only required fields.
The reason MongoDB wraps all actions in LIMIT is to align with the query's syntax for easier interpretation.
The query uses an unnamed index of type S2NearCursor. In addition to using the index, it also retrieved the whole document for further filtering on amenity. You may want to explore indexing that field as well.
BTW, this is a known bug in MongoDB: it omits the index name when an S2NearCursor index is used.
As for detailed documentation, I couldn't find much myself either, but here are a few online blog posts you can browse:
explain.explain() – Understanding Mongo Query Behavior
Speeding Up Queries: Understanding Query Plans
I especially recommend paying attention to the last paragraph of each of the two blog posts. Tune the query, generate the query plan, and try to explain the plan yourself. After doing this for a number of rounds, you'll get some idea of how it works.
Happy explaining. : )