Related
I have an Elasticsearch index that I was using to store a whole bunch of data spread across 2 data nodes. One of the data nodes accidentally ran out of hard drive space, which caused ES to crash. After I cleared the drive (of unrelated files; I didn't touch the ES data folder) and restarted the ES node, the index shards didn't recover. The cluster knows there should be shards on this node, but the recovery logic seems to be stalled. Here is the full response from _recovery?human&detailed=true:
{
"events_v4": {
"shards": [
{
"id": 3,
"type": "EMPTY_STORE",
"stage": "DONE",
"primary": true,
"start_time": "2018-09-26T17:35:35.275Z",
"start_time_in_millis": 1537983335275,
"stop_time": "2018-09-26T17:35:35.764Z",
"stop_time_in_millis": 1537983335764,
"total_time": "489ms",
"total_time_in_millis": 489,
"source": {},
"target": {
"id": "wu_A6DNPSiqrg6JfzAW28Q",
"host": "10.8.81.41",
"transport_address": "10.8.81.41:9300",
"ip": "10.8.81.41",
"name": "wu_A6DN"
},
"index": {
"size": {
"total": "0b",
"total_in_bytes": 0,
"reused": "0b",
"reused_in_bytes": 0,
"recovered": "0b",
"recovered_in_bytes": 0,
"percent": "0.0%"
},
"files": {
"total": 0,
"reused": 0,
"recovered": 0,
"percent": "0.0%"
},
"total_time": "0s",
"total_time_in_millis": 0,
"source_throttle_time": "-1",
"source_throttle_time_in_millis": 0,
"target_throttle_time": "-1",
"target_throttle_time_in_millis": 0
},
"translog": {
"recovered": 0,
"total": 0,
"percent": "100.0%",
"total_on_start": 0,
"total_time": "471ms",
"total_time_in_millis": 471
},
"verify_index": {
"check_index_time": "0s",
"check_index_time_in_millis": 0,
"total_time": "0s",
"total_time_in_millis": 0
}
},
{
"id": 7,
"type": "EMPTY_STORE",
"stage": "DONE",
"primary": true,
"start_time": "2018-09-26T17:35:35.242Z",
"start_time_in_millis": 1537983335242,
"stop_time": "2018-09-26T17:35:35.481Z",
"stop_time_in_millis": 1537983335481,
"total_time": "238ms",
"total_time_in_millis": 238,
"source": {},
"target": {
"id": "wu_A6DNPSiqrg6JfzAW28Q",
"host": "10.8.81.41",
"transport_address": "10.8.81.41:9300",
"ip": "10.8.81.41",
"name": "wu_A6DN"
},
"index": {
"size": {
"total": "0b",
"total_in_bytes": 0,
"reused": "0b",
"reused_in_bytes": 0,
"recovered": "0b",
"recovered_in_bytes": 0,
"percent": "0.0%"
},
"files": {
"total": 0,
"reused": 0,
"recovered": 0,
"percent": "0.0%"
},
"total_time": "0s",
"total_time_in_millis": 0,
"source_throttle_time": "-1",
"source_throttle_time_in_millis": 0,
"target_throttle_time": "-1",
"target_throttle_time_in_millis": 0
},
"translog": {
"recovered": 0,
"total": 0,
"percent": "100.0%",
"total_on_start": 0,
"total_time": "204ms",
"total_time_in_millis": 204
},
"verify_index": {
"check_index_time": "0s",
"check_index_time_in_millis": 0,
"total_time": "0s",
"total_time_in_millis": 0
}
},
{
"id": 1,
"type": "EMPTY_STORE",
"stage": "DONE",
"primary": true,
"start_time": "2018-09-26T17:35:35.261Z",
"start_time_in_millis": 1537983335261,
"stop_time": "2018-09-26T17:35:35.760Z",
"stop_time_in_millis": 1537983335760,
"total_time": "498ms",
"total_time_in_millis": 498,
"source": {},
"target": {
"id": "wu_A6DNPSiqrg6JfzAW28Q",
"host": "10.8.81.41",
"transport_address": "10.8.81.41:9300",
"ip": "10.8.81.41",
"name": "wu_A6DN"
},
"index": {
"size": {
"total": "0b",
"total_in_bytes": 0,
"reused": "0b",
"reused_in_bytes": 0,
"recovered": "0b",
"recovered_in_bytes": 0,
"percent": "0.0%"
},
"files": {
"total": 0,
"reused": 0,
"recovered": 0,
"percent": "0.0%"
},
"total_time": "0s",
"total_time_in_millis": 0,
"source_throttle_time": "-1",
"source_throttle_time_in_millis": 0,
"target_throttle_time": "-1",
"target_throttle_time_in_millis": 0
},
"translog": {
"recovered": 0,
"total": 0,
"percent": "100.0%",
"total_on_start": 0,
"total_time": "460ms",
"total_time_in_millis": 460
},
"verify_index": {
"check_index_time": "0s",
"check_index_time_in_millis": 0,
"total_time": "0s",
"total_time_in_millis": 0
}
},
{
"id": 5,
"type": "EMPTY_STORE",
"stage": "DONE",
"primary": true,
"start_time": "2018-09-26T17:35:35.290Z",
"start_time_in_millis": 1537983335290,
"stop_time": "2018-09-26T17:35:35.784Z",
"stop_time_in_millis": 1537983335784,
"total_time": "493ms",
"total_time_in_millis": 493,
"source": {},
"target": {
"id": "wu_A6DNPSiqrg6JfzAW28Q",
"host": "10.8.81.41",
"transport_address": "10.8.81.41:9300",
"ip": "10.8.81.41",
"name": "wu_A6DN"
},
"index": {
"size": {
"total": "0b",
"total_in_bytes": 0,
"reused": "0b",
"reused_in_bytes": 0,
"recovered": "0b",
"recovered_in_bytes": 0,
"percent": "0.0%"
},
"files": {
"total": 0,
"reused": 0,
"recovered": 0,
"percent": "0.0%"
},
"total_time": "0s",
"total_time_in_millis": 0,
"source_throttle_time": "-1",
"source_throttle_time_in_millis": 0,
"target_throttle_time": "-1",
"target_throttle_time_in_millis": 0
},
"translog": {
"recovered": 0,
"total": 0,
"percent": "100.0%",
"total_on_start": 0,
"total_time": "465ms",
"total_time_in_millis": 465
},
"verify_index": {
"check_index_time": "0s",
"check_index_time_in_millis": 0,
"total_time": "0s",
"total_time_in_millis": 0
}
}
]
}
}
The recovery type EMPTY_STORE is undocumented, and I can't find anything about it beyond a single unanswered question on the ES forums. Does anyone know whether the index is hosed, or do I just need to wait a bit? Is there anything I can do to trigger the recovery to advance?
This is ES version 5.4.3.
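For context, the allocation view of the same shards can be pulled with the cat shards API and, on 5.x, the cluster allocation explain API (the shard number below is just one of the primaries listed above):
GET _cat/shards/events_v4?v&h=index,shard,prirep,state,unassigned.reason,node
GET _cluster/allocation/explain
{
  "index": "events_v4",
  "shard": 3,
  "primary": true
}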
I am using the AWS Elasticsearch Service, with each node having 16 GB of RAM, 4 cores, and 160 GB of EBS storage. While looking at the output of
GET _nodes/stats
I found that I have only 0.27 GB of free memory left on my node. I understand that approximately the following amounts of memory are dedicated to these components:
Heap: 8 GB
Swap: 2 GB
Fielddata + segments: 1 GB
I don't understand what happens to the remaining 4-5 GB of memory. Which process is consuming it?
Below are the stats of one node, obtained from the API mentioned above:
{
"timestamp": 1536212516381,
"name": "",
"roles": [
"master",
"data",
"ingest"
],
"indices": {
"docs": {
"count": 40811869,
"deleted": 12313549
},
"store": {
"size_in_bytes": 21211684802
},
"indexing": {
"index_total": 17141254,
"index_time_in_millis": 23271050,
"index_current": 0,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 11,
"time_in_millis": 4,
"exists_total": 10,
"exists_time_in_millis": 4,
"missing_total": 1,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 60495650,
"query_time_in_millis": 164996296,
"query_current": 0,
"fetch_total": 1338,
"fetch_time_in_millis": 3947,
"fetch_current": 0,
"scroll_total": 1541,
"scroll_time_in_millis": 147381782,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 752561,
"total_time_in_millis": 156448427,
"total_docs": 6278661063,
"total_size_in_bytes": 2061451815435,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 64499,
"total_auto_throttle_in_bytes": 231968018126
},
"refresh": {
"total": 9621793,
"total_time_in_millis": 126038098,
"listeners": 0
},
"flush": {
"total": 135925,
"total_time_in_millis": 2306674
},
"warmer": {
"current": 0,
"total": 8576200,
"total_time_in_millis": 3784313
},
"query_cache": {
"memory_size_in_bytes": 47531137,
"total_count": 315917952,
"hit_count": 69088483,
"miss_count": 246829469,
"cache_size": 16561,
"cache_count": 740755,
"evictions": 724194
},
"fielddata": {
"memory_size_in_bytes": 19006048,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 2256,
"memory_in_bytes": 62556325,
"terms_memory_in_bytes": 50154620,
"stored_fields_memory_in_bytes": 6587256,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 557504,
"points_memory_in_bytes": 2812593,
"doc_values_memory_in_bytes": 2444352,
"index_writer_memory_in_bytes": 0,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 6743824,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {}
},
"translog": {
"operations": 322600,
"size_in_bytes": 511751430,
"uncommitted_operations": 22528,
"uncommitted_size_in_bytes": 35107521
},
"request_cache": {
"memory_size_in_bytes": 84792534,
"evictions": 13922143,
"hit_count": 7667542,
"miss_count": 35693456
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 952628
}
},
"os": {
"timestamp": 1536212516872,
"cpu": {
"percent": 23,
"load_average": {
"1m": 1.29,
"5m": 1.19,
"15m": 1.16
}
},
"mem": {
"total_in_bytes": 16823488512,
"free_in_bytes": 297766912,
"used_in_bytes": 16525721600,
"free_percent": 2,
"used_percent": 98
},
"swap": {
"total_in_bytes": 2147479552,
"free_in_bytes": 2143141888,
"used_in_bytes": 4337664
}
},
"process": {
"timestamp": 1536212516872,
"open_file_descriptors": 2573,
"max_file_descriptors": 128000,
"cpu": {
"percent": 19,
"total_in_millis": 4177826450
},
"mem": {
"total_virtual_in_bytes": 34029547520
}
},
"jvm": {
"timestamp": 1536212516874,
"uptime_in_millis": 6132906547,
"mem": {
"heap_used_in_bytes": 5277036968,
"heap_used_percent": 61,
"heap_committed_in_bytes": 8555069440,
"heap_max_in_bytes": 8555069440,
"non_heap_used_in_bytes": 278172640,
"non_heap_committed_in_bytes": 304259072,
"pools": {
"young": {
"used_in_bytes": 246875928,
"max_in_bytes": 279183360,
"peak_used_in_bytes": 279183360,
"peak_max_in_bytes": 279183360
},
"survivor": {
"used_in_bytes": 6813256,
"max_in_bytes": 34865152,
"peak_used_in_bytes": 34865152,
"peak_max_in_bytes": 34865152
},
"old": {
"used_in_bytes": 5023347784,
"max_in_bytes": 8241020928,
"peak_used_in_bytes": 6966292552,
"peak_max_in_bytes": 8241020928
}
}
},
"threads": {
"count": 234,
"peak_count": 250
},
"gc": {
"collectors": {
"young": {
"collection_count": 4474572,
"collection_time_in_millis": 127468649
},
"old": {
"collection_count": 920,
"collection_time_in_millis": 325448
}
}
},
"buffer_pools": {
"direct": {
"count": 463,
"used_in_bytes": 154976141,
"total_capacity_in_bytes": 154976139
},
"mapped": {
"count": 4914,
"used_in_bytes": 21027708516,
"total_capacity_in_bytes": 21027708516
}
},
"classes": {
"current_loaded_count": 26154,
"total_loaded_count": 26712,
"total_unloaded_count": 558
}
},
"thread_pool": {
"bulk": {
"threads": 4,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 4,
"completed": 17141525
},
"fetch_shard_started": {
"threads": 0,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 0,
"completed": 0
},
"fetch_shard_store": {
"threads": 0,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 0,
"completed": 0
},
"flush": {
"threads": 2,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 2,
"completed": 271881
},
"force_merge": {
"threads": 0,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 0,
"completed": 0
},
"generic": {
"threads": 32,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 128,
"completed": 43205055
},
"get": {
"threads": 4,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 4,
"completed": 8
},
"index": {
"threads": 3,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 3,
"completed": 3
},
"listener": {
"threads": 2,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 2,
"completed": 4
},
"management": {
"threads": 5,
"queue": 0,
"active": 1,
"rejected": 0,
"largest": 5,
"completed": 28115022
},
"refresh": {
"threads": 2,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 2,
"completed": 1023804661
},
"search": {
"threads": 7,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 7,
"completed": 161702240
},
"snapshot": {
"threads": 1,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 2,
"completed": 21133
},
"warmer": {
"threads": 2,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 2,
"completed": 95146934
}
},
"fs": {
"timestamp": 1536212516874,
"total": {
"total_in_bytes": 168968957952,
"free_in_bytes": 147126472704,
"available_in_bytes": 138519760896
},
"data": [
{
"type": "ext4",
"total_in_bytes": 168968957952,
"free_in_bytes": 147126472704,
"available_in_bytes": 138519760896
}
],
"io_stats": {
"devices": [
{
"device_name": "xvdf",
"operations": 133367878,
"read_operations": 1456763,
"write_operations": 131911115,
"read_kilobytes": 104740824,
"write_kilobytes": 3178855500
}
],
"total": {
"operations": 133367878,
"read_operations": 1456763,
"write_operations": 131911115,
"read_kilobytes": 104740824,
"write_kilobytes": 3178855500
}
}
},
"breakers": {
"request": {
"limit_size_in_bytes": 5133041664,
"limit_size": "4.7gb",
"estimated_size_in_bytes": 0,
"estimated_size": "0b",
"overhead": 1,
"tripped": 0
},
"fielddata": {
"limit_size_in_bytes": 5133041664,
"limit_size": "4.7gb",
"estimated_size_in_bytes": 19006048,
"estimated_size": "18.1mb",
"overhead": 1.03,
"tripped": 0
},
"in_flight_requests": {
"limit_size_in_bytes": 8555069440,
"limit_size": "7.9gb",
"estimated_size_in_bytes": 1657,
"estimated_size": "1.6kb",
"overhead": 1,
"tripped": 0
},
"accounting": {
"limit_size_in_bytes": 8555069440,
"limit_size": "7.9gb",
"estimated_size_in_bytes": 62556325,
"estimated_size": "59.6mb",
"overhead": 1,
"tripped": 0
},
"parent": {
"limit_size_in_bytes": 5988548608,
"limit_size": "5.5gb",
"estimated_size_in_bytes": 81564030,
"estimated_size": "77.7mb",
"overhead": 1,
"tripped": 0
}
},
"script": {
"compilations": 5,
"cache_evictions": 0
},
"discovery": {
"cluster_state_queue": {
"total": 0,
"pending": 0,
"committed": 0
},
"published_cluster_states": {
"full_states": 1,
"incompatible_diffs": 0,
"compatible_diffs": 70303
}
},
"ingest": {
"total": {
"count": 0,
"time_in_millis": 0,
"current": 0,
"failed": 0
},
"pipelines": {}
},
"adaptive_selection": {
"qhqf-YADRF2gS1Vu6EV_8Q": {
"outgoing_searches": 0,
"avg_queue_size": 0,
"avg_service_time_ns": 6520537,
"avg_response_time_ns": 15403676,
"rank": "15.4"
},
"I21NiLksQqCNV3-vP3uCNA": {
"outgoing_searches": 0,
"avg_queue_size": 0,
"avg_service_time_ns": 3079216,
"avg_response_time_ns": 1115616,
"rank": "1.1"
},
"T2_cLfzrQBKd7WTR0p6jWQ": {
"outgoing_searches": 0,
"avg_queue_size": 0,
"avg_service_time_ns": 12493125,
"avg_response_time_ns": 1141514,
"rank": "1.1"
},
"ZZYgl3WARsKE-80HXhRbVw": {
"outgoing_searches": 0,
"avg_queue_size": 0,
"avg_service_time_ns": 3919880,
"avg_response_time_ns": 6063593,
"rank": "6.1"
}
}
}
Can anyone help explain the memory distribution on this node? Is it okay to have only 0.27 GB of free memory, or what should I do to make more memory available on the node?
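For reference, a quick per-node comparison of JVM heap versus total RAM usage can be pulled with the cat nodes API (assuming the _cat endpoints are reachable on the AWS domain):
GET _cat/nodes?v&h=name,heap.percent,heap.current,ram.percent,ram.current,ram.max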
I'm very new to Elasticsearch and Kibana. I'm using the Vega plugin in a Kibana visualization, but I'm not able to create a bar chart from an Elasticsearch aggregation.
I get the expected result when I run the aggregation in Kibana Dev Tools.
I'm attaching the details below along with the sample code; after running it I get a blank page.
Visualization Section:
{
"$schema": "https://vega.github.io/schema/vega/v3.0.json",
"autosize": "fit",
"padding": 6,
"data": [
{
"name": "traffic-revenue",
"url": {
"index": "brnl_tms_plaza",
"body": {
"size": "0",
"aggs": {
"group_by_vehicle_subcat": {
"terms": {
"field": "VehicleSubCatCode.keyword"
}
}
}
},
"format": {
"property": "aggregations.group_by_vehicle_subcat.buckets"
}
}
}
],
"scales": [
{
"name": "xscale",
"type": "band",
"domain": {
"data": "traffic-revenue",
"field": "key"
},
"range": "width",
"padding": 0.05,
"round": true
},
{
"name": "yscale",
"domain": {
"data": "traffic-revenue",
"field": "doc_count"
},
"nice": true,
"range": "height"
}
],
"axes": [
{
"orient": "bottom",
"scale": "xscale"
},
{"orient": "left", "scale": "yscale"}
],
"marks": [
{
"type": "rect",
"from": {
"data": "traffic-revenue"
},
"encode": {
"enter": {
"x": {
"scale": "xscale",
"field": "key",
"axis": {"title": "Vehicle category"}
},
"width": {
"scale": "xscale",
"band": 1
},
"y": {
"scale": "yscale",
"field": "doc_count",
"axis": {"title": "Vehicle Rate Count"}
},
"y2": {
"scale": "yscale",
"value": 0
}
},
"update": {
"fill": {"value": "steelblue"}
},
"hover": {"fill": {"value": "red"}}
}
}
]
}
Data Set
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 48,
"max_score": 0,
"hits": []
},
"aggregations": {
"group_by_vehicle_subcat": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "LMV",
"doc_count": 35
},
{
"key": "BUS",
"doc_count": 3
},
{
"key": "LCV",
"doc_count": 3
},
{
"key": "MAV-5",
"doc_count": 3
},
{
"key": "MAV-4 with trailer",
"doc_count": 2
},
{
"key": "MAV-3 without trailer",
"doc_count": 1
},
{
"key": "MINI-BUS",
"doc_count": 1
}
]
}
}
}
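For reference, the data set above is the response to the equivalent Dev Tools query, reconstructed here from the url/body section of the Vega spec:
GET brnl_tms_plaza/_search
{
  "size": 0,
  "aggs": {
    "group_by_vehicle_subcat": {
      "terms": {
        "field": "VehicleSubCatCode.keyword"
      }
    }
  }
}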
I would recommend debugging your Vega code using static data first, to make sure the specification itself is defined properly.
I'm not sure why, but I was able to get your visualization to draw when I set the autosize property to none and set the height and width explicitly.
Here is a Vega specification based on the one you provided, which should run in the online Vega editor.
{
"$schema": "https://vega.github.io/schema/vega/v3.0.json",
"autosize": "none",
"width": 400,
"height": 500,
"padding": 20,
"data": [
{
"name": "traffic-revenue",
"values": [
{"key": "a", "doc_count": 5},
{"key": "b", "doc_count": 22},
{"key": "c", "doc_count": 1},
{"key": "d", "doc_count": 7},
{"key": "e", "doc_count": 12},
{"key": "f", "doc_count": 2}
]
}
],
"scales": [
{
"name": "xscale",
"type": "band",
"domain": {
"data": "traffic-revenue",
"field": "key"
},
"range": "width",
"padding": 0.05,
"round": true
},
{
"name": "yscale",
"domain": {
"data": "traffic-revenue",
"field": "doc_count"
},
"nice": true,
"range": "height"
}
],
"axes": [
{
"orient": "bottom",
"scale": "xscale"
},
{"orient": "left", "scale": "yscale"}
],
"marks": [
{
"type": "rect",
"from": {
"data": "traffic-revenue"
},
"encode": {
"enter": {
"x": {
"scale": "xscale",
"field": "key",
"axis": {"title": "Vehicle category"}
},
"width": {
"scale": "xscale",
"band": 1
},
"y": {
"scale": "yscale",
"field": "doc_count",
"axis": {"title": "Vehicle Rate Count"}
},
"y2": {
"scale": "yscale",
"value": 0
}
},
"update": {
"fill": {"value": "steelblue"}
},
"hover": {"fill": {"value": "red"}}
}
}
]
}
You may already know this since you have the format tag on your Elasticsearch data, but if your visualization works with statically defined data and not when you pull data from an Elasticsearch query, try looking at the data source directly using the Vega debugging functions described here: https://vega.github.io/vega/docs/api/debugging/.
Running the following in the browser console (with the name of your data source, here "traffic-revenue") should let you look at the data in the format Vega is receiving it: VEGA_DEBUG.view.data("traffic-revenue")
Note: This is cross-posted on the elasticsearch forum (https://discuss.elastic.co/t/store-size-1-000-times-the-document-byte-size/74258/4).
I am experiencing a roughly 1,000x increase in store.size over the document byte size. I've got a very simple mapping with very small documents (less than 1 KB each), and I've compared my mapping to Elasticsearch's internal mapping; they are identical, so it does not appear that any dynamic mapping is going on.
So far I have ingested 60,437 documents and have a store.size of 19.6 GB (an average of roughly 300 KB per document), but the average byte size (String.getBytes().length) of the JSON is 300-400 bytes per document. In another run, the stored size averaged about 1-3 MB per document.
I'm using Elasticsearch 5.2 on an M4.2xlarge EC2 instance. Elasticsearch was installed with mostly default settings, except for what I needed to change to pass the bootstrap checks and bind to a non-local IP. I've allocated 16 GB (half of the physical memory) to Elasticsearch.
I used to run Elasticsearch 2.x and was ingesting FAR more fields and much larger documents than just this handful of fields, yet was only seeing about 20 KB per document, which was still substantial, though manageable.
If anyone can point out anything that would fix this, I would appreciate it. Or is there an ES 5.x configuration I haven't found that would resolve this?
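In case it helps with diagnosis, per-segment sizes for the index can be listed with the cat segments API:
GET _cat/segments/twitter?v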
Below is my mapping.
{
"settings": {
"index.query.default_field": "tweetText"
},
"mappings": {
"tweet": {
"_all": {
"enabled": false
},
"properties": {
"tweetDate": {
"type": "date",
"format": "EEE MMM dd HH:mm:ss Z YYYY||strict_date_optional_time||epoch_millis"
},
"userId": {
"type": "text",
"index": "not_analyzed"
},
"screenName": {
"type": "text",
"index": "not_analyzed"
},
"tweetText": {
"type": "text"
},
"cleanedText": {
"type": "text"
},
"tweetId": {
"type": "text",
"index": "not_analyzed"
},
"location": {
"type": "geo_point",
"ignore_malformed": true
},
"placeName": {
"type": "keyword",
"doc_values": true,
"eager_global_ordinals": false
},
"placeCountry": {
"type": "keyword",
"doc_values": true,
"eager_global_ordinals": true
},
"placeCountryCode": {
"type": "keyword",
"doc_values": false,
"eager_global_ordinals": false,
"index": false
},
"placeBoundingBox": {
"type": "geo_shape",
"tree": "quadtree",
"precision": "1m"
},
"resolvedUrls": {
"type": "text",
"index": "not_analyzed"
},
"hashtags": {
"type": "text"
},
"mentions": {
"type": "text"
},
"geoInferences": {
"properties": {
"matchedName": {
"type": "text"
},
"asciiName": {
"type": "keyword",
"doc_values": true,
"eager_global_ordinals": false
},
"country": {
"type": "keyword",
"doc_values": true,
"eager_global_ordinals": true
},
"county": {
"type": "text"
},
"countryCode": {
"type": "keyword",
"doc_values": false,
"eager_global_ordinals": false,
"index": false
},
"city": {
"type": "text"
},
"admin1Code": {
"type": "keyword",
"doc_values": false,
"eager_global_ordinals": false,
"index": false
},
"admin2Code": {
"type": "keyword",
"doc_values": false,
"eager_global_ordinals": false,
"index": false
},
"admin3Code": {
"type": "keyword",
"doc_values": false,
"eager_global_ordinals": false,
"index": false
},
"admin4Code": {
"type": "keyword",
"doc_values": false,
"eager_global_ordinals": false,
"index": false
},
"confidence": {
"type": "float",
"doc_values": false,
"ignore_malformed": false,
"index": false
},
"coordinates": {
"type": "geo_point",
"ignore_malformed": true
}
}
},
"temporalInferences": {
"type": "date",
"ignore_malformed": true
}
}
}
}
}
A sample document:
{
"_index": "twitter",
"_type": "tweet",
"_id": "AVoZivLca9LOhnR10_ll",
"_score": null,
"_source": {
"tweetDate": 1486487211000,
"userId": "123456789",
"screenName": "removed",
"tweetText": "RT #wef: America’s dominance is over. By 2030, we'll have a handful of global powers https://www.weforum.org/agenda/2016/11/america-s-dominance-is-over/?utm_content=buffer73cd5&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer #wef17 https://twitter.com/wef/status/828994745200435200/photo/1",
"cleanedText": "RT #wef: America s dominance is over. By 2030, we'll have a handful of global powers https://www.weforum.org/agenda/2016/11/america-s-dominance-is-over/?utm_content=buffer73cd5&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer #wef17 https://twitter.com/wef/status/828994745200435200/photo/1",
"tweetId": "829013568288796672",
"resolvedUrls": [
"https://www.weforum.org/agenda/2016/11/america-s-dominance-is-over/?utm_content=buffer73cd5&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer"
],
"hashtags": [
"wef17"
],
"mentions": [
"wef"
],
"geoInferences": [
{
"matchedName": "America",
"asciiName": "United States",
"country": "United States",
"countryCode": "US",
"coordinates": [
-98.5,
39.76
],
"admin1Code": "00",
"admin2Code": "",
"admin3Code": "",
"admin4Code": "",
"confidence": 1
}
],
"temporalInferences": [
1893474000000
]
},
"fields": {
"temporalInferences": [
1893474000000
],
"tweetDate": [
1486487211000
]
},
"sort": [
1486487211000
]
}
The output from
GET /_cat/indices/twitter?pri&v&h=health,index,pri,rep,docs.count,mt,pri,rep,docs.count,store.size,pri.store.size
health | index | pri | rep | docs.count | mt | pri.mt | store.size | pri.store.size | pri.store.size
yellow | twitter | 5 | 1 | 26860 | 74 | 74 | 10.1gb | 10.1gb | 10.1gb
The output from:
GET /twitter/_stats
{
"_shards": {
"total": 10,
"successful": 5,
"failed": 0
},
"_all": {
"primaries": {
"docs": {
"count": 26860,
"deleted": 0
},
"store": {
"size_in_bytes": 11027965678,
"throttle_time_in_millis": 0
},
"indexing": {
"index_total": 27397,
"index_time_in_millis": 3568991,
"index_current": 1,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 195961
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 55,
"query_time_in_millis": 294,
"query_current": 0,
"fetch_total": 36,
"fetch_time_in_millis": 3209,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 76,
"total_time_in_millis": 350987,
"total_docs": 45409,
"total_size_in_bytes": 4027595474,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 48633,
"total_auto_throttle_in_bytes": 82233108
},
"refresh": {
"total": 857,
"total_time_in_millis": 2994887,
"listeners": 0
},
"flush": {
"total": 15,
"total_time_in_millis": 291939
},
"warmer": {
"current": 0,
"total": 876,
"total_time_in_millis": 534
},
"query_cache": {
"memory_size_in_bytes": 0,
"total_count": 0,
"hit_count": 0,
"miss_count": 0,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 24808,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 139,
"memory_in_bytes": 186032131,
"terms_memory_in_bytes": 185758725,
"stored_fields_memory_in_bytes": 43976,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 77888,
"points_memory_in_bytes": 714,
"doc_values_memory_in_bytes": 150828,
"index_writer_memory_in_bytes": 1316180948,
"version_map_memory_in_bytes": 42250,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {
}
},
"translog": {
"operations": 11997,
"size_in_bytes": 5555179
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 195,
"miss_count": 195
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
},
"total": {
"docs": {
"count": 26860,
"deleted": 0
},
"store": {
"size_in_bytes": 11027965678,
"throttle_time_in_millis": 0
},
"indexing": {
"index_total": 27397,
"index_time_in_millis": 3568991,
"index_current": 1,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 195961
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 55,
"query_time_in_millis": 294,
"query_current": 0,
"fetch_total": 36,
"fetch_time_in_millis": 3209,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 76,
"total_time_in_millis": 350987,
"total_docs": 45409,
"total_size_in_bytes": 4027595474,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 48633,
"total_auto_throttle_in_bytes": 82233108
},
"refresh": {
"total": 857,
"total_time_in_millis": 2994887,
"listeners": 0
},
"flush": {
"total": 15,
"total_time_in_millis": 291939
},
"warmer": {
"current": 0,
"total": 876,
"total_time_in_millis": 534
},
"query_cache": {
"memory_size_in_bytes": 0,
"total_count": 0,
"hit_count": 0,
"miss_count": 0,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 24808,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 139,
"memory_in_bytes": 186032131,
"terms_memory_in_bytes": 185758725,
"stored_fields_memory_in_bytes": 43976,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 77888,
"points_memory_in_bytes": 714,
"doc_values_memory_in_bytes": 150828,
"index_writer_memory_in_bytes": 1316180948,
"version_map_memory_in_bytes": 42250,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {
}
},
"translog": {
"operations": 11997,
"size_in_bytes": 5555179
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 195,
"miss_count": 195
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
}
},
"indices": {
"twitter": {
"primaries": {
"docs": {
"count": 26860,
"deleted": 0
},
"store": {
"size_in_bytes": 11027965678,
"throttle_time_in_millis": 0
},
"indexing": {
"index_total": 27397,
"index_time_in_millis": 3568991,
"index_current": 1,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 195961
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 55,
"query_time_in_millis": 294,
"query_current": 0,
"fetch_total": 36,
"fetch_time_in_millis": 3209,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 76,
"total_time_in_millis": 350987,
"total_docs": 45409,
"total_size_in_bytes": 4027595474,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 48633,
"total_auto_throttle_in_bytes": 82233108
},
"refresh": {
"total": 857,
"total_time_in_millis": 2994887,
"listeners": 0
},
"flush": {
"total": 15,
"total_time_in_millis": 291939
},
"warmer": {
"current": 0,
"total": 876,
"total_time_in_millis": 534
},
"query_cache": {
"memory_size_in_bytes": 0,
"total_count": 0,
"hit_count": 0,
"miss_count": 0,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 24808,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 139,
"memory_in_bytes": 186032131,
"terms_memory_in_bytes": 185758725,
"stored_fields_memory_in_bytes": 43976,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 77888,
"points_memory_in_bytes": 714,
"doc_values_memory_in_bytes": 150828,
"index_writer_memory_in_bytes": 1316180948,
"version_map_memory_in_bytes": 42250,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {
}
},
"translog": {
"operations": 11997,
"size_in_bytes": 5555179
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 195,
"miss_count": 195
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
},
"total": {
"docs": {
"count": 26860,
"deleted": 0
},
"store": {
"size_in_bytes": 11027965678,
"throttle_time_in_millis": 0
},
"indexing": {
"index_total": 27397,
"index_time_in_millis": 3568991,
"index_current": 1,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 195961
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 55,
"query_time_in_millis": 294,
"query_current": 0,
"fetch_total": 36,
"fetch_time_in_millis": 3209,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 76,
"total_time_in_millis": 350987,
"total_docs": 45409,
"total_size_in_bytes": 4027595474,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 48633,
"total_auto_throttle_in_bytes": 82233108
},
"refresh": {
"total": 857,
"total_time_in_millis": 2994887,
"listeners": 0
},
"flush": {
"total": 15,
"total_time_in_millis": 291939
},
"warmer": {
"current": 0,
"total": 876,
"total_time_in_millis": 534
},
"query_cache": {
"memory_size_in_bytes": 0,
"total_count": 0,
"hit_count": 0,
"miss_count": 0,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 24808,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 139,
"memory_in_bytes": 186032131,
"terms_memory_in_bytes": 185758725,
"stored_fields_memory_in_bytes": 43976,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 77888,
"points_memory_in_bytes": 714,
"doc_values_memory_in_bytes": 150828,
"index_writer_memory_in_bytes": 1316180948,
"version_map_memory_in_bytes": 42250,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {
}
},
"translog": {
"operations": 11997,
"size_in_bytes": 5555179
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 195,
"miss_count": 195
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
}
}
}
}
EDIT 1
I've discovered the source of this issue. It seems that the bounding box is at fault, though I have no idea why.
Once I remove the bounding box from the data being ingested, the index is a normal size (600 documents --> 550 KB), but as soon as I add the bounding box back in (with a brand-new index), the size skyrockets (3,593 documents --> 1.6 GB), even though only 84 documents contain a bounding box.
Below is the JSON of the bounding box:
"placeBoundingBox": {
"type": "polygon",
"coordinates": [
[
[
-71.191421,
42.227797
],
[
-71.191421,
42.399542
],
[
-70.986004,
42.399542
],
[
-70.986004,
42.227797
],
[
-71.191421,
42.227797
]
]
]
}
The mapping associated with the bounding box (from calling GET /INDEX_NAME):
"placeBoundingBox": {
"type": "geo_shape",
"tree": "quadtree",
"precision": "1.0m"
}
To demonstrate that the mapping does in fact work and creates a proper geo_shape (even though Kibana doesn't recognize it as a geo_shape), I ran the following query and got back a successful hit:
GET /_search
{
"query": {
"bool": {
"must": {
"match_all": {
}
},
"filter": {
"geo_shape": {
"placeBoundingBox": {
"shape": {
"type": "polygon",
"coordinates": [
[
[
-71.191421,
42.227797
],
[
-71.191421,
42.399542
],
[
-70.986004,
42.399542
],
[
-70.986004,
42.227797
],
[
-71.191421,
42.227797
]
]
]
},
"relation": "within"
}
}
}
}
}
}
I'd like to keep the bounding box in. Is there something wrong with either the mapping or the data? Is 1.0m too fine-grained?
The problem was the precision in the mapping, which was simply a typo (our index for Elasticsearch 2.x had the precision set to 1km). One tiny letter made all the difference...
A 1 meter ("1m") precision creates an extremely bloated index.
Removing the "precision" field from the mapping altogether defaults to 50m and produces a well-sized index.
I am writing to ES from Spark Streaming at a rate of around 80,000 events per second. This was running fine earlier, but recently the indexing rate has been slowing down gradually, which results in Spark jobs piling up.
What cluster settings can I tweak or verify to address this issue?
Cluster settings:
{
"persistent": {
"cluster": {
"routing": {
"allocation": {
"cluster_concurrent_rebalance": "160",
"node_concurrent_recoveries": "2",
"disk": {
"threshold_enabled": "true",
"watermark": {
"low": "85%",
"high": "95%"
}
},
"node_initial_primaries_recoveries": "40",
"enable": "all"
}
}
},
"indices": {
"breaker": {
"fielddata": {
"limit": "50%"
}
},
"recovery": {
"concurrent_streams": "80",
"max_bytes_per_sec": "50mb"
},
"store": {
"throttle": {
"max_bytes_per_sec": "500gb"
}
}
},
"threadpool": {
"bulk": {
"queue_size": "5000",
"size": "40",
"type": "fixed"
}
},
"discovery": {
"zen": {
"minimum_master_nodes": "2"
}
}
},
"transient": {
"cluster": {
"routing": {
"allocation": {
"enable": "all"
}
}
}
}
}
Cluster status:
{
"cluster_name": "**********",
"status": "green",
"timed_out": false,
"number_of_nodes": 105,
"number_of_data_nodes": 100,
"active_primary_shards": 7315,
"active_shards": 7330,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 0,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 0,
"active_shards_percent_as_number": 100
}
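For context, one thing worth verifying on any version is whether bulk requests are queuing up or being rejected, which would point to indexing back-pressure on the data nodes:
GET _cat/thread_pool?v
GET _nodes/stats/thread_pool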