ElasticSearch Aggregations Query - elasticsearch

I've already spent way too much time on this. I have an aggregation that I can't get to work the way it should.
I have a ton of documents with a structure like this (I've omitted the parts that aren't relevant for this aggregation):
{
"url": "THE_URL",
"url_params": {},
"title": "Het Nieuwsblad",
"referrer": null,
"time": "2015-08-25T08:35:15.729Z",
"referrerHost": null,
"timeOnSite": 16,
"blocks": [{
"viewTime": 11,
"click": 0,
"view": 1,
"block": "gentenaar__gentenaar-header"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "gentenaar__gentenaar-headline"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "gentenaar__gentenaar-top"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "gentenaar__gentenaar-top-left"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "gentenaar__gentenaar-top-right"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "gentenaar__gentenaar-right"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "gentenaar__gentenaar-sidebar"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "gentenaar__gentenaar-footer"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news"
}, {
"viewTime": 11,
"click": 1,
"view": 1,
"block": "news__breaking-news-1"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-2"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-3"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-4"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-5"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-6"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-7"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-sidebar-1"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-sidebar-2-left"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-sidebar-2-right"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-sidebar-3"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__breaking-news-footer"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-headline"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-head-1"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-head-2"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-head-3"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-head-4"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-head-5"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-head-6"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-1"
}, {
"viewTime": 0,
"click": 0,
"view": 0,
"block": "news__fast-news-2-left"
}, {
"viewTime": 0,
"click": 0,
"view": 0,
"block": "news__fast-news-2-right"
}, {
"viewTime": 0,
"click": 0,
"view": 0,
"block": "news__fast-news-3"
}, {
"viewTime": 0,
"click": 0,
"view": 0,
"block": "news__fast-news-4-left"
}, {
"viewTime": 0,
"click": 0,
"view": 0,
"block": "news__fast-news-4-right"
}, {
"viewTime": 0,
"click": 0,
"view": 0,
"block": "news__fast-news-5"
}, {
"viewTime": 0,
"click": 0,
"view": 0,
"block": "news__fast-news-6-left"
}, {
"viewTime": 0,
"click": 0,
"view": 0,
"block": "news__fast-news-6-right"
}, {
"viewTime": 1,
"click": 1,
"view": 1,
"block": "news__fast-news-right"
}, {
"viewTime": 0,
"click": 0,
"view": 0,
"block": "news__fast-news-bottom"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-sidebar-1"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-sidebar-2-left"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-sidebar-2-right"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-sidebar-3"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-sidebar-4-left"
}, {
"viewTime": 11,
"click": 0,
"view": 1,
"block": "news__fast-news-sidebar-4-right"
}, {
"viewTime": 1,
"click": 0,
"view": 1,
"block": "news__fast-news-sidebar-5"
}, {
"viewTime": 0,
"click": 0,
"view": 0,
"block": "news__fast-news-sidebar-6-left"
}, ...],
"activeAds": [{
"viewed": 1,
"clicked": 0,
"type": "button",
"width": 317,
"height": 75,
"timeViewed": 10
}, {
"viewed": 1,
"clicked": 0,
"type": "xlleaderboard",
"width": 990,
"height": 122,
"timeViewed": 10
}, ...]...
}
It's the blocks array that I'm having problems with. For each block I want the sum of clicks, views and viewTime.
I'm trying to use this query:
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"range": {
"dt": {
"from": "2015-08-25T00:00:00+00:00",
"to": "2015-08-26T23:59:59+00:00"
}
}
},
{
"term": {
"url": "http://www.nieuwsblad.be/"
}
}
]
}
}
}
},
"aggs": {
"per_block": {
"term": {
"field": "blocks.blocks"
},
"aggs": {
"clicks": {
"sum": {
"field": "blocks.click"
}
}
}
}
}
}
For the activeAds, I'm using nearly identical code, and there it works perfectly:
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"range": {
"dt": {
"from": "2015-08-25T00:00:00+00:00",
"to": "2015-08-26T23:59:59+00:00"
}
}
},
{
"terms": {
"page.page_type": [
"home"
]
}
}
],
"must_not": {
"term": {
"activeAds.timeViewed": 0
}
}
}
}
}
},
"aggs": {
"per_ad": {
"terms": {
"field": "activeAds.type"
},
"aggs": {
"clicks": {
"sum": {
"field": "activeAds.clicked"
}
},
"views": {
"sum": {
"field": "activeAds.viewed"
}
},
"total_time_viewed": {
"sum": {
"field": "activeAds.timeViewed"
}
}
}
}
}
}
Does anyone have an idea what I'm doing wrong?
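Edit: for reference, this is the closest I've gotten to what I think the aggregation should look like. It's only a sketch: it assumes the field I want to bucket on is blocks.block (not blocks.blocks), that the aggregation type has to be terms rather than term, and that blocks may need to be mapped as nested (with a nested aggregation wrapped around the terms) for the sums to be computed per block rather than across the whole document:
{
  "size": 0,
  "query": {
    "filtered": {
      "query": { "match_all": {} },
      "filter": {
        "bool": {
          "must": [
            { "range": { "dt": { "from": "2015-08-25T00:00:00+00:00", "to": "2015-08-26T23:59:59+00:00" } } },
            { "term": { "url": "http://www.nieuwsblad.be/" } }
          ]
        }
      }
    }
  },
  "aggs": {
    "blocks": {
      "nested": { "path": "blocks" },
      "aggs": {
        "per_block": {
          "terms": { "field": "blocks.block" },
          "aggs": {
            "clicks": { "sum": { "field": "blocks.click" } },
            "views": { "sum": { "field": "blocks.view" } },
            "total_view_time": { "sum": { "field": "blocks.viewTime" } }
          }
        }
      }
    }
  }
}
If blocks is mapped as a plain object the nested wrapper would have to be dropped, but then I'm not sure the per-block sums can be trusted.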

Related

ElasticSearch difference aggregation

This is a piece of my data stored :
[
{
"name": "disk.device.write.requests",
"type": "cumulative",
"unit": "request",
"volume": 0,
"user_id": "b0407ee332f6474c87d1e666262d4783",
"project_id": "75ebb9556f8c4e36b0d3e722a57ff3bb",
"resource_id": "7837ab92-5eb7-4cdc-9da3-5f1d2a385841-hda",
"timestamp": "2021-11-14T13:28:45.873289",
"resource_metadata": {
"display_name": "ali",
"name": "instance-00000004",
"instance_id": "7837ab92-5eb7-4cdc-9da3-5f1d2a385841",
"instance_type": "Tochal",
"host": "b34b47c6129603ae3d0387bfa8bf8fe487b0a8424d7e3debb6c69b6d",
"instance_host": "os",
"flavor": {
"id": "t1",
"name": "Tochal",
"vcpus": 4,
"ram": 4096,
"disk": 40,
"ephemeral": 0,
"swap": 0
},
"status": "active",
"state": "running",
"task_state": "",
"image": {
"id": "f77ec16e-1c4e-4ed7-b340-b537ab008367"
},
"image_ref": "f77ec16e-1c4e-4ed7-b340-b537ab008367",
"image_ref_url": null,
"architecture": "x86_64",
"os_type": "hvm",
"vcpus": 4,
"memory_mb": 4096,
"disk_gb": 40,
"ephemeral_gb": 0,
"root_gb": 40,
"disk_name": "hda"
},
"source": "openstack",
"id": "cafd91ab-454e-11ec-b8ba-3b125e027b37",
"monotonic_time": null
}
...]
I've written an aggregation to group the last hour of data by the name field for a project:
{
index: 'cm',
size: 0,
pretty: true,
body: {
query: {
bool: {
must: [
{
match: {
project_id: '75ebb9556f8c4e36b0d3e722a57ff3bb',
},
},
{
range: {
timestamp: {
gte: 'now-1H',
},
},
},
],
},
},
aggs: {
names: {
terms: { field: 'name' },
aggs: {
hatprice: { sum: { field: 'volume' } },
},
},
},
},
}
And its output is:
"aggregations": {
"names": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 4,
"buckets": [
{
"key": "disk.device.read.bytes",
"doc_count": 8,
"hatprice": {
"value": 2311073040
}
},
{
"key": "disk.device.read.requests",
"doc_count": 8,
"hatprice": {
"value": 68796
}
},
{
"key": "disk.device.write.bytes",
"doc_count": 8,
"hatprice": {
"value": 13117853696
}
},
{
"key": "disk.device.write.requests",
"doc_count": 8,
"hatprice": {
"value": 776618
}
},
{
"key": "cpu",
"doc_count": 4,
"hatprice": {
"value": 4541150000000
}
},
{
"key": "memory.usage",
"doc_count": 4,
"hatprice": {
"value": 1741
}
},
{
"key": "network.incoming.bytes",
"doc_count": 4,
"hatprice": {
"value": 532735722
}
},
{
"key": "network.incoming.packets",
"doc_count": 4,
"hatprice": {
"value": 864945
}
},
{
"key": "network.outgoing.bytes",
"doc_count": 4,
"hatprice": {
"value": 58562803
}
},
{
"key": "network.outgoing.packets",
"doc_count": 4,
"hatprice": {
"value": 439204
}
}
]
}
}
I need to aggregate based on the volume field minus the previous hour's volume value.
Is that possible?
E.g. network.incoming.bytes during the current hour only (not cumulative from the start).
I think I've solved the problem using serial_diff, but I'm not sure:
{
index: 'cm',
size: 0,
pretty: true,
body: {
query: {
bool: {
must: [
{
match: {
project_id: '75ebb9556f8c4e36b0d3e722a57ff3bb',
},
},
{
range: {
timestamp: {
gte: 'now-2H',
},
},
},
],
},
},
aggs: {
names: {
terms: { field: 'name' },
aggs: {
mydateh: {
date_histogram: {
field: 'timestamp',
calendar_interval: 'hour',
},
aggs: {
volrate: { max: { field: 'volume' } },
diff: {
serial_diff: {
buckets_path: 'volrate',
lag: 1,
},
},
},
},
},
},
},
},
}
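If I understand the serial_diff pipeline correctly, every hourly date_histogram bucket after the first should get a diff value equal to its volrate minus the previous bucket's volrate, so the per-hour delta I'm after would show up roughly like this (illustrative numbers, not real output from my cluster):
"mydateh": {
  "buckets": [
    {
      "key_as_string": "2021-11-14T12:00:00.000Z",
      "doc_count": 4,
      "volrate": { "value": 532000000 }
    },
    {
      "key_as_string": "2021-11-14T13:00:00.000Z",
      "doc_count": 4,
      "volrate": { "value": 532735722 },
      "diff": { "value": 735722 }
    }
  ]
}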

How to use Nifi QueryRecord to lookup for values in a flowfile?

I have a scenario where a flowfile contains transformed JSON, and I now need to combine fields from that JSON. For example, I want to get the R and PA from Original.ScoreInfo.Team per ID, and get Enriched.Team.Source.HV and Enriched.Team.SourceMapped.HV based on the Team.ID. My desired JSON looks like:
[
{
"TeamID": 1000,
"ValueR": 3,
"ValuePA": 13
},
{
"TeamID": 2000,
"ValueR": 1,
"ValuePA": 14
}
]
Flowfile example:
[
{
"Mapping": {
"ScoreInfo": {
"Team": [
{
"ID": 1,
"Source": {
"HV": 1,
"ID": 1
}
},
{
"ID": 3,
"Source": {
"HV": 2,
"ID": 3
}
}
]
}
},
"Original": {
"ScoreInfo": {
"Team": [
{
"HV": 1,
"ID": 1,
"R": 1,
"PA": 8,
"H": 2,
"BB": 2,
"SB": 0,
"E": 0,
"Score": [
{
"Inn": 1,
"TB": 2,
"R": 0,
"H": 0,
"BB": 1
},
{
"Inn": 2,
"TB": 2,
"R": 1,
"H": 2,
"BB": 1
}
]
},
{
"HV": 2,
"ID": 3,
"R": 1,
"PA": 10,
"H": 3,
"BB": 0,
"SB": 0,
"E": 0,
"Score": [
{
"Inn": 1,
"TB": 1,
"R": 1,
"H": 2,
"BB": 0
},
{
"Inn": 2,
"TB": 1,
"R": 0,
"H": 0,
"BB": 0
}
]
}
]
}
},
"MatchValue": "99999999",
"Enriched": {
"Team": [
{
"ID": 1,
"Source": {
"HV": 1,
"ID": 1
},
"SourceMapped": {
"HV": 1000,
"ID": 1000
}
},
{
"ID": 2,
"Source": {
"HV": 2,
"ID": 2
},
"SourceMapped": {
"HV": 2000,
"ID": 2000
}
}
],
"MatchValue": "99999999"
}
}
]
I was trying to do it using QueryRecord combined with RPATH, but it returns an empty result when I try this: SELECT * FROM FLOWFILE WHERE RPATH(Original, '/ScoreInfo/Team/ID') = RPATH(Enriched, '/Team/ID')
Any idea how I can use QueryRecord to do this?

Memory distribution in elasticsearch

I am using the AWS Elasticsearch service, with each node having 16GB RAM, 4 cores and 160GB EBS. While looking at
GET _nodes/stats
I found that I have only 0.27GB of memory left on my node. I understand that approximately the following amounts of memory are dedicated to these components:
Heap: 8GB
Swap: 2GB
Fielddata + Segments: 1GB
I don't understand what happens with the remaining 4-5GB of memory. Which process is consuming it?
The following is the stat of one node which I got from the api mentioned above:
{
"timestamp": 1536212516381,
"name": "",
"roles": [
"master",
"data",
"ingest"
],
"indices": {
"docs": {
"count": 40811869,
"deleted": 12313549
},
"store": {
"size_in_bytes": 21211684802
},
"indexing": {
"index_total": 17141254,
"index_time_in_millis": 23271050,
"index_current": 0,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 11,
"time_in_millis": 4,
"exists_total": 10,
"exists_time_in_millis": 4,
"missing_total": 1,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 60495650,
"query_time_in_millis": 164996296,
"query_current": 0,
"fetch_total": 1338,
"fetch_time_in_millis": 3947,
"fetch_current": 0,
"scroll_total": 1541,
"scroll_time_in_millis": 147381782,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 752561,
"total_time_in_millis": 156448427,
"total_docs": 6278661063,
"total_size_in_bytes": 2061451815435,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 64499,
"total_auto_throttle_in_bytes": 231968018126
},
"refresh": {
"total": 9621793,
"total_time_in_millis": 126038098,
"listeners": 0
},
"flush": {
"total": 135925,
"total_time_in_millis": 2306674
},
"warmer": {
"current": 0,
"total": 8576200,
"total_time_in_millis": 3784313
},
"query_cache": {
"memory_size_in_bytes": 47531137,
"total_count": 315917952,
"hit_count": 69088483,
"miss_count": 246829469,
"cache_size": 16561,
"cache_count": 740755,
"evictions": 724194
},
"fielddata": {
"memory_size_in_bytes": 19006048,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 2256,
"memory_in_bytes": 62556325,
"terms_memory_in_bytes": 50154620,
"stored_fields_memory_in_bytes": 6587256,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 557504,
"points_memory_in_bytes": 2812593,
"doc_values_memory_in_bytes": 2444352,
"index_writer_memory_in_bytes": 0,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 6743824,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {}
},
"translog": {
"operations": 322600,
"size_in_bytes": 511751430,
"uncommitted_operations": 22528,
"uncommitted_size_in_bytes": 35107521
},
"request_cache": {
"memory_size_in_bytes": 84792534,
"evictions": 13922143,
"hit_count": 7667542,
"miss_count": 35693456
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 952628
}
},
"os": {
"timestamp": 1536212516872,
"cpu": {
"percent": 23,
"load_average": {
"1m": 1.29,
"5m": 1.19,
"15m": 1.16
}
},
"mem": {
"total_in_bytes": 16823488512,
"free_in_bytes": 297766912,
"used_in_bytes": 16525721600,
"free_percent": 2,
"used_percent": 98
},
"swap": {
"total_in_bytes": 2147479552,
"free_in_bytes": 2143141888,
"used_in_bytes": 4337664
}
},
"process": {
"timestamp": 1536212516872,
"open_file_descriptors": 2573,
"max_file_descriptors": 128000,
"cpu": {
"percent": 19,
"total_in_millis": 4177826450
},
"mem": {
"total_virtual_in_bytes": 34029547520
}
},
"jvm": {
"timestamp": 1536212516874,
"uptime_in_millis": 6132906547,
"mem": {
"heap_used_in_bytes": 5277036968,
"heap_used_percent": 61,
"heap_committed_in_bytes": 8555069440,
"heap_max_in_bytes": 8555069440,
"non_heap_used_in_bytes": 278172640,
"non_heap_committed_in_bytes": 304259072,
"pools": {
"young": {
"used_in_bytes": 246875928,
"max_in_bytes": 279183360,
"peak_used_in_bytes": 279183360,
"peak_max_in_bytes": 279183360
},
"survivor": {
"used_in_bytes": 6813256,
"max_in_bytes": 34865152,
"peak_used_in_bytes": 34865152,
"peak_max_in_bytes": 34865152
},
"old": {
"used_in_bytes": 5023347784,
"max_in_bytes": 8241020928,
"peak_used_in_bytes": 6966292552,
"peak_max_in_bytes": 8241020928
}
}
},
"threads": {
"count": 234,
"peak_count": 250
},
"gc": {
"collectors": {
"young": {
"collection_count": 4474572,
"collection_time_in_millis": 127468649
},
"old": {
"collection_count": 920,
"collection_time_in_millis": 325448
}
}
},
"buffer_pools": {
"direct": {
"count": 463,
"used_in_bytes": 154976141,
"total_capacity_in_bytes": 154976139
},
"mapped": {
"count": 4914,
"used_in_bytes": 21027708516,
"total_capacity_in_bytes": 21027708516
}
},
"classes": {
"current_loaded_count": 26154,
"total_loaded_count": 26712,
"total_unloaded_count": 558
}
},
"thread_pool": {
"bulk": {
"threads": 4,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 4,
"completed": 17141525
},
"fetch_shard_started": {
"threads": 0,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 0,
"completed": 0
},
"fetch_shard_store": {
"threads": 0,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 0,
"completed": 0
},
"flush": {
"threads": 2,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 2,
"completed": 271881
},
"force_merge": {
"threads": 0,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 0,
"completed": 0
},
"generic": {
"threads": 32,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 128,
"completed": 43205055
},
"get": {
"threads": 4,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 4,
"completed": 8
},
"index": {
"threads": 3,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 3,
"completed": 3
},
"listener": {
"threads": 2,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 2,
"completed": 4
},
"management": {
"threads": 5,
"queue": 0,
"active": 1,
"rejected": 0,
"largest": 5,
"completed": 28115022
},
"refresh": {
"threads": 2,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 2,
"completed": 1023804661
},
"search": {
"threads": 7,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 7,
"completed": 161702240
},
"snapshot": {
"threads": 1,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 2,
"completed": 21133
},
"warmer": {
"threads": 2,
"queue": 0,
"active": 0,
"rejected": 0,
"largest": 2,
"completed": 95146934
}
},
"fs": {
"timestamp": 1536212516874,
"total": {
"total_in_bytes": 168968957952,
"free_in_bytes": 147126472704,
"available_in_bytes": 138519760896
},
"data": [
{
"type": "ext4",
"total_in_bytes": 168968957952,
"free_in_bytes": 147126472704,
"available_in_bytes": 138519760896
}
],
"io_stats": {
"devices": [
{
"device_name": "xvdf",
"operations": 133367878,
"read_operations": 1456763,
"write_operations": 131911115,
"read_kilobytes": 104740824,
"write_kilobytes": 3178855500
}
],
"total": {
"operations": 133367878,
"read_operations": 1456763,
"write_operations": 131911115,
"read_kilobytes": 104740824,
"write_kilobytes": 3178855500
}
}
},
"breakers": {
"request": {
"limit_size_in_bytes": 5133041664,
"limit_size": "4.7gb",
"estimated_size_in_bytes": 0,
"estimated_size": "0b",
"overhead": 1,
"tripped": 0
},
"fielddata": {
"limit_size_in_bytes": 5133041664,
"limit_size": "4.7gb",
"estimated_size_in_bytes": 19006048,
"estimated_size": "18.1mb",
"overhead": 1.03,
"tripped": 0
},
"in_flight_requests": {
"limit_size_in_bytes": 8555069440,
"limit_size": "7.9gb",
"estimated_size_in_bytes": 1657,
"estimated_size": "1.6kb",
"overhead": 1,
"tripped": 0
},
"accounting": {
"limit_size_in_bytes": 8555069440,
"limit_size": "7.9gb",
"estimated_size_in_bytes": 62556325,
"estimated_size": "59.6mb",
"overhead": 1,
"tripped": 0
},
"parent": {
"limit_size_in_bytes": 5988548608,
"limit_size": "5.5gb",
"estimated_size_in_bytes": 81564030,
"estimated_size": "77.7mb",
"overhead": 1,
"tripped": 0
}
},
"script": {
"compilations": 5,
"cache_evictions": 0
},
"discovery": {
"cluster_state_queue": {
"total": 0,
"pending": 0,
"committed": 0
},
"published_cluster_states": {
"full_states": 1,
"incompatible_diffs": 0,
"compatible_diffs": 70303
}
},
"ingest": {
"total": {
"count": 0,
"time_in_millis": 0,
"current": 0,
"failed": 0
},
"pipelines": {}
},
"adaptive_selection": {
"qhqf-YADRF2gS1Vu6EV_8Q": {
"outgoing_searches": 0,
"avg_queue_size": 0,
"avg_service_time_ns": 6520537,
"avg_response_time_ns": 15403676,
"rank": "15.4"
},
"I21NiLksQqCNV3-vP3uCNA": {
"outgoing_searches": 0,
"avg_queue_size": 0,
"avg_service_time_ns": 3079216,
"avg_response_time_ns": 1115616,
"rank": "1.1"
},
"T2_cLfzrQBKd7WTR0p6jWQ": {
"outgoing_searches": 0,
"avg_queue_size": 0,
"avg_service_time_ns": 12493125,
"avg_response_time_ns": 1141514,
"rank": "1.1"
},
"ZZYgl3WARsKE-80HXhRbVw": {
"outgoing_searches": 0,
"avg_queue_size": 0,
"avg_service_time_ns": 3919880,
"avg_response_time_ns": 6063593,
"rank": "6.1"
}
}
}
Can anyone please help explain the memory distribution on this node? Is it okay to have only 0.27GB of free memory, or what should I do to make more memory available on the node?
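In case it helps narrow things down, these are the requests I've been using to look at just the OS, JVM and circuit breaker sections, and at per-node heap vs. total RAM (the metric filter on _nodes/stats and these _cat/nodes columns should be standard, though I'm not certain the AWS-managed service exposes all of them the same way):
GET _nodes/stats/os,jvm,breaker
GET _cat/nodes?v&h=name,heap.current,heap.percent,ram.current,ram.percent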

Elasticsearch : Indexing rate slows down gradually

I am writing to ES from Spark Streaming at a rate of around 80,000 EPS.
This was running fine earlier, but recently the indexing rate slows down gradually, which results in Spark jobs piling up.
What cluster settings can I tweak/verify to address this issue?
Cluster settings:
{
"persistent": {
"cluster": {
"routing": {
"allocation": {
"cluster_concurrent_rebalance": "160",
"node_concurrent_recoveries": "2",
"disk": {
"threshold_enabled": "true",
"watermark": {
"low": "85%",
"high": "95%"
}
},
"node_initial_primaries_recoveries": "40",
"enable": "all"
}
}
},
"indices": {
"breaker": {
"fielddata": {
"limit": "50%"
}
},
"recovery": {
"concurrent_streams": "80",
"max_bytes_per_sec": "50mb"
},
"store": {
"throttle": {
"max_bytes_per_sec": "500gb"
}
}
},
"threadpool": {
"bulk": {
"queue_size": "5000",
"size": "40",
"type": "fixed"
}
},
"discovery": {
"zen": {
"minimum_master_nodes": "2"
}
}
},
"transient": {
"cluster": {
"routing": {
"allocation": {
"enable": "all"
}
}
}
}
}
Cluster status:
{
"cluster_name": "**********",
"status": "green",
"timed_out": false,
"number_of_nodes": 105,
"number_of_data_nodes": 100,
"active_primary_shards": 7315,
"active_shards": 7330,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 0,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 0,
"active_shards_percent_as_number": 100
}
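What I'm planning to verify next (sketch only, not yet run against this cluster) is whether the bulk thread pool is rejecting requests and whether merging is being throttled, since either would explain a gradual indexing slowdown:
GET _cat/thread_pool?v&h=host,bulk.active,bulk.queue,bulk.rejected
GET _nodes/stats/indices
In the second response I'd look at the merges section (total_throttled_time_in_millis) and at indexing.throttle_time_in_millis on each node.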

More Like This query eats up memory

I have 3 nodes: 2 masters with data, and 1 with master=false and data=false used for facets/aggregations.
All nodes have 30gb of RAM, with 16gb allocated to Elasticsearch and the rest left to the OS.
I have some 600 indices with 5 shards each, storing the content of documents with vector=true.
Now, when I execute a simple more_like_this query, the first 2 nodes start eating up memory, resulting in an out of memory exception. Am I missing any settings, or is this a bug?
This is the error log:
[2015-06-12 04:19:35,729][DEBUG][action.search.type ] [Master1] [113] Failed to execute fetch phase
org.elasticsearch.ElasticsearchException: Java heap space
at org.elasticsearch.ExceptionsHelper.convertToRuntime(ExceptionsHelper.java:44)
at org.elasticsearch.search.SearchService.executeFetchPhase(SearchService.java:513)
at org.elasticsearch.search.action.SearchServiceTransportAction$17.call(SearchServiceTransportAction.java:452)
at org.elasticsearch.search.action.SearchServiceTransportAction$17.call(SearchServiceTransportAction.java:449)
at org.elasticsearch.search.action.SearchServiceTransportAction$23.run(SearchServiceTransportAction.java:559)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOfRange(Unknown Source)
at java.lang.String.<init>(Unknown Source)
at java.lang.StringBuilder.toString(Unknown Source)
at org.elasticsearch.common.jackson.core.util.TextBuffer.contentsAsString(TextBuffer.java:349)
at org.elasticsearch.common.jackson.core.json.UTF8StreamJsonParser.getText(UTF8StreamJsonParser.java:281)
at org.elasticsearch.common.xcontent.json.JsonXContentParser.text(JsonXContentParser.java:86)
at org.elasticsearch.common.xcontent.support.AbstractXContentParser.readValue(AbstractXContentParser.java:293)
at org.elasticsearch.common.xcontent.support.AbstractXContentParser.readMap(AbstractXContentParser.java:275)
at org.elasticsearch.common.xcontent.support.AbstractXContentParser.readMap(AbstractXContentParser.java:254)
at org.elasticsearch.common.xcontent.support.AbstractXContentParser.map(AbstractXContentParser.java:208)
at org.elasticsearch.common.xcontent.support.AbstractXContentParser.mapAndClose(AbstractXContentParser.java:219)
at org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:125)
at org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:79)
at org.elasticsearch.search.lookup.SourceLookup.sourceAsMapAndType(SourceLookup.java:87)
at org.elasticsearch.search.lookup.SourceLookup.loadSourceIfNeeded(SourceLookup.java:63)
at org.elasticsearch.search.lookup.SourceLookup.extractRawValues(SourceLookup.java:145)
at org.elasticsearch.search.fetch.FetchPhase.createSearchHit(FetchPhase.java:245)
at org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:182)
at org.elasticsearch.search.SearchService.executeFetchPhase(SearchService.java:501)
These are my master node statistics:
{
"timestamp": 1435219276968,
"cluster_name": "cluster1",
"status": "green",
"indices": {
"count": 537,
"shards": {
"total": 5314,
"primaries": 2657,
"replication": 1,
"index": {
"shards": {
"min": 2,
"max": 10,
"avg": 9.895716945996275
},
"primaries": {
"min": 1,
"max": 5,
"avg": 4.947858472998138
},
"replication": {
"min": 1,
"max": 1,
"avg": 1
}
}
},
"docs": {
"count": 60900916,
"deleted": 50541
},
"store": {
"size": "1tb",
"size_in_bytes": 1192366468003,
"throttle_time": "6.2m",
"throttle_time_in_millis": 372988
},
"fielddata": {
"memory_size": "0b",
"memory_size_in_bytes": 0,
"evictions": 11175
},
"filter_cache": {
"memory_size": "1.6mb",
"memory_size_in_bytes": 1705012,
"evictions": 0
},
"id_cache": {
"memory_size": "0b",
"memory_size_in_bytes": 0
},
"completion": {
"size": "0b",
"size_in_bytes": 0
},
"segments": {
"count": 36771,
"memory": "3gb",
"memory_in_bytes": 3236984326,
"index_writer_memory": "0b",
"index_writer_memory_in_bytes": 0,
"index_writer_max_memory": "2.5gb",
"index_writer_max_memory_in_bytes": 2720768000,
"version_map_memory": "0b",
"version_map_memory_in_bytes": 0,
"fixed_bit_set": "0b",
"fixed_bit_set_memory_in_bytes": 0
},
"percolate": {
"total": 0,
"get_time": "0s",
"time_in_millis": 0,
"current": 0,
"memory_size_in_bytes": -1,
"memory_size": "-1b",
"queries": 0
}
},
"nodes": {
"count": {
"total": 3,
"master_only": 0,
"data_only": 0,
"master_data": 2,
"client": 0
},
"versions": [
"1.5.2"
],
"os": {
"available_processors": 24,
"mem": {
"total": "95.9gb",
"total_in_bytes": 103077617664
},
"cpu": [
{
"vendor": "Intel",
"model": "Xeon",
"mhz": 2400,
"total_cores": 8,
"total_sockets": 2,
"cores_per_socket": 4,
"cache_size": "-1b",
"cache_size_in_bytes": -1,
"count": 3
}
]
},
"process": {
"cpu": {
"percent": 17
},
"open_file_descriptors": {
"min": 1361,
"max": 57055,
"avg": 38465
}
},
"jvm": {
"max_uptime": "2.9d",
"max_uptime_in_millis": 251100991,
"versions": [
{
"version": "1.8.0_45",
"vm_name": "Java HotSpot(TM) 64-Bit Server VM",
"vm_version": "25.45-b02",
"vm_vendor": "Oracle Corporation",
"count": 3
}
],
"mem": {
"heap_used": "18.2gb",
"heap_used_in_bytes": 19577530528,
"heap_max": "47.8gb",
"heap_max_in_bytes": 51330416640
},
"threads": 325
},
"fs": {},
"plugins": []
}
}
EDIT:
{
"size": 100,
"fields": [
"docid"
],
"explain": false,
"query": {
"more_like_this": {
"fields": [
"content"
],
"ids": [
"AU2h82DxWiT16vBNxqGo"
],
"min_term_freq": 1,
"min_doc_freq": 1,
"include": "true",
"max_query_terms": 1000,
"boost_terms": 1
}
}
}
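EDIT 2: the variant I plan to test next is sketched below. It just dials back the parameters I suspect of blowing up the generated query (max_query_terms at 1000, min_term_freq and min_doc_freq at 1, include, and the result size); it's a sketch, not a confirmed fix:
{
  "size": 10,
  "fields": ["docid"],
  "query": {
    "more_like_this": {
      "fields": ["content"],
      "ids": ["AU2h82DxWiT16vBNxqGo"],
      "min_term_freq": 2,
      "min_doc_freq": 5,
      "max_query_terms": 25,
      "include": false
    }
  }
}
If memory still spikes with these settings, I'd at least know the problem isn't the size of the generated MLT query itself.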
