In our system, 5,000-15,000 records are written to Elasticsearch (1.7.3) per minute. For the first 4-5 hours everything is fine, but then it starts slowing down until there is almost no response to any read or write request. After we restart the Elasticsearch service, it works fine for another 4-5 hours.
When Elasticsearch starts slowing down I check the memory, and it is at about 95%, so I think that is why it breaks. But I don't understand why it keeps filling RAM and never flushes it. There is no point in increasing RAM: we have 8 GB, and Elasticsearch reached 95% of it in 5 hours.
Is there any way to handle this? Flushing memory, auto-restarting Elasticsearch, etc.?
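A quick first check when this happens is whether it is really the JVM heap that is full, rather than the OS file-system cache (which is expected to sit near 100% on a busy box). A minimal sketch, assuming a single local node on the default port:
curl -XGET "http://localhost:9200/_nodes/stats/jvm?pretty"
In the response, jvm.mem.heap_used_percent is the number to watch; if that is what sits around 95%, the heap itself is exhausted rather than just the OS cache.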
EDIT:
Here is a sample document from the index:
"hits": {
"total": 18083446,
"max_score": 1,
"hits": [
{
"_index": "userlogs",
"_type": "userlogstype",
"_id": "AVMZEEYwW1W7iq27fTcE",
"_score": 1,
"_source": {
"domain": "http://gatr.hit.gemius.pl/",
"url": "http://gatr.hit.gemius.pl/_1456414408406/rexdot.js?",
"filetype": "-",
"clientname": "NOTINDOMAIN",
"clientmac": "00:0c:29:8f:c4:4f",
"hierarchycode": "HIER_DIRECT/188.165.145.88",
"user": "-",
"duration": "168",
"respsize": "1059",
"clientip": "10.6.1.130",
"loggingdate": "25/02/2016 17:33:28",
"resultcode": "TCP_MISS/301",
"reqmethod": "GET"
}
},
{
"_index": "userlogs",
"_type": "userlogstype",
"_id": "AVMZEEYwW1W7iq27fTcI",
"_score": 1,
"_source": {
"domain": "http://10.6.2.212/",
...
And here is the mapping of the index:
{
"webproxylog": {
"mappings": {
"update_by_query": {
"properties": {
"query": {
"properties": {
"filtered": {
"properties": {
"filter": {
"properties": {
"term": {
"properties": {
"url": {
"type": "string"
}
}
}
}
}
}
}
}
},
"script": {
"type": "string"
}
}
},
"accesslog": {
"properties": {
"action": {
"type": "string",
"index": "not_analyzed"
},
"action4cat": {
"type": "string",
"index": "not_analyzed"
},
"category": {
"type": "string"
},
"clientip": {
"type": "string",
"index": "not_analyzed"
},
"clientmac": {
"type": "string",
"index": "not_analyzed"
},
"clientname": {
"type": "string",
"index": "not_analyzed"
},
"domain": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"hierarchycode": {
"type": "string",
"index": "not_analyzed"
},
"index": {
"properties": {
"_index": {
"type": "string"
},
"_type": {
"type": "string"
}
}
},
"loggingdate": {
"type": "date",
"format": "dd/MM/yyyy HH:mm:ss"
},
"reqmethod": {
"type": "string",
"index": "not_analyzed"
},
"respsize": {
"type": "long"
},
"resultcode": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
Query sample:
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"exists": {
"field": "instock"
}
}
]
}
}
}
},
"aggs": {
"aggs1": {
"terms": {
"field": "clientname",
"size": 5,
"order": {
"aggs2": "desc"
}
},
"aggs": {
"aggs2": {
"sum": {
"field": "respsize"
}
}
}
}
}
}
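A note on the query above: in Elasticsearch 1.x, terms and sum aggregations like these load the field values (clientname, respsize) into the on-heap fielddata cache, which is unbounded by default and is a common reason for heap usage creeping toward 95% over a few hours. A quick sketch for inspecting and, as a stopgap, clearing that cache, assuming a local node:
# per-field fielddata heap usage
curl -XGET "http://localhost:9200/_nodes/stats/indices/fielddata?fields=*&pretty"
# drop the fielddata cache (it will refill as queries run)
curl -XPOST "http://localhost:9200/_cache/clear?fielddata=true"
A longer-term option along these lines is capping the cache with indices.fielddata.cache.size in elasticsearch.yml.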
Related
Hello, I have a problem with combining multiple queries in Elasticsearch.
The problem only occurs when I try to combine a multi_match query with a geo_distance query. Executed on its own, each query returns the results I expect, and both result sets contain the dataset that I would expect to receive when both queries are executed together. But whenever I execute them together, I receive 0 results.
When I combine the geo_distance query with a simple term query, the search works, so I presume it is a problem with this particular combination of queries.
I would appreciate any ideas.
My query is the following:
{
"query": {
"bool": {
"must": {
"bool": {
"should": {
"multi_match": {
"query": "CompanyName GmbH",
"fields": [
"originalName",
"legalName"
],
"type": "cross_fields",
"operator": "AND"
}
}
}
},
"filter": {
"bool": {
"should": {
"geo_distance": {
"location": [
9.87107,
51.69915
],
"distance": "30.0km",
"distance_type": "arc"
}
}
}
}
}
}
}
The mapping behind all of that is:
{
"customer": {
"aliases": {
},
"mappings": {
"customer-entity": {
"properties": {
"communication": {
"properties": {
"domain": {
"type": "string"
},
"email": {
"type": "string"
},
"landline": {
"type": "string"
},
"mobile": {
"type": "string"
}
}
},
"id": {
"type": "long"
},
"legalName": {
"type": "string",
"store": true
},
"location": {
"type": "geo_point"
},
"operatingModes": {
"type": "string"
},
"originalName": {
"type": "string",
"store": true
}
}
},
"homepage-entity": {
"_parent": {
"type": "customer-entity"
},
"_routing": {
"required": true
},
"properties": {
"customerId": {
"type": "string",
"store": true
},
"id": {
"type": "long"
},
"metas": {
"type": "string",
"store": true
}
}
},
"person-entity": {
"_parent": {
"type": "customer-entity"
},
"_routing": {
"required": true
},
"properties": {
"customerId": {
"type": "string",
"store": true
},
"firstName": {
"type": "string",
"store": true
},
"id": {
"type": "long"
},
"lastName": {
"type": "string",
"store": true
},
"personId": {
"type": "string",
"store": true
}
}
}
},
"settings": {
"index": {
"refresh_interval": "-1",
"number_of_shards": "1",
"creation_date": "1488920698118",
"store": {
"type": "fs"
},
"number_of_replicas": "0",
"uuid": "ZcLN5sxASXGUnKZMg8mBpw",
"version": {
"created": "2040499"
}
}
},
"warmers": {
}
}
}
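One thing worth trying when debugging a combination like this is stripping the single-clause bool/should wrappers, which add nothing semantically; a structurally equivalent but flatter form of the same query (same fields and coordinates as above) would be:
{
  "query": {
    "bool": {
      "must": {
        "multi_match": {
          "query": "CompanyName GmbH",
          "fields": ["originalName", "legalName"],
          "type": "cross_fields",
          "operator": "AND"
        }
      },
      "filter": {
        "geo_distance": {
          "location": [9.87107, 51.69915],
          "distance": "30.0km",
          "distance_type": "arc"
        }
      }
    }
  }
}
If the flat form still returns 0 hits, at least the wrappers are ruled out as the cause.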
I'm trying to delete documents with a date lower than December 1st, but it doesn't look like it actually deletes anything.
I tried using the delete-by-query API:
curl -XPOST "http://localhost:9200/mediadata/events/_delete_by_query" -d'
{
"query": {
"range": {
"created_at": {
"lt": "2016-12-01 00:00:00"
}
}
}
}'
Or this syntax:
curl -XDELETE 'http://localhost:9200/mediadata/events/_query' -d ...
I obtain this kind of result:
{"_index":"mediadata","_type":"events","_id":"_delete_by_query","_version":10,"_shards":{"total":3,"successful":2,"failed":0},"created":false}
Thanks in advance.
EDIT: Here is the mapping:
{
"mediadata": {
"mappings": {
"events": {
"properties": {
"channels": {
"properties": {
"kdata": {
"type": "string",
"index": "not_analyzed"
},
"mail": {
"type": "string",
"index": "not_analyzed"
},
"md5": {
"type": "string",
"index": "not_analyzed"
},
"mobile": {
"type": "string",
"index": "not_analyzed"
},
"ssp": {
"type": "string",
"index": "not_analyzed"
}
}
},
"contents": {
"type": "string",
"index": "not_analyzed"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"editor": {
"type": "string",
"index": "not_analyzed"
},
"end": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"location": {
"type": "geo_point"
},
"message": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "double"
},
"quantity": {
"type": "long"
},
"query": {
"properties": {
"bool": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"must": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"filtered": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
},
"lte": {
"type": "string"
}
}
}
}
}
}
},
"reference": {
"type": "string",
"index": "not_analyzed"
},
"source": {
"type": "string",
"index": "not_analyzed"
},
"start": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"type": {
"type": "string",
"index": "not_analyzed"
},
"updated_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
}
}
}
}
}
}
Your syntax is indeed correct for version 5.x, where delete by query works as follows:
POST mediadata/events/_delete_by_query?conflicts=proceed
{
"query": {
"range": {
"created_at": {
"gt": "2016-11-02 00:00:00"
}
}
}
}
Now, based on the response that you're getting from ES:
{"_index":"mediadata","_type":"events","_id":"_delete_by_query","_version":10,"_shards":{"total":3,"successful":2,"failed":0},"created":false}
I will assume that you're running version 2.x, where the syntax is different. Note that your POST did not delete anything: the _delete_by_query endpoint does not exist in 2.x, so Elasticsearch simply indexed your query as a document with the id _delete_by_query, which is why the response shows "_id":"_delete_by_query" and "created":false.
First of all, in version 2.x, delete by query is a plugin that you need to install using:
bin/plugin install delete-by-query
Then you can run it:
curl -XDELETE "http://localhost:9200/mediadata/events/_query" -d'
{
"query": {
"range": {
"created_at": {
"gt": "2016-11-02 00:00:00"
}
}
}
}'
The response looks like:
{
"took": 0,
"timed_out": false,
"_indices": {
"_all": {
"found": 1,
"deleted": 1,
"missing": 0,
"failed": 0
},
"mediadata": {
"found": 1,
"deleted": 1,
"missing": 0,
"failed": 0
}
},
"failures": []
}
Full example:
PUT mediadata
{
"mappings": {
"events": {
"properties": {
"channels": {
"properties": {
"kdata": {
"type": "string",
"index": "not_analyzed"
},
"mail": {
"type": "string",
"index": "not_analyzed"
},
"md5": {
"type": "string",
"index": "not_analyzed"
},
"mobile": {
"type": "string",
"index": "not_analyzed"
},
"ssp": {
"type": "string",
"index": "not_analyzed"
}
}
},
"contents": {
"type": "string",
"index": "not_analyzed"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"editor": {
"type": "string",
"index": "not_analyzed"
},
"end": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"location": {
"type": "geo_point"
},
"message": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "double"
},
"quantity": {
"type": "long"
},
"query": {
"properties": {
"bool": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"must": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"filtered": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
},
"lte": {
"type": "string"
}
}
}
}
}
}
},
"reference": {
"type": "string",
"index": "not_analyzed"
},
"source": {
"type": "string",
"index": "not_analyzed"
},
"start": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"type": {
"type": "string",
"index": "not_analyzed"
},
"updated_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
}
}
}
}
}
PUT mediadata/events/1
{
"created_at" : "2016-11-02 00:00:00"
}
PUT mediadata/events/3
{
"created_at" : "2016-11-03 00:00:00"
}
#The one to delete
PUT mediadata/events/4
{
"created_at" : "2016-10-03 00:00:00"
}
#to verify that the documents are in the index
GET mediadata/events/_search
{
"query": {
"range": {
"created_at": {
"lt": "2016-11-02 00:00:00"
}
}
}
}
DELETE /mediadata/events/_query
{
"query": {
"range": {
"created_at": {
"gt": "2016-11-02 00:00:00"
}
}
}
}
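To verify, re-running the earlier search should now return zero hits, since the only document older than 2016-11-02 has been deleted:
GET mediadata/events/_search
{
  "query": {
    "range": {
      "created_at": {
        "lt": "2016-11-02 00:00:00"
      }
    }
  }
}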
I have this mapping & query. everything is working, except when i want to filter those contents with mentioned "tagid"s. it returns zero results.
i want to filter contents based on tag ids.
{
"mappings": {
"video": {
"_all": {
"enabled": true
},
"properties": {
"title": {
"type": "string"
},
"en_title": {
"type": "string"
},
"tags": {
"type": "nested",
"properties": {
"tagname": {
"type": "string"
},
"tagid": {
"type": "string",
"index": "not_analyzed"
}
}
},
"metadescription": {
"type": "string"
},
"author": {
"type": "string"
},
"description": {
"type": "string"
},
"items": {
"type": "nested",
"properties": {
"item_title": {
"type": "string"
},
"item_duration": {
"type": "string",
"index": "not_analyzed"
}
}
},
"isfeatured": {
"type": "string",
"index": "not_analyzed"
},
"image": {
"type": "string",
"index": "not_analyzed"
},
"contenttype": {
"type": "string",
"index": "not_analyzed"
},
"category": {
"type": "string",
"index": "not_analyzed"
},
"categoryalias": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"authorid": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "string",
"index": "not_analyzed"
},
"publishdate": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}
And this is the query:
{
"index": "content",
"type": "video",
"body": {
"query": {
"filtered": {
"query": {
"match_all": { }
},
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "tags",
"query": {
"bool": {
"should": [
{
"term": {
"tagid": "193"
}
},
{
"term": {
"tagid": "194"
}
}
]
}
}
}
},
{
"term": {
"categoryalias": "digilife"
}
},
{
"term": {
"price": 0
}
}
]
}
}
}
},
"from": 0,
"size": 9,
"sort": [
"_score"
]
}
}
The nested filter in your query is not quite correct: inside a nested query, fields must be referenced by their full path, so where you have tagid it should be tags.tagid. The full query should be:
{
"index": "content",
"type": "video",
"body": {
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [{
"nested": {
"path": "tags",
"query": {
"bool": {
"should": [{
"term": {
"tags.tagid": "193"
}
}, {
"term": {
"tags.tagid": "194"
}
}]
}
}
}
}, {
"term": {
"categoryalias": "digilife"
}
}, {
"term": {
"price": 0
}
}]
}
}
}
},
"from": 0,
"size": 9,
"sort": [
"_score"
]
}
}
EDIT:
Here's a complete working example to get you started. I have used Sense for this, but you can use cURL or the language client of your choice.
For the mapping:
curl -XPUT "http://localhost:9200/content" -d'
{
"mappings": {
"video": {
"_all": {
"enabled": true
},
"properties": {
"title": {
"type": "string"
},
"en_title": {
"type": "string"
},
"tags": {
"type": "nested",
"properties": {
"tagname": {
"type": "string"
},
"tagid": {
"type": "string",
"index": "not_analyzed"
}
}
},
"metadescription": {
"type": "string"
},
"author": {
"type": "string"
},
"description": {
"type": "string"
},
"items": {
"type": "nested",
"properties": {
"item_title": {
"type": "string"
},
"item_duration": {
"type": "string",
"index": "not_analyzed"
}
}
},
"isfeatured": {
"type": "string",
"index": "not_analyzed"
},
"image": {
"type": "string",
"index": "not_analyzed"
},
"contenttype": {
"type": "string",
"index": "not_analyzed"
},
"category": {
"type": "string",
"index": "not_analyzed"
},
"categoryalias": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"authorid": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "string",
"index": "not_analyzed"
},
"publishdate": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}'
We can check that the mapping is as expected with:
curl -XGET "http://localhost:9200/content/video/_mapping"
Now, let's index some documents:
// document with id 1
curl -XPOST "http://localhost:9200/content/video/1" -d'
{
"tags": [
{
"tagname" : "tag 193",
"tagid": "193"
}
],
"price": 0,
"categoryalias": "digilife"
}'
// document with id 2
curl -XPOST "http://localhost:9200/content/video/2" -d'
{
"tags": [
{
"tagname" : "tag 194",
"tagid": "194"
}
],
"price": 0,
"categoryalias": "digilife"
}'
// document with id 3
curl -XPOST "http://localhost:9200/content/video/3" -d'
{
"tags": [
{
"tagname" : "tag 194",
"tagid": "194"
}
],
"price": 0,
"categoryalias": "different category alias"
}'
Now, let's run the query. I've removed the superfluous parts and simplified it:
curl -XGET "http://localhost:9200/content/video/_search" -d'
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "tags",
"query": {
"terms": {
"tags.tagid": [
"193",
"194"
]
}
}
}
},
{
"term": {
"categoryalias": "digilife"
}
},
{
"term": {
"price": 0
}
}
]
}
}
}
},
"size": 9
}'
Only the documents with ids 1 and 2 should be returned, which is confirmed by the results:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "content",
"_type": "video",
"_id": "1",
"_score": 1,
"_source": {
"tags": [
{
"tagname": "tag 193",
"tagid": "193"
}
],
"price": 0,
"categoryalias": "digilife"
}
},
{
"_index": "content",
"_type": "video",
"_id": "2",
"_score": 1,
"_source": {
"tags": [
{
"tagname": "tag 194",
"tagid": "194"
}
],
"price": 0,
"categoryalias": "digilife"
}
}
]
}
}
I have a field in my index containing string data. When I run the DSL query below, I expect documents whose category field is not equal to the "-" character, but as you can see in the picture, such documents are still returned.
What is the way to retrieve this data?
GET webproxylog/_search
{
"query": {
"filtered": {
"query": {"match_all": {}},
"filter": {
"not": {
"filter": {
"term": {
"category": "-"
}
}
}
}
}
}
}
mappings:
{
"webproxylog": {
"mappings": {
"accesslog": {
"properties": {
"category": {
"type": "string"
},
"clientip": {
"type": "string",
"index": "not_analyzed"
},
"clientmac": {
"type": "string",
"index": "not_analyzed"
},
"clientname": {
"type": "string"
},
"duration": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"hierarchycode": {
"type": "string",
"index": "not_analyzed"
},
"loggingdate": {
"type": "date",
"format": "dateOptionalTime"
},
"reqmethod": {
"type": "string",
"index": "not_analyzed"
},
"respsize": {
"type": "long"
},
"resultcode": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
My test with ES 1.7.1:
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"user": {
"properties": {
"number": { "type": "integer" },
"name": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
docs:
{"number":1, "name":"abc"}
{"number":2, "name":"-"}
Query:
{
"size": 2,
"query": {
"filtered": {
"filter": {
"not": {
"term": {
"name": "-"
}
}
}
}
}
}
Result:
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "test_index",
        "_type": "user",
        "_id": "AVAiYtEjMfj2vcjSSqVr",
        "_score": 1,
        "_source": {
          "number": 1,
          "name": "abc"
        }
      }
    ]
  }
}
Without "index": "not_analyzed" I see the reported behavior, I didn't check how "-" gets tokenized in that case (forgot the query to do that :P)
I'm new to Elasticsearch and I have a problem.
I have 1 million rows of data, and query results take too long.
When I had 150k rows a query took 0.5 s; now it takes 10 s.
The amount of data differs each day (one day it can be 150k, another 1 million, etc.).
I need advice on how to make this faster.
Mapping
{
"mappings": {
"Jobs": {
"_ttl": {
"enabled": true,
"default": "1d"
},
"properties": {
"id": {
"type": "integer"
},
"advertiser_id": {
"type": "integer"
},
"company_id": {
"type": "integer"
},
"feed_id": {
"type": "integer"
},
"description_unique": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"title": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"city": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"county": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"country": {
"type": "integer"
},
"description": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"store": true
}
}
},
"company": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"url": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"premium": {
"type": "integer"
},
"bid": {
"type": "integer"
},
"created": {
"type": "date",
"format": "dateOptionalTime",
"default": "basic_date"
},
"updated": {
"type": "date",
"format": "dateOptionalTime"
}
}
}
}
}
Query
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Survey Developer",
"type": "best_fields",
"fields": [
"title",
"description"
],
"operator": "and"
}
}
]
}
},
"highlight": {
"boundary_chars": ".,!? \t\n",
"tag_schema": "styled",
"pre_tags": [
"<b>"
],
"post_tags": [
"</b>"
],
"fields": {
"description": {
"fragment_size": 200,
"number_of_fragments": 3
}
}
},
"sort": [
{
"premium": {
"order": "desc"
}
},
{
"bid": {
"order": "desc"
}
}
]
}
Server parameters:
CPU 1 vCPU
RAM 1 GB
System Disk 40 GB
Network 120 Mb/s
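With only 1 GB of RAM, the first thing to check is the heap size: Elasticsearch 1.x/2.x does not size its heap automatically, and the usual guidance is to give it about half the machine's memory. A minimal sketch, assuming a tarball install (paths and service wrappers vary):
# give Elasticsearch ~50% of RAM on a 1 GB machine
export ES_HEAP_SIZE=512m
./bin/elasticsearch
If the heap is already sized, the hot threads API (GET /_nodes/hot_threads) is a quick way to see where the node is actually spending its time.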