not equal query in elasticsearch - elasticsearch

I have a field in my index containing string data. I run the DSL query below expecting documents whose category field is not equal to the "-" character, but documents with "-" are still returned.
What is the way to retrieve this data?
GET webproxylog/_search
{
"query": {
"filtered": {
"query": {"match_all": {}},
"filter": {
"not": {
"filter": {
"term": {
"category": "-"
}
}
}
}
}
}
}
mappings:
{
"webproxylog": {
"mappings": {
"accesslog": {
"properties": {
"category": {
"type": "string"
},
"clientip": {
"type": "string",
"index": "not_analyzed"
},
"clientmac": {
"type": "string",
"index": "not_analyzed"
},
"clientname": {
"type": "string"
},
"duration": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"hierarchycode": {
"type": "string",
"index": "not_analyzed"
},
"loggingdate": {
"type": "date",
"format": "dateOptionalTime"
},
"reqmethod": {
"type": "string",
"index": "not_analyzed"
},
"respsize": {
"type": "long"
},
"resultcode": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}

My test with ES 1.7.1:
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"user": {
"properties": {
"number": { "type": "integer" },
"name": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
docs:
{"number":1, "name":"abc"}
{"number":2, "name":"-"}
Query:
{
"size": 2,
"query": {
"filtered": {
"filter": {
"not": {
"term": {
"name": "-"
}
}
}
}
}
}
Result:
{
took: 1
timed_out: false
_shards: {
total: 1
successful: 1
failed: 0
}
hits: {
total: 1
max_score: 1
hits: [
{
_index: test_index
_type: user
_id: AVAiYtEjMfj2vcjSSqVr
_score: 1
_source: {
number: 1
name: abc
}
}
]
}
}
Without "index": "not_analyzed" I see the reported behavior, I didn't check how "-" gets tokenized in that case (forgot the query to do that :P)

Related

Delete by query not working

I'm trying to delete documents with a date lower than December 1st, but it doesn't look like anything actually gets deleted.
I tried using the delete by query API:
curl -XPOST "http://localhost:9200/mediadata/events/_delete_by_query" -d'
{
"query": {
"range": {
"created_at": {
"lt": "2016-12-01 00:00:00"
}
}
}
}'
Or this syntax:
curl -XDELETE 'http://localhost:9200/mediadata/events/_query' -d ...
I obtain this kind of result:
{"_index":"mediadata","_type":"events","_id":"_delete_by_query","_version":10,"_shards":{"total":3,"successful":2,"failed":0},"created":false}
Thanks in advance.
EDIT: Here is the mapping:
{
"mediadata": {
"mappings": {
"events": {
"properties": {
"channels": {
"properties": {
"kdata": {
"type": "string",
"index": "not_analyzed"
},
"mail": {
"type": "string",
"index": "not_analyzed"
},
"md5": {
"type": "string",
"index": "not_analyzed"
},
"mobile": {
"type": "string",
"index": "not_analyzed"
},
"ssp": {
"type": "string",
"index": "not_analyzed"
}
}
},
"contents": {
"type": "string",
"index": "not_analyzed"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"editor": {
"type": "string",
"index": "not_analyzed"
},
"end": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"location": {
"type": "geo_point"
},
"message": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "double"
},
"quantity": {
"type": "long"
},
"query": {
"properties": {
"bool": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"must": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"filtered": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
},
"lte": {
"type": "string"
}
}
}
}
}
}
},
"reference": {
"type": "string",
"index": "not_analyzed"
},
"source": {
"type": "string",
"index": "not_analyzed"
},
"start": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"type": {
"type": "string",
"index": "not_analyzed"
},
"updated_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
}
}
}
}
}
}
Your syntax is indeed correct. In version 5.x, delete-by-query works as follows:
POST mediadata/events/_delete_by_query?conflicts=proceed
{
"query": {
"range": {
"created_at": {
"gt": "2016-11-02 00:00:00"
}
}
}
}
Now, based on the response that you're getting from ES
{"_index":"mediadata","_type":"events","_id":"_delete_by_query","_version":10,"_shards":{"total":3,"successful":2,"failed":0},"created":false}
I will assume that you're running version 2.x, where the syntax is different. (That endpoint doesn't exist in 2.x, so ES treated _delete_by_query as a document id and indexed your query body as a document; that's why the response shows "created": false and a growing _version.)
First of all, in version 2.x, delete-by-query is a plugin that you need to install on every node (followed by a node restart) using:
plugin install delete-by-query
Then you run it:
curl -XDELETE "http://localhost:9200/mediadata/events/_query" -d'
{
"query": {
"range": {
"created_at": {
"gt": "2016-11-02 00:00:00"
}
}
}
}'
The response looks like:
{
"took": 0,
"timed_out": false,
"_indices": {
"_all": {
"found": 1,
"deleted": 1,
"missing": 0,
"failed": 0
},
"mediadata": {
"found": 1,
"deleted": 1,
"missing": 0,
"failed": 0
}
},
"failures": []
}
Full example:
PUT mediadata
{
"mappings": {
"events": {
"properties": {
"channels": {
"properties": {
"kdata": {
"type": "string",
"index": "not_analyzed"
},
"mail": {
"type": "string",
"index": "not_analyzed"
},
"md5": {
"type": "string",
"index": "not_analyzed"
},
"mobile": {
"type": "string",
"index": "not_analyzed"
},
"ssp": {
"type": "string",
"index": "not_analyzed"
}
}
},
"contents": {
"type": "string",
"index": "not_analyzed"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"editor": {
"type": "string",
"index": "not_analyzed"
},
"end": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"location": {
"type": "geo_point"
},
"message": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "double"
},
"quantity": {
"type": "long"
},
"query": {
"properties": {
"bool": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"must": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"filtered": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
},
"lte": {
"type": "string"
}
}
}
}
}
}
},
"reference": {
"type": "string",
"index": "not_analyzed"
},
"source": {
"type": "string",
"index": "not_analyzed"
},
"start": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"type": {
"type": "string",
"index": "not_analyzed"
},
"updated_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
}
}
}
}
}
PUT mediadata/events/1
{
"created_at" : "2016-11-02 00:00:00"
}
PUT mediadata/events/3
{
"created_at" : "2016-11-03 00:00:00"
}
#The one to delete
PUT mediadata/events/4
{
"created_at" : "2016-10-03 00:00:00"
}
#to verify that the documents are in the index
GET mediadata/events/_search
{
"query": {
"range": {
"created_at": {
"lt": "2016-11-02 00:00:00"
}
}
}
}
DELETE /mediadata/events/_query
{
"query": {
"range": {
"created_at": {
"gt": "2016-11-02 00:00:00"
}
}
}
}
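As a sanity check (not part of the original answer), the _count API accepts the same query body and should report zero matches below the cutoff once the delete has run:
curl -XGET "http://localhost:9200/mediadata/events/_count" -d'
{
"query": {
"range": {
"created_at": {
"lt": "2016-11-02 00:00:00"
}
}
}
}'
# expected once the 2016-10-03 document is gone: {"count": 0, ...}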

Unable to drop result bucket in terms aggregation - Elasticsearch

I have documents in Elasticsearch with the following structure:
"mappings": {
"document": {
"properties": {
"#timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"#version": {
"type": "string"
},
"id_secuencia": {
"type": "long"
},
"event": {
"properties": {
"elapsedTime": {
"type": "double"
},
"requestTime": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"error": {
"properties": {
"errorCode": {
"type": "string",
"index": "not_analyzed"
},
"failureDetail": {
"type": "string"
},
"fault": {
"type": "string"
}
}
},
"file": {
"type": "string",
"index": "not_analyzed"
},
"messageId": {
"type": "string"
},
"request": {
"properties": {
"body": {
"type": "string"
},
"header": {
"type": "string"
}
}
},
"responseTime": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"service": {
"properties": {
"operation": {
"type": "string",
"index": "not_analyzed"
},
"project": {
"type": "string",
"index": "not_analyzed"
},
"proxy": {
"type": "string",
"index": "not_analyzed"
},
"version": {
"type": "string",
"index": "not_analyzed"
}
}
},
"timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
},
"type": {
"type": "string"
}
}
}
}
And I need to retrieve a list of unique values for the field "event.file" (to show in a Kibana Data Table) according to the following criteria:
There is more than one document with the same value for the field "event.file"
All the occurrences for that value of "event.file" have resulted in error (the field "event.error.errorCode" exists in all of those documents)
For that purpose, the approach I've been testing is a terms aggregation, so I can get a list of buckets with all the documents for a single file name. What I haven't been able to achieve is dropping some of the resulting buckets according to the previous criteria (if at least one document in a bucket does not have an error, the bucket should be discarded).
Is this the correct approach or is there a better/easier way to get this type of result?
Thanks a lot.
After trying out several queries I found the following approach (see query below) to be valid for my purpose. The problem I see now is that apparently it is not possible to do this in Kibana, as it has no support for pipeline aggregations (see https://github.com/elastic/kibana/issues/4584).
{
"query": {
"bool": {
"must": [
{
"filtered": {
"filter": {
"exists": {
"field": "event.file"
}
}
}
}
]
}
},
"size": 0,
"aggs": {
"file-events": {
"terms": {
"field": "event.file",
"size": 0,
"min_doc_count": 2
},
"aggs": {
"files": {
"filter": {
"exists": {
"field": "event.file"
}
},
"aggs": {
"totalFiles": {
"value_count": {
"field": "event.file"
}
}
}
},
"errors": {
"filter": {
"exists": {
"field": "event.error.errorCode"
}
},
"aggs": {
"totalErrors": {
"value_count": {
"field": "event.error.errorCode"
}
}
}
},
"exhausted": {
"bucket_selector": {
"buckets_path": {
"total_files":"files>totalFiles",
"total_errors":"errors>totalErrors"
},
"script": "total_errors == total_files"
}
}
}
}
}
}
Again, if I'm missing something feedback will be appreciated :)
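One small simplification, assuming I'm reading the aggregation right: since the terms aggregation already buckets on "event.file", every document in a bucket necessarily has that field, so the "files" sub-aggregation is redundant and the built-in _count buckets path can reference the bucket's document count directly:
"exhausted": {
"bucket_selector": {
"buckets_path": {
"total_docs": "_count",
"total_errors": "errors>totalErrors"
},
"script": "total_errors == total_docs"
}
}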

elasticsearch exceeds memory after few hours

In our system, 5,000-15,000 records are written to Elasticsearch (1.7.3) per minute. For the first 4-5 hours everything is fine, but then it starts slowing down until there is almost no response to any read or write request. After we restart the Elasticsearch service, it works fine for another 4-5 hours.
When Elasticsearch starts slowing down, I check the memory and it is at about 95%, so I think that is why it breaks, but I don't understand why it keeps filling RAM and never flushes it. There is no point in increasing RAM: we have 8 GB, and Elasticsearch reached 95% of it within 5 hours.
Is there any way to handle this? Flushing memory, auto-restarting Elasticsearch, etc.?
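(A diagnostic hint, not from the original thread: on Linux, ~95% "used" memory often includes the filesystem cache, which is harmless. The nodes stats API shows how full the actual JVM heap is; look at jvm.mem.heap_used_percent.)
curl -XGET "http://localhost:9200/_nodes/stats/jvm?pretty"
If heap_used_percent stays pinned in the 90s, the JVM heap itself is exhausted, not just the OS cache.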
EDITED
Here is a sample document in the index:
"hits": {
"total": 18083446,
"max_score": 1,
"hits": [
{
"_index": "userlogs",
"_type": "userlogstype",
"_id": "AVMZEEYwW1W7iq27fTcE",
"_score": 1,
"_source": {
"domain": "http://gatr.hit.gemius.pl/",
"url": "http://gatr.hit.gemius.pl/_1456414408406/rexdot.js?",
"filetype": "-",
"clientname": "NOTINDOMAIN",
"clientmac": "00:0c:29:8f:c4:4f",
"hierarchycode": "HIER_DIRECT/188.165.145.88",
"user": "-",
"duration": "168",
"respsize": "1059",
"clientip": "10.6.1.130",
"loggingdate": "25/02/2016 17:33:28",
"resultcode": "TCP_MISS/301",
"reqmethod": "GET"
}
},
{
"_index": "userlogs",
"_type": "userlogstype",
"_id": "AVMZEEYwW1W7iq27fTcI",
"_score": 1,
"_source": {
"domain": "http://10.6.2.212/",
and here is the mapping of the index:
{
"webproxylog": {
"mappings": {
"update_by_query": {
"properties": {
"query": {
"properties": {
"filtered": {
"properties": {
"filter": {
"properties": {
"term": {
"properties": {
"url": {
"type": "string"
}
}
}
}
}
}
}
}
},
"script": {
"type": "string"
}
}
},
"accesslog": {
"properties": {
"action": {
"type": "string",
"index": "not_analyzed"
},
"action4cat": {
"type": "string",
"index": "not_analyzed"
},
"category": {
"type": "string"
},
"clientip": {
"type": "string",
"index": "not_analyzed"
},
"clientmac": {
"type": "string",
"index": "not_analyzed"
},
"clientname": {
"type": "string",
"index": "not_analyzed"
},
"domain": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"hierarchycode": {
"type": "string",
"index": "not_analyzed"
},
"index": {
"properties": {
"_index": {
"type": "string"
},
"_type": {
"type": "string"
}
}
},
"loggingdate": {
"type": "date",
"format": "dd/MM/yyyy HH:mm:ss"
},
"reqmethod": {
"type": "string",
"index": "not_analyzed"
},
"respsize": {
"type": "long"
},
"resultcode": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
Sample query:
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"exists": {
"field": "instock"
}
},
{}
]
}
}
}
},
"aggs": {
"aggs1": {
"terms": {
"field": "clientname",
"size": 5,
"order": {
"aggs2": "desc"
}
},
"aggs": {
"aggs2": {
"sum": {
"field": "respsize"
}
}
}
}
}
}
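A guess consistent with these symptoms: in ES 1.x, terms and sum aggregations like the one above load fielddata for "clientname" and "respsize" into the JVM heap, and the fielddata cache is unbounded by default, so it grows until the heap fills up. Its per-field size can be inspected with:
curl -XGET "http://localhost:9200/_nodes/stats/indices/fielddata?fields=*&pretty"
If fielddata dominates the heap, the usual mitigations are capping it with indices.fielddata.cache.size in elasticsearch.yml or moving to a version where doc_values are the default.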

ElasticSearch: How to create a complex query & filter for nested object?

I have this mapping & query. Everything works, except when I want to filter contents by the mentioned "tagid"s: it returns zero results.
I want to filter contents based on tag ids.
{
"mappings": {
"video": {
"_all": {
"enabled": true
},
"properties": {
"title": {
"type": "string"
},
"en_title": {
"type": "string"
},
"tags": {
"type": "nested",
"properties": {
"tagname": {
"type": "string"
},
"tagid": {
"type": "string",
"index": "not_analyzed"
}
}
},
"metadescription": {
"type": "string"
},
"author": {
"type": "string"
},
"description": {
"type": "string"
},
"items": {
"type": "nested",
"properties": {
"item_title": {
"type": "string"
},
"item_duration": {
"type": "string",
"index": "not_analyzed"
}
}
},
"isfeatured": {
"type": "string",
"index": "not_analyzed"
},
"image": {
"type": "string",
"index": "not_analyzed"
},
"contenttype": {
"type": "string",
"index": "not_analyzed"
},
"category": {
"type": "string",
"index": "not_analyzed"
},
"categoryalias": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"authorid": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "string",
"index": "not_analyzed"
},
"publishdate": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}
and this is the query:
{
"index": "content",
"type": "video",
"body": {
"query": {
"filtered": {
"query": {
"match_all": { }
},
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "tags",
"query": {
"bool": {
"should": [
{
"term": {
"tagid": "193"
}
},
{
"term": {
"tagid": "194"
}
}
]
}
}
}
},
{
"term": {
"categoryalias": "digilife"
}
},
{
"term": {
"price": 0
}
}
]
}
}
}
},
"from": 0,
"size": 9,
"sort": [
"_score"
]
}
}
The nested filter in your query is not quite correct: where you have tagid as the field name, it should be tags.tagid. The full query should be
{
"index": "content",
"type": "video",
"body": {
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [{
"nested": {
"path": "tags",
"query": {
"bool": {
"should": [{
"term": {
"tags.tagid": "193"
}
}, {
"term": {
"tags.tagid": "194"
}
}]
}
}
}
}, {
"term": {
"categoryalias": "digilife"
}
}, {
"term": {
"price": 0
}
}]
}
}
}
},
"from": 0,
"size": 9,
"sort": [
"_score"
]
}
}
EDIT:
Here's a complete working example to get you started. I have used Sense for this, but you can use cURL or the language client of your choice.
For the mapping
curl -XPUT "http://localhost:9200/content" -d'
{
"mappings": {
"video": {
"_all": {
"enabled": true
},
"properties": {
"title": {
"type": "string"
},
"en_title": {
"type": "string"
},
"tags": {
"type": "nested",
"properties": {
"tagname": {
"type": "string"
},
"tagid": {
"type": "string",
"index": "not_analyzed"
}
}
},
"metadescription": {
"type": "string"
},
"author": {
"type": "string"
},
"description": {
"type": "string"
},
"items": {
"type": "nested",
"properties": {
"item_title": {
"type": "string"
},
"item_duration": {
"type": "string",
"index": "not_analyzed"
}
}
},
"isfeatured": {
"type": "string",
"index": "not_analyzed"
},
"image": {
"type": "string",
"index": "not_analyzed"
},
"contenttype": {
"type": "string",
"index": "not_analyzed"
},
"category": {
"type": "string",
"index": "not_analyzed"
},
"categoryalias": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"authorid": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "string",
"index": "not_analyzed"
},
"publishdate": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}'
We can check the mapping is as expected with
curl -XGET "http://localhost:9200/content/video/_mapping"
Now, let's add some documents to the index
// document with id 1
curl -XPOST "http://localhost:9200/content/video/1" -d'
{
"tags": [
{
"tagname" : "tag 193",
"tagid": "193"
}
],
"price": 0,
"categoryalias": "digilife"
}'
// document with id 2
curl -XPOST "http://localhost:9200/content/video/2" -d'
{
"tags": [
{
"tagname" : "tag 194",
"tagid": "194"
}
],
"price": 0,
"categoryalias": "digilife"
}'
// document with id 3
curl -XPOST "http://localhost:9200/content/video/3" -d'
{
"tags": [
{
"tagname" : "tag 194",
"tagid": "194"
}
],
"price": 0,
"categoryalias": "different category alias"
}'
Now, let's run the query. I've removed the superfluous parts of the query and simplified it, folding the two term clauses into a single terms query (which matches if any of the supplied values match)
curl -XGET "http://localhost:9200/content/video/_search" -d'
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "tags",
"query": {
"terms": {
"tags.tagid": [
"193",
"194"
]
}
}
}
},
{
"term": {
"categoryalias": "digilife"
}
},
{
"term": {
"price": 0
}
}
]
}
}
}
},
"size": 9
}'
Only documents with ids 1 and 2 should be returned. This is confirmed by the results
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "content",
"_type": "video",
"_id": "1",
"_score": 1,
"_source": {
"tags": [
{
"tagname": "tag 193",
"tagid": "193"
}
],
"price": 0,
"categoryalias": "digilife"
}
},
{
"_index": "content",
"_type": "video",
"_id": "2",
"_score": 1,
"_source": {
"tags": [
{
"tagname": "tag 194",
"tagid": "194"
}
],
"price": 0,
"categoryalias": "digilife"
}
}
]
}
}
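A forward-looking note, in case you move off 1.x/2.x: the filtered query was deprecated in 2.0 and removed in 5.0, where the equivalent form moves the filters into a bool query:
curl -XGET "http://localhost:9200/content/video/_search" -d'
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "tags",
"query": {
"terms": {
"tags.tagid": ["193", "194"]
}
}
}
},
{ "term": { "categoryalias": "digilife" } },
{ "term": { "price": 0 } }
]
}
},
"size": 9
}'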

elasticsearch "having not" query

Some documents have a category field. In some of these docs, the value of the category field equals "-1". I need a query that returns documents which have a category field whose value is not equal to "-1".
I tried this:
GET webproxylog/_search
{
"query": {
"filtered": {
"filter": {
"not":{
"filter": {"and": {
"filters": [
{"term": {
"category": "-1"
}
},
{
"missing": {
"field": "category"
}
}
]
}}
}
}
}
}
}
But it doesn't work; it returns docs that don't have the category field.
EDIT
Mapping:
{
"webproxylog": {
"mappings": {
"accesslog": {
"properties": {
"category": {
"type": "string",
"index": "not_analyzed"
},
"clientip": {
"type": "string",
"index": "not_analyzed"
},
"clientmac": {
"type": "string",
"index": "not_analyzed"
},
"clientname": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"hierarchycode": {
"type": "string",
"index": "not_analyzed"
},
"loggingdate": {
"type": "date",
"format": "dateOptionalTime"
},
"reqmethod": {
"type": "string",
"index": "not_analyzed"
},
"respsize": {
"type": "long"
},
"resultcode": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"analyzer": "slash_analyzer"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
If your category field is a string and analyzed by default, then your -1 will be indexed as 1 (the minus sign is stripped).
You will need that field to be not_analyzed, or to add a sub-field which is not analyzed (as in my solution below).
Something like this:
DELETE test
PUT /test
{
"mappings": {
"test": {
"properties": {
"category": {
"type": "string",
"fields": {
"notAnalyzed": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
POST /test/test/1
{"category": "-1"}
POST /test/test/2
{"category": "2"}
POST /test/test/3
{"category": "3"}
POST /test/test/4
{"category": "4"}
POST /test/test/5
{"category2": "-1"}
GET /test/test/_search
{
"query": {
"bool": {
"must_not": [
{
"term": {
"category.notAnalyzed": {
"value": "-1"
}
}
},
{
"filtered": {
"filter": {
"missing": {
"field": "category"
}
}
}
}
]
}
}
}
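With the five test documents above, this should return documents 2, 3 and 4: document 1 is excluded because category equals "-1", and document 5 because the field is missing. As a quick check (a sketch; _count accepts the same query body):
curl -XGET "http://localhost:9200/test/test/_count" -d'
{
"query": {
"bool": {
"must_not": [
{
"term": {
"category.notAnalyzed": "-1"
}
},
{
"filtered": {
"filter": {
"missing": {
"field": "category"
}
}
}
}
]
}
}
}'
# expected: {"count": 3, ...}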
