Some documents have a category field, and in some of those documents its value is "-1". I need a query that returns the documents which have a category field whose value is not equal to "-1".
I tried this:
GET webproxylog/_search
{
"query": {
"filtered": {
"filter": {
"not":{
"filter": {"and": {
"filters": [
{"term": {
"category": "-1"
}
},
{
"missing": {
"field": "category"
}
}
]
}}
}
}
}
}
}
But it does not work: it returns docs that do not have a "category" field.
EDIT
Mapping:
{
"webproxylog": {
"mappings": {
"accesslog": {
"properties": {
"category": {
"type": "string",
"index": "not_analyzed"
},
"clientip": {
"type": "string",
"index": "not_analyzed"
},
"clientmac": {
"type": "string",
"index": "not_analyzed"
},
"clientname": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"hierarchycode": {
"type": "string",
"index": "not_analyzed"
},
"loggingdate": {
"type": "date",
"format": "dateOptionalTime"
},
"reqmethod": {
"type": "string",
"index": "not_analyzed"
},
"respsize": {
"type": "long"
},
"resultcode": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"analyzer": "slash_analyzer"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
If your category field is a string and is analyzed (the default), then your -1 will be indexed as 1 (the minus sign is stripped).
You will need that field to be not_analyzed, or you will need to add a sub-field which is not analyzed (as in my solution below).
Something like this:
DELETE test
PUT /test
{
"mappings": {
"test": {
"properties": {
"category": {
"type": "string",
"fields": {
"notAnalyzed": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
POST /test/test/1
{"category": "-1"}
POST /test/test/2
{"category": "2"}
POST /test/test/3
{"category": "3"}
POST /test/test/4
{"category": "4"}
POST /test/test/5
{"category2": "-1"}
GET /test/test/_search
{
"query": {
"bool": {
"must_not": [
{
"term": {
"category.notAnalyzed": {
"value": "-1"
}
}
},
{
"filtered": {
"filter": {
"missing": {
"field": "category"
}
}
}
}
]
}
}
}
Related
I have been trying to fetch a document using multiple filters.
I'm currently using ES 1.7. Is it possible to use match_phrase twice in a filter?
example: people document
q=aaron&address=scarborough - searching a person by name and address, works fine.
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": {
"nested": {
"path": "addresses",
"query": {
"match_phrase": {
"address": "scarborough"
}
}
}
}
}
},
q=aaron&phone=813-689-6889 - searching a person by name and phone number works fine as well.
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": {
"query": {
"match_phrase": {
"phone": "813-689-6889"
}
}
}
}
}
However, when I try to use both filters (address and phone), I get a "No filter registered for [match_phrase]" error
for example: q=aaron&address=scarborough&phone=813-689-6889
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": {
"nested": {
"path": "addresses",
"query": {
"match_phrase": {
"address": "scarborough"
}
}
},
"query": {
"match_phrase": {
"phone": "813-689-6889"
}
}
}
}
}
the error, when using address and phone filters together:
nested: QueryParsingException[[pl_people] No filter registered for [match_phrase]]; }]","status":400}):
index mapping (person) as requested:
{
"pl_people": {
"mappings": {
"person": {
"properties": {
"ac_name": {
"type": "string",
"analyzer": "autocomplete"
},
"date_of_birth": {
"type": "date",
"format": "dateOptionalTime"
},
"email": {
"type": "string"
},
"first_name": {
"type": "string",
"fields": {
"na_first_name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"last_name": {
"type": "string",
"fields": {
"na_last_name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"middle_name": {
"type": "string",
"fields": {
"na_middle_name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"name": {
"type": "string",
"fields": {
"na_name": {
"type": "string",
"index": "not_analyzed"
},
"ngram_name": {
"type": "string",
"analyzer": "my_start"
},
"ns_name": {
"type": "string",
"analyzer": "no_stopwords"
}
}
},
"phone": {
"type": "string"
},
"time": {
"type": "date",
"format": "dateOptionalTime"
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
}
}
}
}
}
}
Maybe you can use term-filter, instead of match_phrase as a filter.
See here.
I'm trying to delete documents with a date earlier than December 1st, but it doesn't look like anything actually gets deleted.
I tried using the delete by query API:
curl -XPOST "http://localhost:9200/mediadata/events/_delete_by_query" -d'
{
"query": {
"range": {
"created_at": {
"lt": "2016-12-01 00:00:00"
}
}
}
}'
Or this syntax:
curl -XDELETE 'http://localhost:9200/mediadata/events/_query' -d ...
I obtain this kind of result:
{"_index":"mediadata","_type":"events","_id":"_delete_by_query","_version":10,"_shards":{"total":3,"successful":2,"failed":0},"created":false}
Thanks in advance.
EDIT: Here is the mapping:
{
"mediadata": {
"mappings": {
"events": {
"properties": {
"channels": {
"properties": {
"kdata": {
"type": "string",
"index": "not_analyzed"
},
"mail": {
"type": "string",
"index": "not_analyzed"
},
"md5": {
"type": "string",
"index": "not_analyzed"
},
"mobile": {
"type": "string",
"index": "not_analyzed"
},
"ssp": {
"type": "string",
"index": "not_analyzed"
}
}
},
"contents": {
"type": "string",
"index": "not_analyzed"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"editor": {
"type": "string",
"index": "not_analyzed"
},
"end": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"location": {
"type": "geo_point"
},
"message": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "double"
},
"quantity": {
"type": "long"
},
"query": {
"properties": {
"bool": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"must": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"filtered": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
},
"lte": {
"type": "string"
}
}
}
}
}
}
},
"reference": {
"type": "string",
"index": "not_analyzed"
},
"source": {
"type": "string",
"index": "not_analyzed"
},
"start": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"type": {
"type": "string",
"index": "not_analyzed"
},
"updated_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
}
}
}
}
}
}
Your syntax is indeed correct. In version 5.x, delete by query looks as follows:
POST mediadata/events/_delete_by_query?conflicts=proceed
{
"query": {
"range": {
"created_at": {
"gt": "2016-11-02 00:00:00"
}
}
}
}
Now, based on the response that you're getting from ES:
{"_index":"mediadata","_type":"events","_id":"_delete_by_query","_version":10,"_shards":{"total":3,"successful":2,"failed":0},"created":false}
I will assume that you're running version 2.x, where the syntax is different.
First of all, in version 2.x delete by query is a plugin that you need to install using:
plugin install delete-by-query
Then you run it :
curl -XDELETE "http://localhost:9200/mediadata/events/_query" -d'
{
"query": {
"range": {
"created_at": {
"gt": "2016-11-02 00:00:00"
}
}
}
}'
The response looks like :
{
"took": 0,
"timed_out": false,
"_indices": {
"_all": {
"found": 1,
"deleted": 1,
"missing": 0,
"failed": 0
},
"mediadata": {
"found": 1,
"deleted": 1,
"missing": 0,
"failed": 0
}
},
"failures": []
}
Full example :
PUT mediadata
{
"mappings": {
"events": {
"properties": {
"channels": {
"properties": {
"kdata": {
"type": "string",
"index": "not_analyzed"
},
"mail": {
"type": "string",
"index": "not_analyzed"
},
"md5": {
"type": "string",
"index": "not_analyzed"
},
"mobile": {
"type": "string",
"index": "not_analyzed"
},
"ssp": {
"type": "string",
"index": "not_analyzed"
}
}
},
"contents": {
"type": "string",
"index": "not_analyzed"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"editor": {
"type": "string",
"index": "not_analyzed"
},
"end": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"location": {
"type": "geo_point"
},
"message": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "double"
},
"quantity": {
"type": "long"
},
"query": {
"properties": {
"bool": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"must": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"filtered": {
"properties": {
"filter": {
"properties": {
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
}
}
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"range": {
"properties": {
"created_at": {
"properties": {
"lt": {
"type": "string"
},
"lte": {
"type": "string"
}
}
}
}
}
}
},
"reference": {
"type": "string",
"index": "not_analyzed"
},
"source": {
"type": "string",
"index": "not_analyzed"
},
"start": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
},
"type": {
"type": "string",
"index": "not_analyzed"
},
"updated_at": {
"type": "date",
"format": "yyyy-MM-dd' 'HH:mm:ss"
}
}
}
}
}
PUT mediadata/events/1
{
"created_at" : "2016-11-02 00:00:00"
}
PUT mediadata/events/3
{
"created_at" : "2016-11-03 00:00:00"
}
#The one to delete
PUT mediadata/events/4
{
"created_at" : "2016-10-03 00:00:00"
}
#to verify that the documents are in the index
GET mediadata/events/_search
{
"query": {
"range": {
"created_at": {
"lt": "2016-11-02 00:00:00"
}
}
}
}
DELETE /mediadata/events/_query
{
"query": {
"range": {
"created_at": {
"gt": "2016-11-02 00:00:00"
}
}
}
}
I have documents in Elasticsearch with the following structure:
"mappings": {
"document": {
"properties": {
"#timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"#version": {
"type": "string"
},
"id_secuencia": {
"type": "long"
},
"event": {
"properties": {
"elapsedTime": {
"type": "double"
},
"requestTime": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"error": {
"properties": {
"errorCode": {
"type": "string",
"index": "not_analyzed"
},
"failureDetail": {
"type": "string"
},
"fault": {
"type": "string"
}
}
},
"file": {
"type": "string",
"index": "not_analyzed"
},
"messageId": {
"type": "string"
},
"request": {
"properties": {
"body": {
"type": "string"
},
"header": {
"type": "string"
}
}
},
"responseTime": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"service": {
"properties": {
"operation": {
"type": "string",
"index": "not_analyzed"
},
"project": {
"type": "string",
"index": "not_analyzed"
},
"proxy": {
"type": "string",
"index": "not_analyzed"
},
"version": {
"type": "string",
"index": "not_analyzed"
}
}
},
"timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
},
"type": {
"type": "string"
}
}
}
}
And I need to retrieve a list of unique values for the field "event.file" (to show in a Kibana Data Table) according to the following criteria:
There is more than one document with the same value for the field "event.file"
All the occurrences of that value of "event.file" have resulted in an error (the field "event.error.errorCode" exists in all of those documents)
For that purpose the approach I've been testing is the use of terms aggregation, so I can get a list of buckets with all documents for a single file name. What I haven't been able to achieve is to drop some of the resulting buckets in the aggregation according to the previous criteria (if at least one of them does not have an error the bucket should be discarded).
Is this the correct approach or is there a better/easier way to get this type of result?
Thanks a lot.
After trying out several queries I found the following approach (see query below) to be valid for my purpose. The problem I see now is that apparently it is not possible to do this in Kibana, as it has no support for pipeline aggregations (see https://github.com/elastic/kibana/issues/4584).
{
"query": {
"bool": {
"must": [
{
"filtered": {
"filter": {
"exists": {
"field": "event.file"
}
}
}
}
]
}
},
"size": 0,
"aggs": {
"file-events": {
"terms": {
"field": "event.file",
"size": 0,
"min_doc_count": 2
},
"aggs": {
"files": {
"filter": {
"exists": {
"field": "event.file"
}
},
"aggs": {
"totalFiles": {
"value_count": {
"field": "event.file"
}
}
}
},
"errors": {
"filter": {
"exists": {
"field": "event.error.errorCode"
}
},
"aggs": {
"totalErrors": {
"value_count": {
"field": "event.error.errorCode"
}
}
}
},
"exhausted": {
"bucket_selector": {
"buckets_path": {
"total_files":"files>totalFiles",
"total_errors":"errors>totalErrors"
},
"script": "total_errors == total_files"
}
}
}
}
}
}
Again, if I'm missing something feedback will be appreciated :)
I have this mapping and query. Everything works, except when I try to filter the contents by the "tagid" values mentioned below: then it returns zero results.
I want to filter contents based on tag ids.
{
"mappings": {
"video": {
"_all": {
"enabled": true
},
"properties": {
"title": {
"type": "string"
},
"en_title": {
"type": "string"
},
"tags": {
"type": "nested",
"properties": {
"tagname": {
"type": "string"
},
"tagid": {
"type": "string",
"index": "not_analyzed"
}
}
},
"metadescription": {
"type": "string"
},
"author": {
"type": "string"
},
"description": {
"type": "string"
},
"items": {
"type": "nested",
"properties": {
"item_title": {
"type": "string"
},
"item_duration": {
"type": "string",
"index": "not_analyzed"
}
}
},
"isfeatured": {
"type": "string",
"index": "not_analyzed"
},
"image": {
"type": "string",
"index": "not_analyzed"
},
"contenttype": {
"type": "string",
"index": "not_analyzed"
},
"category": {
"type": "string",
"index": "not_analyzed"
},
"categoryalias": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"authorid": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "string",
"index": "not_analyzed"
},
"publishdate": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}
and this is the query:
{
"index": "content",
"type": "video",
"body": {
"query": {
"filtered": {
"query": {
"match_all": { }
},
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "tags",
"query": {
"bool": {
"should": [
{
"term": {
"tagid": "193"
}
},
{
"term": {
"tagid": "194"
}
}
]
}
}
}
},
{
"term": {
"categoryalias": "digilife"
}
},
{
"term": {
"price": 0
}
}
]
}
}
}
},
"from": 0,
"size": 9,
"sort": [
"_score"
]
}
}
Your nested filter in your query is not quite correct. For the field names where you have tagid, it should be tags.tagid. Full query should be
{
"index": "content",
"type": "video",
"body": {
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [{
"nested": {
"path": "tags",
"query": {
"bool": {
"should": [{
"term": {
"tags.tagid": "193"
}
}, {
"term": {
"tags.tagid": "194"
}
}]
}
}
}
}, {
"term": {
"categoryalias": "digilife"
}
}, {
"term": {
"price": 0
}
}]
}
}
}
},
"from": 0,
"size": 9,
"sort": [
"_score"
]
}
}
EDIT:
Here's a complete working example to get you started. I have used Sense for this, but you can use cURL or the language client of your choice.
For the mapping
curl -XPUT "http://localhost:9200/content" -d'
{
"mappings": {
"video": {
"_all": {
"enabled": true
},
"properties": {
"title": {
"type": "string"
},
"en_title": {
"type": "string"
},
"tags": {
"type": "nested",
"properties": {
"tagname": {
"type": "string"
},
"tagid": {
"type": "string",
"index": "not_analyzed"
}
}
},
"metadescription": {
"type": "string"
},
"author": {
"type": "string"
},
"description": {
"type": "string"
},
"items": {
"type": "nested",
"properties": {
"item_title": {
"type": "string"
},
"item_duration": {
"type": "string",
"index": "not_analyzed"
}
}
},
"isfeatured": {
"type": "string",
"index": "not_analyzed"
},
"image": {
"type": "string",
"index": "not_analyzed"
},
"contenttype": {
"type": "string",
"index": "not_analyzed"
},
"category": {
"type": "string",
"index": "not_analyzed"
},
"categoryalias": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"authorid": {
"type": "string",
"index": "not_analyzed"
},
"price": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "string",
"index": "not_analyzed"
},
"publishdate": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}'
We can check the mapping is as expected with
curl -XGET "http://localhost:9200/content/video/_mapping"
Now, let's index some documents into the index
// document with id 1
curl -XPOST "http://localhost:9200/content/video/1" -d'
{
"tags": [
{
"tagname" : "tag 193",
"tagid": "193"
}
],
"price": 0,
"categoryalias": "digilife"
}'
// document with id 2
curl -XPOST "http://localhost:9200/content/video/2" -d'
{
"tags": [
{
"tagname" : "tag 194",
"tagid": "194"
}
],
"price": 0,
"categoryalias": "digilife"
}'
// document with id 3
curl -XPOST "http://localhost:9200/content/video/3" -d'
{
"tags": [
{
"tagname" : "tag 194",
"tagid": "194"
}
],
"price": 0,
"categoryalias": "different category alias"
}'
Now, let's run the query. I've removed the superfluous parts of the query and simplified it
curl -XGET "http://localhost:9200/content/video/_search" -d'
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "tags",
"query": {
"terms": {
"tags.tagid": [
"193",
"194"
]
}
}
}
},
{
"term": {
"categoryalias": "digilife"
}
},
{
"term": {
"price": 0
}
}
]
}
}
}
},
"size": 9
}'
Only documents with ids 1 and 2 should be returned. This is confirmed with the results
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "content",
"_type": "video",
"_id": "1",
"_score": 1,
"_source": {
"tags": [
{
"tagname": "tag 193",
"tagid": "193"
}
],
"price": 0,
"categoryalias": "digilife"
}
},
{
"_index": "content",
"_type": "video",
"_id": "2",
"_score": 1,
"_source": {
"tags": [
{
"tagname": "tag 194",
"tagid": "194"
}
],
"price": 0,
"categoryalias": "digilife"
}
}
]
}
}
I have a field in my index that contains string data. I ran the DSL query below expecting documents whose category field is not equal to the "-" character, but as you can see in the picture, it still returns such documents.
How can I retrieve this data?
GET webproxylog/_search
{
"query": {
"filtered": {
"query": {"match_all": {}},
"filter": {
"not": {
"filter": {
"term": {
"category": "-"
}
}
}
}
}
}
}
mappings:
{
"webproxylog": {
"mappings": {
"accesslog": {
"properties": {
"category": {
"type": "string"
},
"clientip": {
"type": "string",
"index": "not_analyzed"
},
"clientmac": {
"type": "string",
"index": "not_analyzed"
},
"clientname": {
"type": "string"
},
"duration": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"hierarchycode": {
"type": "string",
"index": "not_analyzed"
},
"loggingdate": {
"type": "date",
"format": "dateOptionalTime"
},
"reqmethod": {
"type": "string",
"index": "not_analyzed"
},
"respsize": {
"type": "long"
},
"resultcode": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"index": "not_analyzed"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
My test with ES 1.7.1:
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"user": {
"properties": {
"number": { "type": "integer" },
"name": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
docs:
{"number":1, "name":"abc"}
{"number":2, "name":"-"}
Query:
{
"size": 2,
"query": {
"filtered": {
"filter": {
"not": {
"term": {
"name": "-"
}
}
}
}
}
}
Result:
{
took: 1
timed_out: false
_shards: {
total: 1
successful: 1
failed: 0
}
hits: {
total: 1
max_score: 1
hits: [
{
_index: test_index
_type: user
_id: AVAiYtEjMfj2vcjSSqVr
_score: 1
_source: {
number: 1
name: abc
}
}
]
}
}
Without "index": "not_analyzed" I see the reported behavior. I didn't check how "-" gets tokenized in that case (I forgot the query to do that :P; the _analyze API can be used to inspect the tokens).