delete all documents where id start with a number Elasticsearch - elasticsearch

What is the fastest way to get all _ids ?
I need a query to delete all documents whose _id starts with a number in Elasticsearch.
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 2,
"successful" : 2,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "myindex",
"_type" : "_doc",
"_id" : "_2432475",
"_score" : 1.0,
"_source" : {
"name" : "999",
"file" : null,
"age" : null,
}
},

Your best bet is to first copy the internal _id into a doc-level field (let's call it internal_id):
POST myindex/_update_by_query
{
"query": {
"match_all": {}
},
"script": {
"source": "ctx._source.internal_id = ctx._id",
"lang": "painless"
}
}
and then use a match_phrase_prefix query like so:
GET myindex/_search
{
"query": {
"match_phrase_prefix": {
"internal_id": "_24"
}
}
}

curl -X POST 'http://localhost:9200/myindex/_delete_by_query' \
-H 'Content-Type: application/json' \
-d '{
"query": {
"terms": {
"_id": [ "1", "2" ]
}
}
}'
Wildcards on _id are not supported in Elasticsearch. Either you have to index a similar key explicitly into the doc, or
you can update the docs using _update_by_query and add an _id key into them.

Related

why elasticsearch can not search a document contains one word?

I am using the default settings for one index. The following DSL shows how I create the documents and search them.
### create index
PUT /mk_test
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1
},
"mappings": {
"_doc": {
"properties": {
"nickName": {
"type": "text"
}
}
}
}
}
### get index
GET /mk_test/_mapping
### create document
POST /mk_test/_doc
{
"nickName": "C.BP"
}
### create document
POST /mk_test/_doc
{
"nickName": "BP"
}
### create document
POST /mk_test/_doc
{
"nickName": "C.B"
}
### create document
POST /mk_test/_doc
{
"nickName": "你好,中国"
}
Now I have 4 documents in the mk_test index,
and I have 2 search queries that give me different results.
I want to query docs that contain "中国":
GET /mk_test/_search
{
"query": {
"bool": {
"must": [
{"match_phrase": {"nickName": "中国"}}
]
}
}
}
server responses:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.5779729,
"hits" : [
{
"_index" : "mk_test",
"_type" : "_doc",
"_id" : "c2gwwX0BTkUG9klh1b8k",
"_score" : 1.5779729,
"_source" : {
"nickName" : "你好,中国"
}
}
]
}
}
I want to query docs that contain "BP", but I can't get "C.BP":
GET /mk_test/_search
{
"query": {
"bool": {
"must": [
{"match_phrase": {"nickName": "BP"}}
]
}
}
}
The server gives me only "BP"; "C.BP" is not found:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.4599355,
"hits" : [
{
"_index" : "mk_test",
"_type" : "_doc",
"_id" : "TmguwX0BTkUG9klhAJ_S",
"_score" : 1.4599355,
"_source" : {
"nickName" : "BP"
}
}
]
}
}
How can I find both "BP" and "C.BP" ?

ElasticSearch aggregation shows unexpected result for SUM

Trying to apply sum aggregation in ES 7.14 and get unexpected result
1. prepare dataset
$cat products.json
{"index":{"_id":"1"}}
{"productId": 10,"shopId": 45,"prices": {"retailPrice": 525000000.02,"sumRetailPrice": 5250000000.2},"count": 10}
{"index":{"_id":"2"}}
{"productId": 10,"shopId": 48,"prices": {"retailPrice": 26250000004,"sumRetailPrice": 5250000000.8},"count": 20}
2. bulk insert
curl -XPOST localhost:9200/25products/_bulk -H "Content-Type: application/x-ndjson" --data-binary @./products.json
3. view mapping
curl -XGET "http://localhost:9200/25products/_mapping?pretty"
{
"25products" : {
"mappings" : {
"properties" : {
"count" : {
"type" : "long"
},
"prices" : {
"properties" : {
"retailPrice" : {
"type" : "float"
},
"sumRetailPrice" : {
"type" : "float"
}
}
},
"productId" : {
"type" : "long"
},
"shopId" : {
"type" : "long"
}
}
}
}
}
4. Sum field "prices.sumRetailPrice" in Painless
curl --location --request POST 'http://localhost:9200/25products/_search?pretty' \
--header 'Content-Type: application/json' \
--data-raw '{
"aggs": {"sumSupplyPrice": {
"sum": {"script": {
"source": "(!doc.containsKey('\''prices.sumRetailPrice'\'') ? 0 : (doc['\''prices.sumRetailPrice'\''].size() == 0 ? 0: doc['\''prices.sumRetailPrice'\''].value))"
}}
}},
"query": {"bool": {
"filter": [
{"terms": {"shopId": [45]}},
{"terms": {"productId": [10]}}
]
}},
"from": 0, "size": 10
}'
result is
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "25products",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.0,
"_source" : {
"productId" : 10,
"shopId" : 45,
"prices" : {
"retailPrice" : 5.2500000002E8,
"sumRetailPrice" : 5.2500000002E9
},
"count" : 10
}
}
]
},
"aggregations" : {
"sumSupplyPrice" : {
"value" : 5.249999872E9
}
}
}
5. Expectation
Since only a single record matches, I expect the sum to be the same value as its sumRetailPrice:
"aggregations" : {
"sumSupplyPrice" : {
"value" : **5.2500000002E9**
}
}
But, actual result is not as expected.
"aggregations" : {
"sumSupplyPrice" : {
"value" : **5.249999872E9**
}
}
Where am I wrong?
Thanks!

Query for value in object

I have multiple documents like:
{
labels: {
label1Key: "label1Value",
label2Key: "label2Value",
...
},
...
}
The keys of the labels object are arbitrary. I would like to query for the existence of specific values in the labels object without knowing the key, e.g. I want all data that contain label2Value as a value in the labels object.
I've tried to solve this via an exists query, but this way I can only access the key of an object. Is there a way to query for values?
With a Multimatch query you can use wildcards on the field names
Ingest data
POST test_bene/_doc
{
"labels": {
"label1Key": "label1Value",
"label2Key": "label2Value"
}
}
Query
POST test_bene/_search
{
"query": {
"multi_match": {
"query": "label1Value",
"fields": ["labels.*"]
}
}
}
Response
{
"took" : 24,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.2876821,
"hits" : [
{
"_index" : "test_bene",
"_type" : "_doc",
"_id" : "RtBd_ncB46EpgstaHy3Y",
"_score" : 0.2876821,
"_source" : {
"labels" : {
"label1Key" : "label1Value",
"label2Key" : "label2Value"
}
}
}
]
}
}

Elasticsearch: retrieve only document _id where field doesn't exist

I would like to retrieve all document _ids (without other fields) where field "name" doesn't exist:
I know I can search for where field "name" doesn't exist like this:
"query": {
"bool": {
"must_not": {
"exists": {
"field": "name"
}
}
}
}
and I think that to get the _id of the document only without any fields i need to use (correct me if I'm wrong):
"fields": []
How do I combine these 2 parts to make a query that works?
By default Elasticsearch returns the entire JSON document in the _source field of each hit, so you can simply add _source and set it to false:
"_source": false,
"query":{
...
}
and this will retrieve just the metadata from your specified index, so your hits array will contain _index, _type, _id and _score for each result
e.g
{
"took" : 11,
"timed_out" : false,
"_shards" : {
"total" : 12,
"successful" : 12,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "filebeat-7.8.1-2021.01.28",
"_type" : "_doc",
"_id" : "SomeUniqeuId86aa",
"_score" : 1.0
},
{
"_index" : "filebeat-7.8.1-2021.01.28",
"_type" : "_doc",
"_id" : "An0therrUniqueiD",
"_score" : 1.0
}
]
}
}

enabled fielddata on text field in ElasticSearch but aggregation is not working

According to the documentation you can run ElasticSearch aggregations on fields that are type keyword or not a text field or which have fielddata set to true in the index mapping.
I am trying to count city_names in an nginx log. It works fine with the int field result. But it does not work with the field city_name even when I updated the index mapping for that to put fielddata=true. That should not have been required, as it was of type keyword.
To say it does not work means that:
"aggregations" : {
"cities" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ ]
}
}
Here is the field mapping:
"city_name" : {
"type" : "text",
"fielddata" : true
},
And here is the aggregation query:
curl -XGET --user $pwd --header 'Content-Type: application/json' https://58571402f5464923883e7be42a037917.eu-central-1.aws.cloud.es.io:9243/logstash/_search?pretty -d '{
"aggs" : {
"cities": {
"terms" : { "field": "city_name"}
}
}
}'
If you don't get any error when executing your search it seems that is more like a problem with the data. Are you sure you have, at least, one document with the field city_name filled?
I tried to reproduce your issue with ElasticSearch 6.6.2.
I created an index
PUT cities
{
"mappings": {
"city": {
"dynamic": "true",
"properties": {
"id": {
"type": "long"
},
"city_name": {
"type": "text",
"fielddata": true
}
}
}
}
}
I added one document without the city_name
PUT cities/city/1
{
"id": "1"
}
When I performed the search:
GET cities/_search
{
"aggs": {
"cities": {
"terms" : { "field": "city_name"}
}
}
}
I got no buckets in the cities aggregation. But when I added one document with the city name filled:
PUT cities/city/2
{
"id": "2",
"city_name": "London"
}
I got the expected result:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 1.0,
"hits" : [
{
"_index" : "cities",
"_type" : "city",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"id" : "2",
"city_name" : "london"
}
},
{
"_index" : "cities",
"_type" : "city",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"id" : "1"
}
}
]
},
"aggregations" : {
"cities" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "london",
"doc_count" : 1
}
]
}
}
}

Resources