Elasticsearch faceting query - elasticsearch

I was wondering if I can write a faceting query for something like this
My document structure
UserID, AnswerID[] (int array)
1 , [9,10,11,56,78,99]
2 , [10,11,56,78,99]
3 , [8,10,12,56, 79,99]
4 , [9,10,11,56,78,99]
If I just want the count of users who answered 9,56 I can write a query. But I have two lists
List A - 9,10,11
ListB - 56,78,99
I want every pairwise combination (the Cartesian product) of the two lists.
Count of users who answered [9,56], [9,78], [9,99], [10,56], [10,78], [10,99], [11,56]...
How do I write a query to achieve something like this?
Any help is appreciated,
Thanks.

It can work when not using lists:
# Print ES Version
curl 'http://localhost:9200/'
# Delete the index `testindex`
curl -XDELETE 'http://localhost:9200/testindex'
# Create the index `testindex`
curl -XPUT 'http://localhost:9200/testindex' -d '{
"settings" : {
"index" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
}
}
}'
# Wait for yellow
curl -XGET 'http://localhost:9200/_cluster/health?wait_for_status=yellow'
# Index docs
curl -XPUT http://localhost:9200/testindex/type/1 -d '{ "listA":"value1", "listB":"value2" }'
curl -XPUT http://localhost:9200/testindex/type/2 -d '{ "listA":"value1", "listB":"value3" }'
curl -XPUT http://localhost:9200/testindex/type/3 -d '{ "listA":"value1", "listB":"value2" }'
# Refresh docs
curl -XPOST 'http://localhost:9200/testindex/_refresh'
# TermFacet
curl -XPOST 'http://localhost:9200/testindex/type/_search?pretty' -d '
{
"query": { "match_all" : {} },
"facets" : {
"tag" : {
"terms" : {
"script_field" : "_source.listA + \" - \" + _source.listB"
}
}
}
}'
Gives:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [ {
"_index" : "testindex",
"_type" : "type",
"_id" : "1",
"_score" : 1.0, "_source" : { "listA":"value1", "listB":"value2" }
}, {
"_index" : "testindex",
"_type" : "type",
"_id" : "2",
"_score" : 1.0, "_source" : { "listA":"value1", "listB":"value3" }
}, {
"_index" : "testindex",
"_type" : "type",
"_id" : "3",
"_score" : 1.0, "_source" : { "listA":"value1", "listB":"value2" }
} ]
},
"facets" : {
"tag" : {
"_type" : "terms",
"missing" : 0,
"total" : 0,
"other" : -3,
"terms" : [ {
"term" : "value1 - value2",
"count" : 2
}, {
"term" : "value1 - value3",
"count" : 1
} ]
}
}
}
But I have no idea how to do this when the fields are lists... I would love to know if it can be done.

Related

ElasticSearch aggregation shows unexpected result for SUM

I am trying to apply a sum aggregation in ES 7.14 and am getting an unexpected result.
1. prepare dataset
$cat products.json
{"index":{"_id":"1"}}
{"productId": 10,"shopId": 45,"prices": {"retailPrice": 525000000.02,"sumRetailPrice": 5250000000.2},"count": 10}
{"index":{"_id":"2"}}
{"productId": 10,"shopId": 48,"prices": {"retailPrice": 26250000004,"sumRetailPrice": 5250000000.8},"count": 20}
2. bulk insert
curl -XPOST localhost:9200/25products/_bulk -H "Content-Type: application/x-ndjson" --data-binary #./products.json
3. view mapping
curl -XGET "http://localhost:9200/25products/_mapping?pretty"
{
"25products" : {
"mappings" : {
"properties" : {
"count" : {
"type" : "long"
},
"prices" : {
"properties" : {
"retailPrice" : {
"type" : "float"
},
"sumRetailPrice" : {
"type" : "float"
}
}
},
"productId" : {
"type" : "long"
},
"shopId" : {
"type" : "long"
}
}
}
}
}
4. Sum field "prices.sumRetailPrice" in Painless
curl --location --request POST 'http://localhost:9200/25products/_search?pretty' \
--header 'Content-Type: application/json' \
--data-raw '{
"aggs": {"sumSupplyPrice": {
"sum": {"script": {
"source": "(!doc.containsKey('\''prices.sumRetailPrice'\'') ? 0 : (doc['\''prices.sumRetailPrice'\''].size() == 0 ? 0: doc['\''prices.sumRetailPrice'\''].value))"
}}
}},
"query": {"bool": {
"filter": [
{"terms": {"shopId": [45]}},
{"terms": {"productId": [10]}}
]
}},
"from": 0, "size": 10
}'
result is
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "25products",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.0,
"_source" : {
"productId" : 10,
"shopId" : 45,
"prices" : {
"retailPrice" : 5.2500000002E8,
"sumRetailPrice" : 5.2500000002E9
},
"count" : 10
}
}
]
},
"aggregations" : {
"sumSupplyPrice" : {
"value" : 5.249999872E9
}
}
}
5. Expectation
Since only a single record matches, I expect the sum to equal that record's sumRetailPrice:
"aggregations" : {
"sumSupplyPrice" : {
"value" : **5.2500000002E9**
}
}
But, actual result is not as expected.
"aggregations" : {
"sumSupplyPrice" : {
"value" : **5.249999872E9**
}
}
Where am I wrong?
Thanks!

delete all documents where id start with a number Elasticsearch

What is the fastest way to get all _ids ?
I need a query to delete all documents where _id starts with a number in Elasticsearch.
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 2,
"successful" : 2,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "myindex",
"_type" : "_doc",
"_id" : "_2432475",
"_score" : 1.0,
"_source" : {
"name" : "999",
"file" : null,
"age" : null,
}
},
Your best bet is to first copy the internal _id into a doc-level field (let's call it internal_id):
POST myindex/_update_by_query
{
"query": {
"match_all": {}
},
"script": {
"source": "ctx._source.internal_id = ctx._id",
"lang": "painless"
}
}
and then use a match_phrase_prefix query like so:
GET myindex/_search
{
"query": {
"match_phrase_prefix": {
"internal_id": "_24"
}
}
}
POST /myindex/_delete_by_query' \
-H 'Content-Type: application/json' \
-d '{
"query": {
"terms": {
"_id": [ "1", "2" ]
}
}
}'
Wildcards on _id are not supported in Elasticsearch; either you have to index a similar key explicitly into the doc, or
you can update the docs using _update_by_query and add the _id as a key in them.

Elasticsearch Sort By Epoch MilliSeconds Timestamp

I have the ES document structure as below.
"hits" : [
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "566d9a9d-62d4-4dcd-b3f3-c0598638fa43",
"_score" : 1.0,
"_source" : {
"values" : {
"isActive" : "false",
"length" : 18.49,
"latitude" : 33.69076,
"accuracy" : 7
},
"metadata" : {
"name" : "866425030270849",
"type" : "BAT-M1",
"ts" : "1572493157000"
}
}
},
I want to sort the ES index by metadata.ts (a date field with format 'epoch_millis'), so I am using the following query to get the latest record.
curl -X GET "https://localhost:9200/testindex/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query" : {
"term" : { "metadata.name" : "866425030270849" }
},
"sort": [
{ "devicedata.metadata.ts": "desc" }
],
"size": 1
}
'
But I am unable to get the most recent record with this sort. Please help!
devicedata in query is the nested object of metadata.

only basic searches are working

I'm playing with curl, querying an Elasticsearch database from my Mac console, but I have trouble executing more complex searches. So far I can query with match_all like this:
curl -XGET 'localhost:9200/products/fashion/_search?pretty' -d'
{
"query" : { "match_all" : {} }
}'
And I receive the following data:
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 915503,
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_type" : "fashion",
"_id" : "57d49ee494efcfdfe0f3abfe",
"_score" : 1.0,
"_source" : {
"doc" : {
"id" : "57d49ee494efcfdfe0f3abfe",
"name" : "Shorts",
"price" : {
"value" : 35
}
}
}
},
...........
}
I don't have problems to request a mapping like this:
curl -XGET 'localhost:9200/products/_mapping/fashion?pretty'
And the result for price is:
.......
"price" : {
"properties" : {
"currency" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"value" : {
"type" : "long"
}
}
},
....
But all my attempts to query with filter on "price.value" did not hit.
curl -XGET 'localhost:9200/products/fashion/_search?pretty' -d'
{
"query" : {
"constant_score" : {
"filter" : {
"term" : {
"price.value" : 35
}
}
}
}
}'
{
"took" : 26,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
This query I took from elasticsearch guide
I have run out of ideas and examples for how to write this query so that it returns what I obviously have in the database. As you might have noticed, I have at least one document with price.value = 35.
That's because your price field is within another field named doc, so you need to query doc.price.value like this:
curl -XPOST 'localhost:9200/products/fashion/_search?pretty' -d'
{
"query" : {
"constant_score" : {
"filter" : {
"term" : {
"doc.price.value" : 35
}
}
}
}
}'

How to read the JSON output of a faceted search query?

I have movies that belong to a genre and have multiple ratings. With ElasticSearch, I want to do a faceted search on genres first, and then on ratings.
I was reading about the idea here: http://www.elasticsearch.org/guide/reference/api/search/facets/
But I am confused how to understand the output of this Curl query:
curl -X POST "http://localhost:9200/movies/_search?pretty=true" -d '
{
"query" : { "query_string" : {"query" : "T*"} },
"facets" : {
"categories" : { "terms" : {"field" : "categories"} }
}
}
'
{
"took" : 35,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [ {
"_index" : "movies",
"_type" : "movie",
"_id" : "13",
"_score" : 1.0, "_source" : {"category_id":2,"created_at":"2013-05-03T16:40:21Z","description":null,"title":"Tiny Plastic Men","updated_at":"2013-05-03T16:40:21Z","user_id":null}
}, {
"_index" : "movies",
"_type" : "movie",
"_id" : "32",
"_score" : 1.0, "_source" : {"category_id":14,"created_at":"2013-05-03T16:55:02Z","description":null,"title":"The Extreme Truth","updated_at":"2013-05-03T16:55:02Z","user_id":null}
}, {
"_index" : "movies",
"_type" : "movie",
"_id" : "39",
"_score" : 1.0, "_source" : {"category_id":7,"created_at":"2013-05-03T16:55:02Z","description":null,"title":"A Time of Day","updated_at":"2013-05-03T16:55:02Z","user_id":null}
} ]
},
"facets" : {
"categories" : {
"_type" : "terms",
"missing" : 3,
"total" : 0,
"other" : 0,
"terms" : [ ]
}
}
I have some movies whose titles start with a 'T', but in addition I would expect movies from the genre/category 'Thriller'.
Therefore, what can I read from the JSON above?
It seems like your facet does not match any field in your documents; you should probably use:
curl -X POST "http://localhost:9200/movies/_search?pretty=true" -d '
{
"query" : { "query_string" : {"query" : "T*"} },
"facets" : {
"categories" : { "terms" : {"field" : "category_id"} }
}
}
'
Then you should get a list of category_id values and a count of documents for each category_id.
Facets are deprecated. See https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-facets.html
Better alternative is to use aggregations: https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-aggregations.html

Resources