I was wondering if I can write a faceting query for something like this
My document structure
UserID, AnswerID[] (int array)
1 , [9,10,11,56,78,99]
2 , [10,11,56,78,99]
3 , [8,10,12,56, 79,99]
4 , [9,10,11,56,78,99]
If I just want the count of users who answered 9,56 I can write a query. But I have two lists
List A - 9,10,11
ListB - 56,78,99
I want every pairwise combination (the Cartesian product) of the two lists.
Count of users who answered [9,56], [9,78], [9,99], [10,56], [10,78], [10,99], [11,56]...
How do I write a query to achieve something like this?
Any help is appreciated,
Thanks.
It can work when not using lists:
# Print ES Version
curl 'http://localhost:9200/'
# Delete the index `testindex`
curl -XDELETE 'http://localhost:9200/testindex'
# Create the index `testindex`
curl -XPUT 'http://localhost:9200/testindex' -d '{
"settings" : {
"index" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
}
}
}'
# Wait for yellow
curl -XGET 'http://localhost:9200/_cluster/health?wait_for_status=yellow'
# Index docs
curl -XPUT http://localhost:9200/testindex/type/1 -d '{ "listA":"value1", "listB":"value2" }'
curl -XPUT http://localhost:9200/testindex/type/2 -d '{ "listA":"value1", "listB":"value3" }'
curl -XPUT http://localhost:9200/testindex/type/3 -d '{ "listA":"value1", "listB":"value2" }'
# Refresh docs
curl -XPOST 'http://localhost:9200/testindex/_refresh'
# TermFacet
curl -XPOST 'http://localhost:9200/testindex/type/_search?pretty' -d '
{
"query": { "match_all" : {} },
"facets" : {
"tag" : {
"terms" : {
"script_field" : "_source.listA + \" - \" + _source.listB"
}
}
}
}'
Gives:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [ {
"_index" : "testindex",
"_type" : "type",
"_id" : "1",
"_score" : 1.0, "_source" : { "listA":"value1", "listB":"value2" }
}, {
"_index" : "testindex",
"_type" : "type",
"_id" : "2",
"_score" : 1.0, "_source" : { "listA":"value1", "listB":"value3" }
}, {
"_index" : "testindex",
"_type" : "type",
"_id" : "3",
"_score" : 1.0, "_source" : { "listA":"value1", "listB":"value2" }
} ]
},
"facets" : {
"tag" : {
"_type" : "terms",
"missing" : 0,
"total" : 0,
"other" : -3,
"terms" : [ {
"term" : "value1 - value2",
"count" : 2
}, {
"term" : "value1 - value3",
"count" : 1
} ]
}
}
}
But I have no idea how to do this when the fields are lists... I would love to know if it can be done...
Related
I am trying to apply a sum aggregation in ES 7.14 and am getting an unexpected result.
1. prepare dataset
$cat products.json
{"index":{"_id":"1"}}
{"productId": 10,"shopId": 45,"prices": {"retailPrice": 525000000.02,"sumRetailPrice": 5250000000.2},"count": 10}
{"index":{"_id":"2"}}
{"productId": 10,"shopId": 48,"prices": {"retailPrice": 26250000004,"sumRetailPrice": 5250000000.8},"count": 20}
2. bulk insert
curl -XPOST localhost:9200/25products/_bulk -H "Content-Type: application/x-ndjson" --data-binary @./products.json
3. view mapping
curl -XGET "http://localhost:9200/25products/_mapping?pretty"
{
"25products" : {
"mappings" : {
"properties" : {
"count" : {
"type" : "long"
},
"prices" : {
"properties" : {
"retailPrice" : {
"type" : "float"
},
"sumRetailPrice" : {
"type" : "float"
}
}
},
"productId" : {
"type" : "long"
},
"shopId" : {
"type" : "long"
}
}
}
}
}
4. Sum field "prices.sumRetailPrice" in Painless
curl --location --request POST 'http://localhost:9200/25products/_search?pretty' \
--header 'Content-Type: application/json' \
--data-raw '{
"aggs": {"sumSupplyPrice": {
"sum": {"script": {
"source": "(!doc.containsKey('\''prices.sumRetailPrice'\'') ? 0 : (doc['\''prices.sumRetailPrice'\''].size() == 0 ? 0: doc['\''prices.sumRetailPrice'\''].value))"
}}
}},
"query": {"bool": {
"filter": [
{"terms": {"shopId": [45]}},
{"terms": {"productId": [10]}}
]
}},
"from": 0, "size": 10
}'
result is
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "25products",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.0,
"_source" : {
"productId" : 10,
"shopId" : 45,
"prices" : {
"retailPrice" : 5.2500000002E8,
"sumRetailPrice" : 5.2500000002E9
},
"count" : 10
}
}
]
},
"aggregations" : {
"sumSupplyPrice" : {
"value" : 5.249999872E9
}
}
}
5. Expectation
Since only a single record matches the query, I expect the sum to equal that record's sumRetailPrice:
"aggregations" : {
"sumSupplyPrice" : {
"value" : **5.2500000002E9**
}
}
But, actual result is not as expected.
"aggregations" : {
"sumSupplyPrice" : {
"value" : **5.249999872E9**
}
}
Where am I wrong?
Thanks!
What is the fastest way to get all _ids ?
I need a query to delete all documents where _id starts with a number in Elasticsearch.
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 2,
"successful" : 2,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "myindex",
"_type" : "_doc",
"_id" : "_2432475",
"_score" : 1.0,
"_source" : {
"name" : "999",
"file" : null,
"age" : null,
}
},
Your best bet is to first copy the internal _id into a doc-level field (let's call it internal_id):
POST myindex/_update_by_query
{
"query": {
"match_all": {}
},
"script": {
"source": "ctx._source.internal_id = ctx._id",
"lang": "painless"
}
}
and then use a match_phrase_prefix query like so:
GET myindex/_search
{
"query": {
"match_phrase_prefix": {
"internal_id": "_24"
}
}
}
curl -XPOST 'http://localhost:9200/myindex/_delete_by_query' \
-H 'Content-Type: application/json' \
-d '{
"query": {
"terms": {
"_id": [ "1", "2" ]
}
}
}'
Wildcard queries on _id are not supported in Elasticsearch. Either you have to index a similar key explicitly into the doc, or
you can update the docs using _update_by_query and add the _id value as a field.
I have the ES document structure as below.
"hits" : [
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "566d9a9d-62d4-4dcd-b3f3-c0598638fa43",
"_score" : 1.0,
"_source" : {
"values" : {
"isActive" : "false",
"length" : 18.49,
"latitude" : 33.69076,
"accuracy" : 7
},
"metadata" : {
"name" : "866425030270849",
"type" : "BAT-M1",
"ts" : "1572493157000"
}
}
},
I want to sort the ES index by metadata.ts (a date field with format 'epoch_millis'). I am using the following query to get the latest record.
curl -X GET "https://localhost:9200/testindex/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query" : {
"term" : { "metadata.name" : "866425030270849" }
},
"sort": [
{ "devicedata.metadata.ts": "desc" }
],
"size": 1
}
'
But I am unable to get the most recent record. Please help!
In the query, devicedata is the nested object that contains metadata.
I'm playing with curl, querying an Elasticsearch database from my Mac console, but I have trouble executing more complex searches. So far I can query with match_all like this:
curl -XGET 'localhost:9200/products/fashion/_search?pretty' -d'
{
"query" : { "match_all" : {} }
}'
And I receive the following data:
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 915503,
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_type" : "fashion",
"_id" : "57d49ee494efcfdfe0f3abfe",
"_score" : 1.0,
"_source" : {
"doc" : {
"id" : "57d49ee494efcfdfe0f3abfe",
"name" : "Shorts",
"price" : {
"value" : 35
}
}
}
},
...........
}
I have no problem requesting the mapping like this:
curl -XGET 'localhost:9200/products/_mapping/fashion?pretty'
And the result for price is:
.......
"price" : {
"properties" : {
"currency" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"value" : {
"type" : "long"
}
}
},
....
But all my attempts to query with a filter on "price.value" returned no hits.
curl -XGET 'localhost:9200/products/fashion/_search?pretty' -d'
{
"query" : {
"constant_score" : {
"filter" : {
"term" : {
"price.value" : 35
}
}
}
}
}'
{
"took" : 26,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
I took this query from the Elasticsearch guide.
I have run out of ideas and examples for how to write this query so that it returns what I obviously have in the database. As you might have noticed, I have at least one document with price.value = 35.
That's because your price field is within another field named doc, so you need to query doc.price.value like this:
curl -XPOST 'localhost:9200/products/fashion/_search?pretty' -d'
{
"query" : {
"constant_score" : {
"filter" : {
"term" : {
"doc.price.value" : 35
}
}
}
}
}'
I have movies that each belong to a genre and have multiple ratings. With Elasticsearch, I want to do a faceted search on genres first, and then on ratings.
I was reading about the idea here: http://www.elasticsearch.org/guide/reference/api/search/facets/
But I am confused how to understand the output of this Curl query:
curl -X POST "http://localhost:9200/movies/_search?pretty=true" -d '
{
"query" : { "query_string" : {"query" : "T*"} },
"facets" : {
"categories" : { "terms" : {"field" : "categories"} }
}
}
'
{
"took" : 35,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [ {
"_index" : "movies",
"_type" : "movie",
"_id" : "13",
"_score" : 1.0, "_source" : {"category_id":2,"created_at":"2013-05-03T16:40:21Z","description":null,"title":"Tiny Plastic Men","updated_at":"2013-05-03T16:40:21Z","user_id":null}
}, {
"_index" : "movies",
"_type" : "movie",
"_id" : "32",
"_score" : 1.0, "_source" : {"category_id":14,"created_at":"2013-05-03T16:55:02Z","description":null,"title":"The Extreme Truth","updated_at":"2013-05-03T16:55:02Z","user_id":null}
}, {
"_index" : "movies",
"_type" : "movie",
"_id" : "39",
"_score" : 1.0, "_source" : {"category_id":7,"created_at":"2013-05-03T16:55:02Z","description":null,"title":"A Time of Day","updated_at":"2013-05-03T16:55:02Z","user_id":null}
} ]
},
"facets" : {
"categories" : {
"_type" : "terms",
"missing" : 3,
"total" : 0,
"other" : 0,
"terms" : [ ]
}
}
I have some movies whose titles start with a 'T', but additionally I would expect movies from the genre/category 'Thriller'.
Therefore, what can I read from the JSON above?
It seems like your facet does not match any field in your documents (the hits contain category_id, not categories), so you should probably use:
curl -X POST "http://localhost:9200/movies/_search?pretty=true" -d '
{
"query" : { "query_string" : {"query" : "T*"} },
"facets" : {
"categories" : { "terms" : {"field" : "category_id"} }
}
}
'
Then you should get a list of category_id values and a count of documents for each category_id.
Facets are deprecated. See https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-facets.html
Better alternative is to use aggregations: https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-aggregations.html