I am new to Elasticsearch and can't quite figure out whether what I want is possible.
I can query like this:
GET entity/_search
{
"query": {
"bool": {
"must": [
{ "match": { "searchField": "searchValue" }}
]
}
},
"aggs" : {
"uniq_Id" : {
"terms" : { "field" : "Id", "size":500 }
}
}
}
and it will return the top search results and the terms aggregation buckets. Ideally, though, I would like the search results to contain only one document (perhaps the top one, it does not matter) for each of the unique Ids defined in the terms aggregation.
You can use a Terms Aggregation together with a Top Hits Aggregation nested under it to get the result you are looking for.
Once you do that, specify the size as 1 in the Top Hits Aggregation so that each bucket returns a single document.
Based on your query, I've created a sample mapping, documents, an aggregation query, and the response for your reference.
Mapping:
PUT mysampleindex
{
"mappings": {
"mydocs": {
"properties": {
"searchField":{
"type": "text"
},
"Id": {
"type": "keyword"
}
}
}
}
}
Sample Documents:
POST mysampleindex/mydocs/1
{
"searchField": "elasticsearch",
"Id": "1000"
}
POST mysampleindex/mydocs/2
{
"searchField": "elasticsearch is awesome",
"Id": "1000"
}
POST mysampleindex/mydocs/3
{
"searchField": "elasticsearch is awesome",
"Id": "1001"
}
POST mysampleindex/mydocs/4
{
"searchField": "elasticsearch is pretty cool",
"Id": "1001"
}
POST mysampleindex/mydocs/5
{
"searchField": "elasticsearch is pretty cool",
"Id": "1002"
}
Query:
POST mysampleindex/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"searchField": "elasticsearch"
}
}
]
}
},
"aggs": {
"myUniqueIds": {
"terms": {
"field": "Id",
"size": 10
},
"aggs": {
"myDocs": {
"top_hits": { <---- Top Hits Aggregation
"size": 1 <---- Note this
}
}
}
}
}
}
Sample Response:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"myUniqueIds": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1000",
"doc_count": 2,
"myDocs": {
"hits": {
"total": 2,
"max_score": 0.2876821,
"hits": [
{
"_index": "mysampleindex",
"_type": "mydocs",
"_id": "1",
"_score": 0.2876821,
"_source": {
"searchField": "elasticsearch",
"Id": "1000"
}
}
]
}
}
},
{
"key": "1001",
"doc_count": 2,
"myDocs": {
"hits": {
"total": 2,
"max_score": 0.25316024,
"hits": [
{
"_index": "mysampleindex",
"_type": "mydocs",
"_id": "3",
"_score": 0.25316024,
"_source": {
"searchField": "elasticsearch is awesome",
"Id": "1001"
}
}
]
}
}
},
{
"key": "1002",
"doc_count": 1,
"myDocs": {
"hits": {
"total": 1,
"max_score": 0.2876821,
"hits": [
{
"_index": "mysampleindex",
"_type": "mydocs",
"_id": "5",
"_score": 0.2876821,
"_source": {
"searchField": "elasticsearch is pretty cool",
"Id": "1002"
}
}
]
}
}
}
]
}
}
}
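Notice that the top-level hits of the bool query are not returned in the above response (the request sets "size" to 0, so "hits" is empty); the search results you are looking for come back inside each bucket, in the form of the Top Hits Aggregation.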
Hope this helps!
Related
I have the fields Category and Questions in the table.
My requirement: for the three categories listed below, I need the questions that are tagged against each of them (so I want both the Category and Questions fields in the result), and I am trying to achieve this by writing an Elasticsearch query.
Category :
OLA
BNA
DRG
GET logstash-sdc-feedback/_search? { "_source":["Category.keyword"], "size": 5, "query":{ "bool": { "must": [ {"match":{"Category.keyword"": "OLA","BNA","DRG"}}
],
}
}, "aggs": { "MyBuckets": { "terms": { "field": "questions.keyword","Category.keyword" "order":{ "_count": "asc" }, "size": "5"
} } } }
You can use a terms query along with a terms aggregation to achieve your use case.
Adding a working example
Index Data:
{
"category": "XYZ",
"question": "d"
}
{
"category": "OLA",
"question": "a"
}
{
"category": "BNA",
"question": "b"
}
{
"category": "DRG",
"question": "c"
}
Search Query:
{
"query": {
"bool": {
"must": {
"terms": {
"category.keyword": [
"OLA",
"BNA",
"DRG"
]
}
}
}
},
"aggs": {
"top_tags": {
"terms": {
"field": "category.keyword"
},
"aggs": {
"top_faq_hits": {
"top_hits": {
"_source": {
"includes": [
"question"
]
},
"size": 1
}
}
}
}
}
}
Search Result:
"aggregations": {
"top_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "BNA", // note this
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65566020",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"question": "b" // note this
}
}
]
}
}
},
{
"key": "DRG",
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65566020",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"question": "c"
}
}
]
}
}
},
{
"key": "OLA",
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65566020",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"question": "a"
}
}
]
}
}
}
]
}
}
I use: Elasticsearch 7.7 , Kibana 7.7
For example, let's take two indexes:
User index with simple mapping:
PUT /user_index
{
"mappings": {
"properties": {
"user_id": { "type": "text" },
"user_phone": { "type": "text" },
"name": { "type": "text" }
}
}
}
Check with simple mapping:
PUT /check_index
{
"mappings": {
"properties": {
"user_id": { "type": "text" },
"price": { "type": "integer" },
"goods_count": {"type": "integer"}
}
}
}
I want to build a table visualization like this:
________________________________________________________________________
user_id | user_phone | average_price | sum_goods_count |
___________|_______________|_____________________|______________________
1 | 123 | 512 | 64 |
___________|_______________|_____________________|______________________
2 | 456 | 256 | 16 |
___________|_______________|_____________________|______________________
So my questions are:
Is this possible?
Do I understand correctly that I need to query these two indexes, get a list of users, and then in a loop create shopping carts with checks?
First things first: you should try to de-normalize your data in ES as much as possible to get the best performance and capabilities it offers. I went through the samples you provided and the comments on the question, and it seems this can easily be achieved in your use case by combining the user and check indexes into a single index, as shown in the example below.
Index mapping
{
"mappings": {
"properties": {
"user_id": {
"type": "text",
"fielddata": "true"
},
"price": {
"type": "integer"
},
"goods_count": {
"type": "integer"
}
}
}
}
Index Data:
With the index mapping defined above, index these three documents, where one document has "user_id":"1" and two documents have "user_id":"2".
{
"user_id":"1",
"price":500,
"goods_count":100
}
{
"user_id":"2",
"price":500,
"goods_count":100
}
{
"user_id":"2",
"price":100,
"goods_count":200
}
Search Query:
Refer to ES official documentation on Terms Aggregation, Top Hits aggregation, Sum aggregation and Avg aggregation to get detailed explanation.
{
"size": 0,
"aggs": {
"user": {
"terms": {
"field": "user_id"
},
"aggs": {
"top_user_hits": {
"top_hits": {
"_source": {
"includes": [
"user_id"
]
}
}
},
"avg_price": {
"avg": {
"field": "price"
}
},
"goods_count": {
"sum": {
"field": "goods_count"
}
}
}
}
}
}
Search Result:
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
]
},
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"user_id": "2"
}
},
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"user_id": "2"
}
}
]
}
},
"avg_price": {
"value": 300.0
},
"goods_count": {
"value": 300.0
}
},
{
"key": "1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"user_id": "1"
}
}
]
}
},
"avg_price": {
"value": 500.0
},
"goods_count": {
"value": 100.0
}
}
]
}
}
}
As you can see in the search results above, for "user_id":"2" the average price is (500+100)/2 = 300 and sum of goods_count is 100+200 = 300.
Similarly for "user_id":"1" the average price is 500/1 = 500 and sum of goods_count is 100.
I am trying to do a search within elasticsearch using the regexp filters. Following is my query:
{
"from": 0,
"size": 10,
"_source":["CODE"],
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"regexp" : {
"CODE" : {
"value" : "[0]?[0]?[0]?[0]?3410086456[0-9]?",
"flags_value" : 0,
"boost" : 20.0
}
}
},
{
"regexp" : {
"CODE" : {
"value" : "[0]?[0]?[0]?[0]?83560900204[0-9]?",
"flags_value" : 0,
"boost" : 20.0
}
}
}
]
}
},
{
"terms": {
"CODETYPE": [
"TYPE1", "TYPE2", "TYPE3"
]
}
}
]
}
}
}
Below is the result of the query:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 20.091797,
"hits": [
{
"_index": "index1",
"_type": "type1",
"_id": "142242",
"_score": 20.091797,
"_source": {
"CODE": "003410086456"
}
},
{
"_index": "index1",
"_type": "type1",
"_id": "375897",
"_score": 20.091797,
"_source": {
"CODE": "083560900204"
}
}
]
}
}
What I need to get additionally in my output is the input term against which each result has matched. Something like this:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 20.091797,
"hits": [
{
"_index": "index1",
"_type": "type1",
"_id": "142242",
"_score": 20.091797,
"_source": {
"CODE": "003410086456",
"INPUT": "3410086456"
}
},
{
"_index": "index1",
"_type": "type1",
"_id": "375897",
"_score": 20.091797,
"_source": {
"CODE": "083560900204",
"INPUT": "83560900204"
}
}
]
}
}
Notice the additional INPUT field above. That way I can tell which input pattern matched which result. Is there any way to do this in Elasticsearch? I am currently unable to find any way of achieving this.
Appreciate your help on this. Let me know if I need to furnish any more information.
You could use highlighting. The matched value won't appear in _source; instead, each hit gets a separate highlight field, which gives the matching field value.
{
"from": 0,
"size": 10,
"_source": [
"CODE"
],
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"regexp": {
"CODE": {
"value": "[0]?[0]?[0]?[0]?3410086456[0-9]?",
"flags_value": 0,
"boost": 20
}
}
},
{
"regexp": {
"CODE": {
"value": "[0]?[0]?[0]?[0]?83560900204[0-9]?",
"flags_value": 0,
"boost": 20
}
}
}
]
}
},
{
"terms": {
"CODETYPE": [
"TYPE1",
"TYPE2",
"TYPE3"
]
}
}
]
}
},
"highlight": {
"fields": {
"CODE": {}
}
}
}
Refer: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-highlighting.html#search-request-highlighting
I cannot seem to aggregate my query results when using my custom query parser. I get a result set, but the results are not aggregated. When using a standard query parser like match, everything turns out well.
What works:
GET pages/_search
{
"query": {
"match": {
"text": "binomial"
}
},
"aggs": {
"docs": {
"terms": {
"field": "rooturl"
}
}
}
}
returns a nice aggregated result:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 10,
"max_score": 11.11176,
"hits": [
...
{
"_index": "pages",
"_type": "doc",
"_id": "AVcq6z6lzDazctHi91RE",
"_score": 3.3503218,
"_source": {
"rooturl": "document",
"type": "equation",
"url": "document:poly",
"text": "coefficient"
}
},
{
"_index": "pages",
"_type": "doc",
"_id": "AVcq6z6xzDazctHi91RF",
"_score": 3.3503218,
"_source": {
"rooturl": "document",
"type": "equation",
"url": "document:poly",
"text": "dot"
}
}
...
]
},
"aggregations": {
"docs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "document",
"doc_count": 10
}
]
}
}
}
But when using my custom query parser, the result is not aggregated.
Query:
GET pages/_search
{
"query": {
"my_custom_query_parser": {
"query": "binomial"
}
},
"aggs": {
"docs": {
"terms": {
"field": "rooturl"
}
}
}
}
Can anyone point me into the right direction?
I have data, that has an attribute like this
apiUrl:/REST/endpoint/123
Now I would like to show all the URLs, and I am trying to use an aggregate function (apiUrl.raw is the not_analyzed part of the multi-field):
POST /index/type/_search
{
"aggregations": {
"application": {
"terms": {
"field": "apiUrl.raw"
}
}
}
}
When running this query, no results are returned. What am I doing wrong? I would expect something along these lines (together with the count of occurrences):
/REST/api1/123
/REST/otherApi/345
Thanks!
Your query does return non-empty results. Compare your setup with the following and let us know what the difference is:
PUT index
PUT index/type/_mapping
{
"properties" : {
"apiUrl": {
"type": "multi_field",
"fields": {
"apiUrl": {"type":"string", "index":"analyzed"},
"raw": {"type":"string", "index":"not_analyzed"}
}
}
}
}
GET index/type/_mapping
PUT index/type/1
{
"apiUrl":"/REST/api1/123"
}
PUT index/type/2
{
"apiUrl":"/REST/otherApi/345"
}
GET index/type/_search?fields=apiUrl.raw
GET index/type/_search
{
"aggregations": {
"application": {
"terms": {
"field": "apiUrl.raw"
}
}
}
}
Response:
{
"took": 76,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "index",
"_type": "type",
"_id": "1",
"_score": 1,
"_source": {
"apiUrl": "/REST/api1/123"
}
},
{
"_index": "index",
"_type": "type",
"_id": "2",
"_score": 1,
"_source": {
"apiUrl": "/REST/otherApi/345"
}
}
]
},
"aggregations": {
"application": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/REST/api1/123",
"doc_count": 1
},
{
"key": "/REST/otherApi/345",
"doc_count": 1
}
]
}
}
}