The following is my query for elasticsearch:
GET index/_search
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{
"term": {
"id_1": "xx"
}
},
{
"term": {
"level": "level2"
}
},
{
"or": [
{
"term": {
"type": "yyy"
}
},
{
"term": {
"type": "zzzz"
}
}
]
}
]
}
}
},
"aggs": {
"variable": {
"stats": {
"field": "score"
}
}
}
}
But the agg result is as follows:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 68,
"max_score": 0,
"hits": []
},
"aggregations": {
"variable": {
"count": 30,
"min": 0,
"max": 0,
"avg": 0,
"sum": 0
}
}
}
Why are min, max, etc. all 0, even though the score field has values such as 0.18, 0.25, etc.? Also, in the mapping the type for score is long. Please help me solve this. Thanks in advance.
Edit:
value in index:
"score": 0.18
Single document:
{
"_index": "index",
"_type": "ppppp",
"_id": "n0IiTEd2QFCnJUZOSiNu1w",
"_score": 1,
"_source": {
"name_2": "aaa",
"keyid": "bbbb",
"qqq": "cccc",
"level": "level2",
"type": "kkk",
"keytype": "Year",
"org_id": 25,
"tempid": "113",
"id_2": "561",
"name_1": "xxxxx",
"date_obj": [
{
"keyid": "wwwww",
"keytype": "Year",
"value": 21.510617952000004,
"date": "2015",
"id": "ggggggg",
"productid": ""
},
{
"keyid": "rrrrrr",
"keytype": "Year",
"value": 0.13,
"date": "2015",
"id": "iiiiii",
"productid": ""
}
],
"date": "2015",
"ddddd": 21.510617952000004,
"id_1": "29",
"leveltype": "nnnn",
"tttt": 0.13,
"score": 0.13 ------------------->problem
}
}
Mapping:
curl -XPUT ip:9200/index -d '{
"mappings" : {
"places" : {
"properties" : {
"score" : { "type" : "float"}
}
}
}
}'
The fix should be as simple as changing the type of the score field to float (or double) instead of long. long is an integer type and 0.18 will be indexed as 0 under the hood.
"score" : {
"type" : "float",
"null_value" : 0.0
}
Note that you'll need to reindex your data after making the mapping change.
Related
I am using Elasticsearch v7.1.0 to perform a composite aggregation to paginate my results.
The data in the index I am querying and aggregating looks like this.
{
"sequence": "SEQ-A123",
"timestamp": "2022-05-11T12:26:54Z",
"owner": "b96e1abb08d44a6a9871f567aa392167",
"serialNo": "A5645",
"value": 45,
"ctags": [
{
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
},
{
"sequence": "SEQ-B123",
"timestamp": "2022-05-11T12:26:54Z",
"owner": "b96e1abb08d44a6a9871f567aa392165",
"serialNo": "A8456",
"value": 87,
"ctags": [
{
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
},
{
"sequence": "SEQ-C123",
"timestamp": "2022-05-11T12:26:54Z",
"owner": "b96e1abb08d44a6a9871f567aa392165",
"serialNo": "A59",
"value": 87,
"ctags": [
{
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
}, ...
The Query I am executing on elasticsearch is this.
{
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "2022-05-01T00:00:00.000Z",
"lte": "2022-05-30T23:59:59.999Z"
}
}
},
{
"terms": {
"sequence.keyword": [
"SEQ-A123",
"SEQ-B123"
]
}
}
],
"must_not": [
{
"term": {
"serialNo.keyword": "test"
}
}
]
}
},
"size": 0,
"aggs": {
"sequence": {
"composite": {
"sources": [
{
"bkt_sequence": {
"terms": {
"field": "sequence.keyword"
}
}
}
],
"after": {
"bkt_sequence": ""
},
"size": 2
},
"aggs": {
"serialNo": {
"terms": {
"field": "serialNo.keyword"
},
"aggs": {
"usageStats": {
"stats": {
"field": "value"
}
},
"ctags": {
"top_hits": {
"size": 1,
"_source": {
"include": [
"owner",
"ctags"
]
}
}
}
}
}
}
}
}
}
The result I am getting against this query looks like this.
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 94,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"sequence": {
"after_key": {
"bkt_sequence": "SEQ-B123"
},
"buckets": [
{
"key": {
"bkt_sequence": "SEQ-A123"
},
"doc_count": 47,
"serialNo": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 37,
"buckets": [
"0": {
"key": "A5645",
"doc_count": 1,
"ctags": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 2,
"hits": [
"0": {
"_index": "seq-serial",
"_type": "_doc",
"_id": "1",
"_score": 2,
"_source": {
"owner": "b96e1abb08d44a6a9871f567aa392167",
"ctags": [
"0": {
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
}
}
]
}
},
"usageStats": {
"count": 1,
"min": 45,
"max": 45,
"avg": 45,
"sum": 45
}
},
"1": {
"key": "A5646",
"doc_count": 1,
"ctags": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 2,
"hits": [
"0": {
"_index": "seq-serial",
"_type": "_doc",
"_id": "27",
"_score": 2,
"_source": {
"owner": "b96e1abb08d44a6a9871f567aa392169",
"ctags": [
"0": {
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
}
}
]
}
},
"usageStats": {
"count": 1,
"min": 85,
"max": 85,
"avg": 85,
"sum": 85
}
},
"2": {
...
},
"3": {
...
},
"4": {
...
},
"5": {
...
},
"6": {
...
},
"7": {
...
},
"8": {
...
},
"9": {
...
}
]
}
},
{
"key": {
"bkt_sequence": "SEQ-B123"
},
"doc_count": 47,
"serialNo": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 37,
"buckets": [
"0": {
"key": "A8456",
"doc_count": 1,
"ctags": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 2,
"hits": [
"0": {
"_index": "seq-serial",
"_type": "_doc",
"_id": "48",
"_score": 2,
"_source": {
"owner": "b96e1abb08d44a6a9871f567aa392167",
"ctags": [
"0": {
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
}
}
]
}
},
"usageStats": {
"count": 1,
"min": 45,
"max": 45,
"avg": 45,
"sum": 45
}
},
"1": {
"key": "A7590",
"doc_count": 1,
"ctags": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 2,
"hits": [
"0": {
"_index": "seq-serial",
"_type": "_doc",
"_id": "74",
"_score": 2,
"_source": {
"owner": "b96e1abb08d44a6a9871f567aa392169",
"ctags": [
"0": {
"name": "project",
"value": "cd8041f817634e7784b8c0cb5b069d4b"
}
]
}
}
]
}
},
"usageStats": {
"count": 1,
"min": 85,
"max": 85,
"avg": 85,
"sum": 85
}
},
"2": {
...
},
"3": {
...
},
"4": {
...
},
"5": {
...
},
"6": {
...
},
"7": {
...
},
"8": {
...
},
"9": {
...
}
]
}
}
]
}
}
}
As you can see, a total of 94 documents are hit by this query.
47 belong to the SEQ-A123 bucket and the other 47 belong to the SEQ-B123 bucket, but out of those 47 only 10 are returned in the response.
How can I get all 47 documents in the result and still use pagination at sequence field level?
By default, the terms aggregation returns only the top 10 buckets (its size parameter defaults to 10).
You just need to add a size parameter to the terms aggregation named serialNo.
Below is how your query would look like:
POST test_index/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "2022-05-01T00:00:00.000Z",
"lte": "2022-05-30T23:59:59.999Z"
}
}
},
{
"terms": {
"sequence.keyword": [
"SEQ-A123",
"SEQ-B123"
]
}
}
],
"must_not": [
{
"term": {
"serialNo.keyword": "test"
}
}
]
}
},
"size": 0,
"aggs": {
"sequence": {
"composite": {
"sources": [
{
"bkt_sequence": {
"terms": {
"field": "sequence.keyword"
}
}
}
],
"after": {
"bkt_sequence": ""
},
"size": 2
},
"aggs": {
"serialNo": {
"terms": {
"field": "serialNo.keyword",
"size": 100 <----------- Note this here
},
"aggs": {
"usageStats": {
"stats": {
"field": "value"
}
},
"ctags": {
"top_hits": {
"size": 1,
"_source": {
"include": [
"owner",
"ctags"
]
}
}
}
}
}
}
}
}
}
I have the following mapping to my phrase suggester:
{
"settings": {
"analysis": {
"analyzer": {
"suggests_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"asciifolding",
"shingle_filter"
],
"type": "custom"
}
},
"filter": {
"shingle_filter": {
"min_shingle_size": 2,
"max_shingle_size": 6,
"type": "shingle"
}
}
}
},
"mappings": {
"sample_data": {
"properties": {
"name": {
"type": "string",
"analyzer": "suggests_analyzer"
}
}
}
}
}
I have "lung cancer", "colorectal cancer", "breast cancer" indexed in my index. But when I send a misspelt query in which both words are misspelt, like "lhng cancar", it returns zero results when I use the collate functionality. My sample query is as follows.
{
"suggest": {
"text": "lhng cancar",
"simple_phrase": {
"phrase": {
"field": "name",
"size": 5,
"real_word_error_likelihood": 0.95,
"max_errors": 0.5,
"direct_generator": [
{
"field": "name",
"suggest_mode": "always",
"size": 5
}
],
"collate": {
"query": {
"inline": {
"match_phrase": {
"{{field_name}}": "{{suggestion}}"
}
}
},
"params": {
"field_name": "name"
},
"prune": false
}
}
}
},
"size": 0
}
The response to the above query is:
{
"took": 17,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1868381,
"max_score": 0,
"hits": []
},
"suggest": {
"simple_phrase": [
{
"text": "lhng cancar",
"offset": 0,
"length": 11,
"options": []
}
]
}
}
What changes do I need to make in the query so that I get the expected result "lung cancer" in the suggestions?
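You have to raise max_errors to 0.8 or more. A max_errors value below 1 is interpreted as a fraction of the query terms, so 0.5 on a two-word phrase allows only one term to be corrected, while "lhng cancar" has two misspelt terms.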
The same answer is given here:
ElasticSearch - Phrase Suggestor
Raising the max_errors parameter to 2 solved my problem.
I am new to Elasticsearch and am requesting some help.
Basically I have some 2 million documents in my Elasticsearch index and the documents look like the one below:
{
"_index": "flipkart",
"_type": "PSAD_ThirdParty",
"_id": "430001_MAM_2016-02-04",
"_version": 1,
"_score": 1,
"_source": {
"metrics": [
{
"id": "Metric1",
"value": 70
},
{
"id": "Metric2",
"value": 90
},
{
"id": "Metric3",
"value": 120
}
],
"primary": true,
"ticketId": 1,
"pliId": 206,
"bookedNumbers": 15000,
"ut": 1454567400000,
"startDate": 1451629800000,
"endDate": 1464589800000,
"tz": "EST"
}
}
I want to write an aggregation query which satisfies below conditions:
1) First query based on "_index", "_type" and "pliId".
2) Do aggregation sum on metrics.value based on metrics.id = "Metric1".
Basically I need to query records based on some fields and aggregate sum on a particular metrics value based on metrics id.
Please can you help me in getting my query right.
Your metrics field needs to be of type nested:
"metrics": {
"type": "nested",
"properties": {
"id": {
"type": "string",
"index": "not_analyzed"
}
}
}
If you want "Metric1" to match exactly, including the upper-case letter, then the id field needs to be not_analyzed, as shown above.
Then, if you only want metrics.id = "Metric1" aggregations, you need something like this:
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"pliId": 206
}
}
]
}
}
}
},
"aggs": {
"by_metrics": {
"nested": {
"path": "metrics"
},
"aggs": {
"metric1_only": {
"filter": {
"bool": {
"must": [
{
"term": {
"metrics.id": {
"value": "Metric1"
}
}
}
]
}
},
"aggs": {
"by_metric_id": {
"terms": {
"field": "metrics.id"
},
"aggs": {
"total_delivery": {
"sum": {
"field": "metrics.value"
}
}
}
}
}
}
}
}
}
}
Created new index:
Method : PUT ,
URL : http://localhost:9200/google/
Body:
{
"mappings": {
"PSAD_Primary": {
"properties": {
"metrics": {
"type": "nested",
"properties": {
"id": {
"type": "string",
"index": "not_analyzed"
},
"value": {
"type": "integer",
"index": "not_analyzed"
}
}
}
}
}
}
}
Then I inserted some 200 thousand documents and then ran the query, and it worked.
Response:
{
"took": 34,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "google",
"_type": "PSAD_Primary",
"_id": "383701291_MAM_2016-01-06",
"_score": 1,
"_source": {
"metrics": [
{
"id": "Metric1",
"value": 70
},
{
"id": "Metric2",
"value": 90
},
{
"id": "Metric3",
"value": 120
}
],
"primary": true,
"ticketId": 1,
"pliId": 221244,
"bookedNumbers": 15000,
"ut": 1452061800000,
"startDate": 1451629800000,
"endDate": 1464589800000,
"tz": "EST"
}
}
]
},
"aggregations": {
"by_metrics": {
"doc_count": 3,
"metric1_only": {
"doc_count": 1,
"by_metric_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Metric1",
"doc_count": 1,
"total_delivery": {
"value": 70
}
}
]
}
}
}
}
}
This is my document/mapping with a nested prices array:
{
"name": "Foobar",
"type": 1,
"prices": [
{
"date": "2016-03-22",
"price": 100.41
},
{
"date": "2016-03-23",
"price": 200.41
}
]
}
Mapping:
{
"properties": {
"name": {
"index": "not_analyzed",
"type": "string"
},
"type": {
"type": "byte"
},
"prices": {
"type": "nested",
"properties": {
"date": {
"format": "dateOptionalTime",
"type": "date"
},
"price": {
"type": "double"
}
}
}
}
}
I use a top_hits aggregation to get the min price of the nested price array. I also have to filter the prices by date. Here is the query and the response:
POST /index/type/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"prices": {
"nested": {
"path": "prices"
},
"aggs": {
"date_filter": {
"filter": {
"range": {
"prices.date": {
"gte": "2016-03-21"
}
}
},
"aggs": {
"min": {
"top_hits": {
"sort": {
"prices.price": {
"order": "asc"
}
},
"size": 1
}
}
}
}
}
}
}
}
Response:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": [
]
},
"aggregations": {
"prices": {
"doc_count": 4,
"date_filter": {
"doc_count": 4,
"min": {
"hits": {
"total": 4,
"max_score": null,
"hits": [
{
"_index": "index",
"_type": "type",
"_id": "4225796ALL2016061541031",
"_nested": {
"field": "prices",
"offset": 0
},
"_score": null,
"_source": {
"date": "2016-03-22",
"price": 100.41
},
"sort": [
100.41
]
}
]
}
}
}
}
}
}
Is there a way to get the parent source document (or some fields from it) with _id="4225796ALL2016061541031" in the response (e.g. name)? A second query is not an option.
Instead of applying aggregations, use a nested query with inner_hits, like this:
{
"query": {
"nested": {
"path": "prices",
"query": {
"range": {
"prices.date": {
"gte": "2016-03-21"
}
}
},
"inner_hits": {
"sort": {
"prices.price": {
"order": "asc"
}
},
"size": 1
}
}
}
}
Fetch the parent document's data from _source and the matching nested price data from inner_hits.
Hope it helps
I have the following query, but highlight is not working.
{
"query": {
"filtered" : {
"filter" : {
"or" : {
"filters" : [
{
"query": {
"multi_match":{
"query":"time",
"fields":[
"display_name_en","display_name_pa","display_name_pr",
"icon_class","in_sidemenu","model_name","name",
"table_name"
],
"operator":"OR"
}
}
},
{
"terms":{
"created_by.id":["11","13","14","16"],
"_name" : "created_by"
}
},
{
"range":{
"created_at":{
"gte":"2016-01-27",
"lte":"2016-03-21",
"format":"YYYY-MM-dd"
}
}
}
],
"_name" : "or"
}
}
}
},
"highlight": {
"fields" : {
"name" : {}
}
}
}
And the result is like this:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "promote_kmp",
"_type": "resources",
"_id": "569e0d84684cc",
"_score": 1,
"_source": {
"id": 106,
"name": "Last time First Update",
"display_name_en": "Last time",
"display_name_pr": "Last time",
"display_name_pa": "Last time",
"table_name": "Last time",
"model_name": "Last time",
"in_sidemenu": "0",
"icon_class": "Last time",
"created_at": "2016-01-18 09:40:51",
"created_by": null,
"updated_at": "2016-01-19 14:48:44",
"updated_by": {
"id": 6,
"first_name": "Laili",
"last_name": "Hamta",
"last_activity": "2016-01-19 14:48:44",
"roles": [
{
"id": 1,
"name": "admin",
"created_at": "2015-09-06 15:19:15",
"updated_at": "2015-09-06 15:19:15",
"pivot": {
"user_id": 6,
"role_id": 1
}
}
]
}
},
"matched_queries": [
"or"
]
}
]
}
}
As you can see, there is no highlight section in the result. So what is the mistake in this query, and why is highlighting not working? If I put the multi_match part before filter:{} it works, but in that case how can I use it with the or operator?
Thanks for any help.
The problem with the query is that you are only filtering the results; highlighting works on queries only. You can also notice that every document has a score of 1 because only filters are applied. You need to rewrite your query as something like this:
{
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "time",
"fields": [
"display_name_en",
"display_name_pa",
"display_name_pr",
"icon_class",
"in_sidemenu",
"model_name",
"name",
"table_name"
]
}
},
{
"terms": {
"created_by.id": [
"11",
"13",
"14",
"16"
],
"_name": "created_by"
}
},
{
"range": {
"created_at": {
"gte": "2016-01-27",
"lte": "2016-03-21",
"format": "YYYY-MM-dd"
}
}
}
]
}
},
"highlight": {
"fields": {}
}
}
Convert the or filters to a bool should clause and highlighting will work.