ElasticSearch highlight with filter query - elasticsearch

I have the following query, but highlight is not working.
{
"query": {
"filtered" : {
"filter" : {
"or" : {
"filters" : [
{
"query": {
"multi_match":{
"query":"time",
"fields":[
"display_name_en","display_name_pa","display_name_pr",
"icon_class","in_sidemenu","model_name","name",
"table_name"
],
"operator":"OR"
}
}
},
{
"terms":{
"created_by.id":["11","13","14","16"],
"_name" : "created_by"
}
},
{
"range":{
"created_at":{
"gte":"2016-01-27",
"lte":"2016-03-21",
"format":"YYYY-MM-dd"
}
}
}
],
"_name" : "or"
}
}
}
},
"highlight": {
"fields" : {
"name" : {}
}
}
}
And the result is like this:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "promote_kmp",
"_type": "resources",
"_id": "569e0d84684cc",
"_score": 1,
"_source": {
"id": 106,
"name": "Last time First Update",
"display_name_en": "Last time",
"display_name_pr": "Last time",
"display_name_pa": "Last time",
"table_name": "Last time",
"model_name": "Last time",
"in_sidemenu": "0",
"icon_class": "Last time",
"created_at": "2016-01-18 09:40:51",
"created_by": null,
"updated_at": "2016-01-19 14:48:44",
"updated_by": {
"id": 6,
"first_name": "Laili",
"last_name": "Hamta",
"last_activity": "2016-01-19 14:48:44",
"roles": [
{
"id": 1,
"name": "admin",
"created_at": "2015-09-06 15:19:15",
"updated_at": "2015-09-06 15:19:15",
"pivot": {
"user_id": 6,
"role_id": 1
}
}
]
}
},
"matched_queries": [
"or"
]
}
]
}
}
As you can see, there is no highlight section in the result. What is wrong with this query, and why is highlighting not working? If I put the multi_match part before filter:{} it works, but in that case how can I use it with the or operator?
Thanks for any help.

The problem with the query is that you are only filtering the results, whereas highlighting works on queries only. You can also notice that every document has a score of 1 because only filters are applied. You need to rewrite your query as something like this:
{
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "time",
"fields": [
"display_name_en",
"display_name_pa",
"display_name_pr",
"icon_class",
"in_sidemenu",
"model_name",
"name",
"table_name"
]
}
},
{
"terms": {
"created_by.id": [
"11",
"13",
"14",
"16"
],
"_name": "created_by"
}
},
{
"range": {
"created_at": {
"gte": "2016-01-27",
"lte": "2016-03-21",
"format": "YYYY-MM-dd"
}
}
}
]
}
},
"highlight": {
"fields": {}
}
}
Convert the or filters to a bool should clause and highlighting will work.

Related

Elasticsearch - Is it possible to collapse first then aggregate data of a nested field?

I am using Elasticsearch and I want to group our results by a specific field, returning the top n documents per group. The documents have a nested field and I want to aggregate all the documents' nested fields for each group.
Example
I have 5 documents and each has a groupId and also a nested field people. I want to group these documents by groupId. Then, for each group, I want to get the top 2 people (some documents may contain the same people).
PUT test/_mapping
{
"properties": {
"groupId":{
"type":"keyword"
},
"id":{
"type":"keyword"
},
"name":{
"type":"text"
},
"people":{
"type":"nested",
"properties":{
"email":{
"type":"keyword"
}
}
}
}
}
PUT test/_doc/1
{
"name": "docs1",
"groupId": "1",
"people":[{
"email":"people1#test.com"
}]
}
PUT test/_doc/2
{
"name": "docs2",
"groupId": "1",
"people":[{
"email":"people2.1#test.com"
},
{
"email":"people2.2#test.com"
}]
}
PUT test/_doc/3
{
"name": "docs3",
"groupId": "2",
"people":[{
"email":"people3.1#test.com"
},
{
"email":"people2.2#test.com"
}]
}
PUT test/_doc/4
{
"name": "docs4",
"groupId": "1",
"people":[{
"email":"people4.1#test.com"
},
{
"email":"people4.2#test.com"
}]
}
PUT test/_doc/5
{
"name": "docs5",
"groupId": "3",
"people":[{
"email":"people5.1#test.com"
},
{
"email":"people5.2#test.com"
}]
}
Search query
GET test/_search
{
"collapse": {
"field": "groupId",
"inner_hits": {
"name":"inner",
"size": 2
}
},
"sort": [
{
"groupId": {
"order": "asc"
}
}
],
"size": 2,
"from": 0
}
Result
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 5,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "test",
"_id": "1",
"_score": null,
"_source": {
"name": "docs1",
"groupId": "1",
"people": [
{
"email": "people1#test.com"
}
]
},
"fields": {
"groupId": [
"1"
]
},
"sort": [
"1"
],
"inner_hits": {
"inner": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test",
"_id": "1",
"_score": 0,
"_source": {
"name": "docs1",
"groupId": "1",
"people": [
{
"email": "people1#test.com"
}
]
}
},
{
"_index": "test",
"_id": "2",
"_score": 0,
"_source": {
"name": "docs2",
"groupId": "1",
"people": [
{
"email": "people2.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
}
}
]
}
}
}
},
{
"_index": "test",
"_id": "3",
"_score": null,
"_source": {
"name": "docs3",
"groupId": "2",
"people": [
{
"email": "people3.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
},
"fields": {
"groupId": [
"2"
]
},
"sort": [
"2"
],
"inner_hits": {
"inner": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test",
"_id": "3",
"_score": 0,
"_source": {
"name": "docs3",
"groupId": "2",
"people": [
{
"email": "people3.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
}
}
]
}
}
}
}
]
}
}
The expected result is an aggregated groupPeople field for each group that contains the top n people of that group. It should not be affected by the inner_hits size; for example, groupId=1 contains 3 documents and 5 people.
The query that you're looking for is this one:
POST test/_search
{
"size": 0,
"aggs": {
"groups": {
"terms": {
"field": "groupId",
"size": 10
},
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"emails": {
"terms": {
"field": "people.email",
"size": 2
}
}
}
}
}
}
}
}
If you need pagination, you can achieve the same using the composite aggregation:
POST test/_search
{
"size": 0,
"aggs": {
"pages": {
"composite": {
"sources": [
{
"groups": {
"terms": {
"field": "groupId"
}
}
}
]
},
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"emails": {
"terms": {
"field": "people.email",
"size": 2
}
}
}
}
}
}
}
}

Elasticsearch - Nested field sorting

I have an index defined by the following :
{
"mappings": {
"properties": {
"firstName": {
"type": "keyword"
},
"lastName": {
"type": "keyword"
},
"affiliations": {
"type": "nested",
"properties": {
"organisation": {
"type": "keyword"
},
"team": {
"type": "keyword"
},
"dateBeginning": {
"type": "date",
"format": "yyyy-MM-dd"
},
"dateEnding": {
"type": "date",
"format": "yyyy-MM-dd"
},
"country": {
"type": "keyword"
}
}
}
}
}
}
Basically, for each researcher (researchers is how I named my index) I want to sort the affiliations by dateBeginning, in descending order. I've read about inner hits in the official Elasticsearch docs, and not being exactly sure how they work, I've tried this for the researcher with _id : 3 :
{
"query": {
"nested": {
"path": "affiliations",
"query": {
"match": { "_id": 3 }
},
"inner_hits": {
"sort" : [
{
"affiliations.dateBeginning" : {
"order" : "desc",
"nested": {
"path": "affiliations",
"filter": {
"term": { "_id": 3 }
}
}
}
}
]
}
}
}
}
And it doesn't really work.
The researcher with _id : 3 has two affiliations, one with dateBeginning set to 2015-06-30 and the other to 2017-06-30. So I've also tried this:
{
"sort" : [
{
"affiliations.dateBeginning" : {
"order" : "desc",
"nested": {
"path": "affiliations"
}
}
}
],
"query": {
"nested": {
"path": "affiliations",
"query": {
"match": { "_id": 3 }
}
}
}
}
And it doesn't sort the affiliations by dateBeginning.
I've also tried to do it with the SQL API (since I'm more familiar with SQL language), and still, I can't get the data I want.
So I'm quite new to ElasticSearch, I'm using version 7.10, and I don't know what else to do.
Any suggestions about what I'm doing wrong here ?
EDIT
here's an example of a document from that index:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [{
"_index": "researchers",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"firstName": "Kimmich",
"lastName": "Yoshua",
"affiliations": [{
"organisation": "University of Ottawa",
"team": "Neural Network Elite Team",
"dateBeginning": "2015-06-30",
"datEnding": "2017-01-31",
"country": "Canada"
},
{
"organisation": "University of Montréal",
"team": "Picture processing team",
"dateBeginning": "2017-06-30",
"dateEnding": null,
"country": "Canada"
}
]
}
}]
}
}
Once you're inside the nested query, the inner hits sort doesn't need the extra nested clause. Remove it and the sort will work properly:
{
"query": {
"nested": {
"path": "affiliations",
"query": {
"match": {
"_id": 3
}
},
"inner_hits": {
"sort": [
{
"affiliations.dateBeginning": {
"order": "desc"
}
}
]
}
}
}
}
Note that this wouldn't sort the top-level hits -- only the inner hits.
But you can sort on the top level by the values of affiliations.dateBeginning like so:
POST researchers/_search
{
"sort": [
{
"affiliations.dateBeginning": {
"order": "desc",
"nested_path": "affiliations"
}
}
]
}
but note that the syntax is now slightly different: instead of path we're saying nested_path.

How to filter nested objects on a should query?

My mappings are shown below, and I am doing a bool should query on name and other properties as shown below, but what I need is to filter CustomerPrices by CustomerId in the response.
Each product has the same CustomerIds, so for example:
product1 -CustomerPrice( CustomerId :1234 -Price:4)
CustomerPrice( CustomerId :567-Price:5)
.
.
Product2 - CustomerPrice(CustomerId :1234 -Price:8)
CustomerPrice(CustomerId :567-Price:10)
.
.
So according to that when I query Product1, response should have only customerPrice for customerId:1234
{
"Product": {
"properties": {
"CustomerPrices": {
"type": "nested",
"properties": {
"Price": {
"store": true,
"type": "float"
},
"CustomerId": {
"type": "integer"
}
}
},
"Name": {
"index": "not_analyzed",
"store": true,
"type": "string"
}
}
}
}
I tried the following query, but it is not filtering the nested objects. I guess it filters product objects, which makes sense because all products have customerId:1234
"query":{
"bool":{
"should":[
{
"multi_match":{
"type":"best_fields",
"query":"product 1",
"fields":[
"Name^7"]
}
},
{
"multi_match":{
"type":"best_fields",
"query":"product 1",
"operator":"and",
"fields":[
"Code^10",
"ShortDescription^6"]
}
},
{
"nested":{
"query":{
"term":{
"CustomerPrices.CustomerId":{
"value":1234
}
}
},
"path":"CustomerPrices"
}
}]
}
},
I've spent some time on your question since it was interesting how this can be achieved and the only solution I found for now is relying on the inner_hits which gives the exact nested object the match was on. I've also deactivated the _source which isn't used anymore.
So given your mapping and having 2 products like:
PUT product/Product/product1
{
"CustomerPrices": [
{
"CustomerId": 1234,
"Price": 4
},
{
"CustomerId": 567,
"Price": 5
}
],
"Name": "John"
}
PUT product/Product/product2
{
"CustomerPrices": [
{
"CustomerId": 1234,
"Price": 8
},
{
"CustomerId": 567,
"Price": 10
}
],
"Name": "Bob"
}
When running the following query: (Used must just to see 1 result, works with should as well)
GET product/_search
{
"_source": false,
"query": {
"bool": {
"must": [
{ "match": { "Name": "Bob"}}
],
"filter": [
{
"nested" : {
"path" : "CustomerPrices",
"score_mode" : "avg",
"query" : {
"bool" : {
"should" : [
{ "match" : {"CustomerPrices.CustomerId" : 1234}}
]
}
},
"inner_hits": {}
}
}
]
}
}
}
I was able to get the result where only "Price" from customer with id 1234 was present:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.2876821,
"hits": [
{
"_index": "product",
"_type": "Product",
"_id": "product2",
"_score": 0.2876821,
"inner_hits": {
"CustomerPrices": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "product",
"_type": "Product",
"_id": "product2",
"_nested": {
"field": "CustomerPrices",
"offset": 0
},
"_score": 1,
"_source": {
"CustomerId": 1234,
"Price": 8
}
}
]
}
}
}
}
]
}
}
Couldn't find an official way of returning partial results of the document by only having the matched nested object. Maybe something that we need to inform elasticsearch guys about to consider for some next releases. Hope it helps you.

elasticsearch aggregation result is 0

The following is my query for elasticsearch:
GET index/_search
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{
"term": {
"id_1": "xx"
}
},
{
"term": {
"level": "level2"
}
},
{
"or": [
{
"term": {
"type": "yyy"
}
},
{
"term": {
"type": "zzzz"
}
}
]
}
]
}
}
},
"aggs": {
"variable": {
"stats": {
"field": "score"
}
}
}
}
But the agg result is as follows:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 68,
"max_score": 0,
"hits": []
},
"aggregations": {
"variable": {
"count": 30,
"min": 0,
"max": 0,
"avg": 0,
"sum": 0
}
}
}
Why are min, max, etc. all 0, even though score has values (0.18, 0.25, etc.)? Also, in the mapping the type for score is long. Please help me to solve this. Thanks in advance.
Edit:
value in index:
"score": 0.18
Single document:
{
"_index": "index",
"_type": "ppppp",
"_id": "n0IiTEd2QFCnJUZOSiNu1w",
"_score": 1,
"_source": {
"name_2": "aaa",
"keyid": "bbbb",
"qqq": "cccc",
"level": "level2",
"type": "kkk",
"keytype": "Year",
"org_id": 25,
"tempid": "113",
"id_2": "561",
"name_1": "xxxxx",
"date_obj": [
{
"keyid": "wwwww",
"keytype": "Year",
"value": 21.510617952000004,
"date": "2015",
"id": "ggggggg",
"productid": ""
},
{
"keyid": "rrrrrr",
"keytype": "Year",
"value": 0.13,
"date": "2015",
"id": "iiiiii",
"productid": ""
}
],
"date": "2015",
"ddddd": 21.510617952000004,
"id_1": "29",
"leveltype": "nnnn",
"tttt": 0.13,
"score": 0.13 ------------------->problem
}
}
Mapping:
curl -XPUT ip:9200/index -d '{
"mappings" : {
"places" : {
"properties" : {
"score" : { "type" : "float"}
}
}
}
}'
The fix should be as simple as changing the type of the score field to float (or double) instead of long. long is an integer type and 0.18 will be indexed as 0 under the hood.
"score" : {
"type" : "float",
"null_value" : 0.0
}
Note that you'll need to reindex your data after making the mapping change.

elasticsearch retrieving nested objects - not individual fields

When I use the "fields" option of a query I get a separate array for each field. Is it possible to get back the "complete" nested objects rather than just the field?
In the following example if I try to do "fields": ["cast"] it tells me that cast is not a leaf node. And if I do "fields": ["cast.firstName", "cast.middleName", "cast.lastName"] it returns 3 arrays.
Is there another way of retrieving just a partial amount of the document? Or is there a way to "reassemble" the separate fields into a complete "cast" object?
Example Index and Data:
POST /movies
{
"mappings": {
"movie": {
"properties": {
"cast": {
"type": "nested"
}
}
}
}
}
POST /movies/movie
{
"title": "The Matrix",
"cast": [
{
"firstName": "Keanu",
"lastName": "Reeves",
"address": {
"street": "somewhere",
"city": "LA"
}
},
{
"firstName": "Laurence",
"middleName": "John",
"lastName": "Fishburne",
"address": {
"street": "somewhere else",
"city": "NYC"
}
}
]
}
Example Query:
GET /movies/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "cast",
"filter": {
"bool": {
"must": [
{ "term": { "firstName": "laurence"} },
{ "term": { "lastName": "fishburne"} }
]
}
}
}
}
}
},
"fields": [
"cast.address.city",
"cast.firstName",
"cast.middleName",
"cast.lastName"
]
}
Result of example query:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "movies",
"_type": "movie",
"_id": "AU1JeyBseLgwMCOuOLsZ",
"_score": 1,
"fields": {
"cast.firstName": [
"Keanu",
"Laurence"
],
"cast.lastName": [
"Reeves",
"Fishburne"
],
"cast.address.city": [
"LA",
"NYC"
],
"cast.middleName": [
"John"
]
}
}
]
}
}
I think this is what you're looking for:
POST /movies/_search
{
"_source": {
"include": [
"cast.address.city",
"cast.firstName",
"cast.middleName",
"cast.lastName"
]
},
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "cast",
"filter": {
"bool": {
"must": [
{
"term": {
"firstName": "laurence"
}
},
{
"term": {
"lastName": "fishburne"
}
}
]
}
}
}
}
}
}
}
Result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "movies",
"_type": "movie",
"_id": "AU1PIJgBA_0Cyshym7-m",
"_score": 1,
"_source": {
"cast": [
{
"lastName": "Reeves",
"address": {
"city": "LA"
},
"firstName": "Keanu"
},
{
"middleName": "John",
"lastName": "Fishburne",
"address": {
"city": "NYC"
},
"firstName": "Laurence"
}
]
}
}
]
}
}
You can also choose to exclude fields instead of including or both, see documentation here: http://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html

Resources