Aggregation on filtered, nested inner_hits query in ElasticSearch - elasticsearch

I'm only a few days new to ElasticSearch, and as a learning exercise have implemented a rudimentary job scraper that aggregates jobs from a few job listing sites and populates an index with some data for me to play with.
My index contains a document for each website that lists jobs. A property of each of these documents is a 'jobs' array, which contains an object for each job that exists on that site. I am considering indexing each job as its own document (especially since the ElasticSearch documentation says that inner_hits is an experimental feature) but for now, I am trying to see if I can accomplish what I want to do using the inner_hits and nested features of ElasticSearch.
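(To illustrate the shape of the data, a single site document looks roughly like this; the values are made up and the field names match the mapping below.)
{
  "sitename": "examplejobboard",
  "url": "http://examplejobboard.com",
  "createdAt": "2015-05-01T12:00:00Z",
  "jobscount": 2,
  "jobs": [
    { "title": "Software Engineer", "company": "Acme Corp", "location": "New York, NY", "engagement": "Full-time", "link": "http://examplejobboard.com/jobs/1" },
    { "title": "Data Analyst", "company": "Example Inc", "location": "Boston, MA", "engagement": "Contract", "link": "http://examplejobboard.com/jobs/2" }
  ]
}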
I am able to query, filter, and return back only matching jobs. However, I am not sure how to apply the same inner_hits constraints to an aggregation.
This is my mapping:
{
"jobsitesIdx" : {
"mappings" : {
"sites" : {
"properties" : {
"createdAt" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"jobs" : {
"type" : "nested",
"properties" : {
"company" : {
"type" : "string"
},
"engagement" : {
"type" : "string"
},
"link" : {
"type" : "string",
"index" : "not_analyzed"
},
"location" : {
"type" : "string",
"fields" : {
"raw" : {
"type" : "string",
"index" : "not_analyzed"
}
}
},
"title" : {
"type" : "string"
}
}
},
"jobscount" : {
"type" : "long"
},
"sitename" : {
"type" : "string"
},
"url" : {
"type" : "string"
}
}
}
}
}
}
This is the query and aggregation that I am trying (from Node.js):
client.search({
"index": 'jobsitesIdx',
"type": 'sites',
"body": {
"aggs" : {
"jobs" : {
"nested" : {
"path" : "jobs"
},
"aggs" : {
"location" : { "terms" : { "field" : "jobs.location.raw", "size": 25 } },
"company" : { "terms" : { "field" : "jobs.company.raw", "size": 25 } }
}
}
},
"query": {
"filtered": {
"query": {"match_all": {}},
"filter": {
"nested": {
"inner_hits" : { "size": 1000 },
"path": "jobs",
"query":{
"filtered": {
"query": { "match_all": {}},
"filter": {
"and": [
{"term": {"jobs.location": "york"}},
{"term": {"jobs.location": "new"}}
]
}
}
}
}
}
}
}
}
}, function (error, response) {
response.hits.hits.forEach(function(jobsite) {
var jobs = jobsite.inner_hits.jobs.hits.hits;
jobs.forEach(function(job) {
console.log(job);
});
});
console.log(response.aggregations.jobs.location.buckets);
});
This gives me back all inner_hits of jobs in New York, but the aggregate is showing me counts for every location and company, not just the ones matching the inner_hits.
Any suggestions on how to get the aggregate on only the data contained in the matching inner_hits?
Edit:
I am updating this to include an export of the mapping and index data, as requested. I exported this using Taskrabbit's elasticdump tool, found here:
https://github.com/taskrabbit/elasticsearch-dump
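(For reference, commands along these lines produce such an export; --input, --output and --type are the options documented in the elasticdump README, though the exact invocation may differ between versions.)
elasticdump --input=http://localhost:9200/jobsites6 --output=jobsites6_mapping.json --type=mapping
elasticdump --input=http://localhost:9200/jobsites6 --output=jobsites6_data.json --type=data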
The index: http://pastebin.com/WaZwBwn4
The mapping: http://pastebin.com/ZkGnYN94
The above linked data differs from the sample code in my original question in that the index is named jobsites6 in the data instead of jobsitesIdx as referred to in the question. Also, the type in the data is 'job' whereas in the code above it is 'sites'.
I've filled in the callback in the code above to display the response data. I am seeing only jobs in New York from the forEach loop over the inner_hits, as expected; however, I am seeing this aggregation for location:
[ { key: 'New York, NY', doc_count: 243 },
{ key: 'San Francisco, CA', doc_count: 92 },
{ key: 'Chicago, IL', doc_count: 43 },
{ key: 'Boston, MA', doc_count: 39 },
{ key: 'Berlin, Germany', doc_count: 22 },
{ key: 'Seattle, WA', doc_count: 22 },
{ key: 'Los Angeles, CA', doc_count: 20 },
{ key: 'Austin, TX', doc_count: 18 },
{ key: 'Anywhere', doc_count: 16 },
{ key: 'Cupertino, CA', doc_count: 15 },
{ key: 'Washington D.C.', doc_count: 14 },
{ key: 'United States', doc_count: 11 },
{ key: 'Atlanta, GA', doc_count: 10 },
{ key: 'London, UK', doc_count: 10 },
{ key: 'Ulm, Deutschland', doc_count: 10 },
{ key: 'Riverton, UT', doc_count: 9 },
{ key: 'San Diego, CA', doc_count: 9 },
{ key: 'Charlotte, NC', doc_count: 8 },
{ key: 'Irvine, CA', doc_count: 8 },
{ key: 'London', doc_count: 8 },
{ key: 'San Mateo, CA', doc_count: 8 },
{ key: 'Boulder, CO', doc_count: 7 },
{ key: 'Houston, TX', doc_count: 7 },
{ key: 'Palo Alto, CA', doc_count: 7 },
{ key: 'Sydney, Australia', doc_count: 7 } ]
Since my inner_hits are limited to those in New York, I can see that the aggregation is not on my inner_hits because it is giving me counts for all locations.

You can achieve this by adding the same filter in your aggregation to only include New York jobs.
Also note that in your second aggregation you used company.raw, but in your mapping the jobs.company field has no not_analyzed sub-field named raw, so you will need to add one if you want to aggregate on the exact company name (a mapping sketch for that follows the query below).
{
"_source": [
"sitename"
],
"query": {
"filtered": {
"filter": {
"nested": {
"inner_hits": {
"size": 1000
},
"path": "jobs",
"query": {
"filtered": {
"filter": {
"terms": {
"jobs.location": [
"new",
"york"
]
}
}
}
}
}
}
}
},
"aggs": {
"jobs": {
"nested": {
"path": "jobs"
},
"aggs": {
"only_loc": {
"filter": { <----- add this filter
"terms": {
"jobs.location": [
"new",
"york"
]
}
},
"aggs": {
"location": {
"terms": {
"field": "jobs.location.raw",
"size": 25
}
},
"company": {
"terms": {
"field": "jobs.company",
"size": 25
}
}
}
}
}
}
}
}
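As noted above, if you want to aggregate on the exact company name you would add a not_analyzed sub-field to jobs.company, mirroring what the mapping already does for jobs.location. A sketch against the 1.x-style mapping from the question (existing documents would need to be reindexed before the new sub-field contains data):
PUT /jobsitesIdx/_mapping/sites
{
  "sites": {
    "properties": {
      "jobs": {
        "type": "nested",
        "properties": {
          "company": {
            "type": "string",
            "fields": {
              "raw": { "type": "string", "index": "not_analyzed" }
            }
          }
        }
      }
    }
  }
}
After that, the company terms aggregation can target jobs.company.raw just like the location one targets jobs.location.raw.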

Related

Elasticsearch - get all nested objects of all documents

Let's imagine an Elasticsearch index where each document represents a country. A country has a cities field, which is defined as nested.
Sample mapping (simplified for brevity of this example):
{
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"cities": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
// other properties are omitted for brevity
}
}
}
}
The documents which I'm inserting to the index look like this:
{
"name": "Slovakia",
"cities": [
{
"name": "Bratislava"
},
{
"name": "Zilina"
},
...
]
}
{
"name": "Czech Republic",
"cities": [
{
"name": "Praha"
},
{
"name": "Brno"
},
...
]
}
Is it possible to compose a query which returns all cities (over all countries) and supports sorting & pagination? In response, I'd like to have the complete nested objects + some fields of the parent object (so that I can display which country the city belongs to).
For example, the first returned page would contain 10 cities from the Czech Republic, and the second page would contain 10 cities: the last four from the Czech Republic and six from Slovakia.
I was looking into the composite aggregation, but I don't know how to add the country name to the sources:
{
"query": {
"match_all": {}
},
"aggs": {
"nested_aggs": {
"nested": {
"path": "cities"
},
"aggs": {
"by_name": {
"composite": {
"sources": [
{
"cityName": {
"terms": {
"field": "cities.name.keyword",
"order": "asc"
}
}
}
]
}
}
}
}
}
}
Is it possible to compose such a query without modifying the Elasticsearch mapping?
All members of composite aggregations need to be defined under the same context — you cannot intermix nested and non-nested contexts.
The easiest option would be to first aggregate on the countries and then on the cities:
{
"size": 0,
"aggs": {
"by_country": {
"terms": {
"field": "name.keyword",
"size": 10
},
"aggs": {
"nested_cities": {
"nested": {
"path": "cities"
},
"aggs": {
"by_cities": {
"terms": {
"field": "cities.name.keyword",
"size": 10
}
}
}
}
}
}
}
}
If you do have the option of changing the mapping, you can leverage the include_in_root feature which'll enable you to perform composite aggs such as:
{
"size": 0,
"aggs": {
"by_name": {
"composite": {
"sources": [
{
"countryName": {
"terms": {
"field": "name.keyword",
"order": "asc"
}
}
},
{
"cityName": {
"terms": {
"field": "cities.name.keyword",
"order": "asc"
}
}
}
]
}
}
}
}
which can be easily paginated.
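The mapping change itself is just flagging the nested field with include_in_root; a sketch reusing the cities mapping from the question (existing documents would need to be reindexed for it to take effect):
{
  "properties": {
    "cities": {
      "type": "nested",
      "include_in_root": true,
      "properties": {
        "name": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        }
      }
    }
  }
}
With that in place, the composite aggregation above works.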
Here's what the result would look like:
...
"aggregations" : {
"by_name" : {
"after_key" : {
"countryName" : "Slovakia",
"cityName" : "Zilina"
},
"buckets" : [
{
"key" : {
"countryName" : "Czech Republic",
"cityName" : "Brno"
},
"doc_count" : 1
},
{
"key" : {
"countryName" : "Czech Republic",
"cityName" : "Praha"
},
"doc_count" : 1
},
{
"key" : {
"countryName" : "Slovakia",
"cityName" : "Bratislava"
},
"doc_count" : 1
},
{
"key" : {
"countryName" : "Slovakia",
"cityName" : "Zilina"
},
"doc_count" : 1
}
]
}
}
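To page through the buckets, you pass the returned after_key back as the composite aggregation's after parameter on the next request; a sketch based on the response above:
{
  "size": 0,
  "aggs": {
    "by_name": {
      "composite": {
        "after": { "countryName": "Slovakia", "cityName": "Zilina" },
        "sources": [
          { "countryName": { "terms": { "field": "name.keyword", "order": "asc" } } },
          { "cityName": { "terms": { "field": "cities.name.keyword", "order": "asc" } } }
        ]
      }
    }
  }
}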

How to calculate the average rating of each product in Elasticsearch

I have products index with following mapping:
{
"products" : {
"mappings" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"name":{
"type": "text"
},
"price" : {
"type" : "integer"
},
"product_review_rel" : {
"type" : "join",
"eager_global_ordinals" : true,
"relations" : {
"product" : "review"
}
},
"rate" : {
"type" : "integer"
}
}
}
}
}
This index contains both products and reviews, as you can see from the product_review_rel join field.
Products contain name, price, ... fields.
Reviews contain rate, ... fields.
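(For context, with a join field the parent and child documents are indexed roughly like this; the document IDs, routing value and field values here are hypothetical, and the child must be routed to its parent.)
PUT products/_doc/1
{
  "name": "Example sunscreen",
  "price": 250000,
  "product_review_rel": "product"
}
PUT products/_doc/101?routing=1
{
  "rate": 5,
  "product_review_rel": { "name": "review", "parent": "1" }
}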
I want to get the average rating of each product. How can I do that?
Another question: is it possible to return the average rating for the products matched by the following query, in the same request?
{
"query": {
"nested": {
"path": "translations",
"query": {
"multi_match": {
"query": "kem chống nắng",
"fields": [
"name"
],
"analyzer":"vi_analyzer"
}
}
}
}
}
Update 1: Composite aggregation
GET products/_search
{
"query": {
"nested": {
"path": "translations",
"query": {
"multi_match": {
"query": "kem chống nắng",
"fields": [
"translations.name",
"translations.description"
],
"analyzer": "vi_analyzer"
}
}
}
},
"aggs": {
"products": {
"composite": {
"sources": [
{
"id": {
"terms": {
"field": "_id"
}
}
}
]
},
"aggs": {
"reviews": {
"children": {
"type": "review"
},
"aggs": {
"rating": {
"avg": {
"field": "rate"
}
}
}
}
}
}
}
}

Elasticsearch aggregation by arrays of String

I have an ElasticSearch index where I store telephony transactions (SMS, MMS, calls, etc.) with their associated costs.
The key of these documents is the MSISDN (a phone number). In my app there are groups of users, and each user can have one or more MSISDNs.
Here is the mapping for this kind of document:
"mappings" : {
"cdr" : {
"properties" : {
"callDatetime" : {
"type" : "long"
},
"callSource" : {
"type" : "string"
},
"callType" : {
"type" : "string"
},
"callZone" : {
"type" : "string"
},
"calledNumber" : {
"type" : "string"
},
"companyKey" : {
"type" : "string"
},
"consumption" : {
"properties" : {
"data" : {
"type" : "long"
},
"voice" : {
"type" : "long"
}
}
},
"cost" : {
"type" : "double"
},
"country" : {
"type" : "string"
},
"included" : {
"type" : "boolean"
},
"msisdn" : {
"type" : "string"
},
"network" : {
"type" : "string"
}
}
}
}
My goal and issue:
My goal is to make a query that retrieves the cost by callType and by group. But groups are not represented in ElasticSearch, only in my PostgreSQL database.
So I will write a method that retrieves all the MSISDNs for every existing group, producing something like a list of String arrays containing every MSISDN within each group.
Let's say I have something like :
"msisdn_by_group" : [
{
"group1" : ["01111111111", "02222222222", "033333333333", "044444444444"]
},
{
"group2" : ["05555555555","06666666666"]
}
]
Now, I will use this to generate an Elasticsearch query. I want to use an aggregation to sum the cost for all those terms in different buckets, and then split it again by callType (to make a stacked bar chart).
I've tried several things but didn't manage to make it work (histogram, buckets, terms and sum were mainly the keywords I was playing with).
If somebody here can help me with the order and the keywords I can use to achieve this, it would be great :) Thanks
EDIT :
Here is my last try :
QUERY:
{
"aggs" : {
"cost_histogram": {
"terms": {
"field": "callType"
},
"aggs": {
"cost_histogram_sum" : {
"sum": {
"field": "cost"
}
}
}
}
}
}
I got the expected result, but it's missing the "group" split, as I don't know how to pass the MSISDN arrays as criteria:
RESULT :
"aggregations": {
"cost_histogram": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "data",
"doc_count": 5925,
"cost_histogram_sum": {
"value": 0
}
},
{
"key": "sms_mms",
"doc_count": 5804,
"cost_histogram_sum": {
"value": 91.76999999999995
}
},
{
"key": "voice",
"doc_count": 5299,
"cost_histogram_sum": {
"value": 194.1196
}
},
{
"key": "sms_mms_plus",
"doc_count": 35,
"cost_histogram_sum": {
"value": 7.2976
}
}
]
}
}
OK, I found out how to do this with one query. It's quite a long query because it repeats for every group, but I have no choice. I'm using the "filter" aggregation.
Here is a working example based on the array I wrote in my question above :
POST localhost:9200/cdr/_search?size=0
{
"query": {
"term" : {
"companyKey" : 1
}
},
"aggs" : {
"group_1_split_cost": {
"filter": {
"bool": {
"should": [{
"bool": {
"must": {
"match": {
"msisdn": "01111111111"
}
}
}
},{
"bool": {
"must": {
"match": {
"msisdn": "02222222222"
}
}
}
},{
"bool": {
"must": {
"match": {
"msisdn": "03333333333"
}
}
}
},{
"bool": {
"must": {
"match": {
"msisdn": "04444444444"
}
}
}
}]
}
},
"aggs": {
"cost_histogram": {
"terms": {
"field": "callType"
},
"aggs": {
"cost_histogram_sum" : {
"sum": {
"field": "cost"
}
}
}
}
}
},
"group_2_split_cost": {
"filter": {
"bool": {
"should": [{
"bool": {
"must": {
"match": {
"msisdn": "05555555555"
}
}
}
},{
"bool": {
"must": {
"match": {
"msisdn": "06666666666"
}
}
}
}]
}
},
"aggs": {
"cost_histogram": {
"terms": {
"field": "callType"
},
"aggs": {
"cost_histogram_sum" : {
"sum": {
"field": "cost"
}
}
}
}
}
}
}
}
Thanks to newer versions of Elasticsearch we can now nest aggregations very deeply, but it's still a bit of a shame that we can't pass an array of values to an "OR" operator or something like that. It would reduce the size of these queries, even if they are a bit special and used in niche cases like mine.
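For what it's worth, the terms query does accept an array of values, so each per-group filter can most likely be collapsed into something like the sketch below (assuming the msisdn values index as single exact tokens, which should be the case for plain phone numbers):
POST localhost:9200/cdr/_search?size=0
{
  "query": {
    "term": { "companyKey": 1 }
  },
  "aggs": {
    "group_1_split_cost": {
      "filter": {
        "terms": { "msisdn": ["01111111111", "02222222222", "03333333333", "04444444444"] }
      },
      "aggs": {
        "cost_histogram": {
          "terms": { "field": "callType" },
          "aggs": {
            "cost_histogram_sum": { "sum": { "field": "cost" } }
          }
        }
      }
    },
    "group_2_split_cost": {
      "filter": {
        "terms": { "msisdn": ["05555555555", "06666666666"] }
      },
      "aggs": {
        "cost_histogram": {
          "terms": { "field": "callType" },
          "aggs": {
            "cost_histogram_sum": { "sum": { "field": "cost" } }
          }
        }
      }
    }
  }
}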

Elasticsearch filter the maximum value document

I'm trying to get the document with the maximum value from among records with the same name. For example, I have 3 users, 2 of whom have the same name but different follower counts; I want to return only one document of the two sharing a name, based on the maximum follower_count.
{ id: 1, name: "John Greenwood", follower_count: 100 }
{ id: 2, name: "John Greenwood", follower_count: 200 }
{ id: 3, name: "John Underwood", follower_count: 300 }
So the result would be,
{ id: 2, name: "John Greenwood", follower_count: 200 }
{ id: 3, name: "John Underwood", follower_count: 300 }
Of the two documents with the same name, the one with the most followers wins, and the document with a unique name is returned as well.
I have the following mapping:
"users-development" : {
"mappings" : {
"user" : {
"dynamic" : "false",
"properties" : {
"follower_count" : {
"type" : "integer"
},
"name" : {
"type" : "string",
"fields" : {
"exact" : {
"type" : "string",
"index" : "not_analyzed"
}
}
}
}
}
}
}
This is where I have been stuck for a while:
{
query: {
filtered: {
filter: {
bool: {
must: [
{ terms: { "name.exact": [ "John Greenwood", "John Underwood" ] } },
]
}
}
}
},
aggs: {
max_follower_count: { max: { field: 'follower_count' } }
},
size: 1000,
}
Any suggestions please
Your question has a ready-made tool in the Elastic stack for exactly this: aggregations. See the examples below.
First of all, in your case you will need to aggregate by the full name, including spaces, so your name field needs to be not_analyzed, like this:
PUT /index
{
"mappings": {
"users" : {
"properties" : {
"name" : {
"type" : "string",
"index": "not_analyzed"
}
}
}
}
}
Now your query will be like this one:
POST /index/users/_search
{
"aggs": {
"users": {
"terms": {
"field": "name"
},
"aggs": {
"followers": {
"max": {
"field": "follower_count"
}
}
}
}
}
}
I just aggregated by name and used a max metric to get the highest follower count.
The response will be like this:
"aggregations": {
"users": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "John Greenwood",
"doc_count": 2,
"followers": {
"value": 200
}
},
{
"key": "John Underwood",
"doc_count": 1,
"followers": {
"value": 300
}
}
]
}
}
Hope that helps. Use aggregations in all situations where you need to aggregate data and compute metrics such as sums over values.
Ok, I think you are looking for something along these lines, using the terms aggregation
{
"query": {
"terms": { "name.exact": [ "John Greenwood", "John Underwood" ] }
},
"aggs": {
"max_follower_count": {
"terms": {
"field":"name.exact"
},
"aggs":{
"max_follow" : { "max" : { "field" : "follower_count" } }
}
}
},
"size": 1000
}
The terms aggregation will make a bucket for each unique value of name.exact, which will only be those specified in your terms query. So we now have a bucket for both Johns, and we can use the max aggregation to see who has the most followers. The max aggregation will operate on each bucket in its parent aggregation.
Each of these unique terms will then have its max value of follower_count computed, and displayed in the bucket. Results look as follows:
... //query results of just the terms query up here
"aggregations": {
"max_follower_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "John Greenwood",
"doc_count": 2,
"max_follow": {
"value": 200
}
},
{
"key": "John Underwood",
"doc_count": 1,
"max_follow": {
"value": 300
}
}
]
}
}
The terms aggregation comes with a few caveats about how it does the counting, and the terms aggregation documentation should be pretty clear on that.
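If you need the whole winning document back (id, name, follower_count) rather than just the maximum value, a top_hits sub-aggregation sorted by follower_count should do it. This is a sketch, not part of the answers above, using the same field names as the question:
{
  "query": {
    "terms": { "name.exact": [ "John Greenwood", "John Underwood" ] }
  },
  "size": 0,
  "aggs": {
    "by_name": {
      "terms": { "field": "name.exact" },
      "aggs": {
        "top_follower": {
          "top_hits": {
            "size": 1,
            "sort": [ { "follower_count": { "order": "desc" } } ]
          }
        }
      }
    }
  }
}
Each name bucket then carries a single hit: the document with the highest follower_count for that name.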

Using a custom_score to sort by a nested child's timestamp

I'm pretty new to elasticsearch and have been banging my head trying to get this sorting to work. The general idea is to search email message threads with nested messages and nested participants. The goal is to display search results at the thread level, sorting by the participant who is doing the search and either the last_received_at or last_sent_at column depending on which mailbox they are in.
My understanding is that you can't sort by a single child's value among many nested children. So in order to do this I saw a couple of suggestions for using a custom_score query with a script, then sorting on the score. My plan is to dynamically change the sort column and then run a nested custom_score query that returns the date of one of the participants as the score. I've been noticing issues both with the score format being strange (e.g. it always has four zeros at the end) and with it possibly not returning the date I was expecting.
Below are simplified versions of the index and the query in question. If anyone has any suggestions, I'd be very grateful. (FYI - I am using elasticsearch version 0.20.6.)
Index:
mappings: {
message_thread: {
properties: {
id: {
type: long
}
subject: {
dynamic: true
properties: {
id: {
type: long
}
name: {
type: string
}
}
}
participants: {
dynamic: true
properties: {
id: {
type: long
}
name: {
type: string
}
last_sent_at: {
format: dateOptionalTime
type: date
}
last_received_at: {
format: dateOptionalTime
type: date
}
}
}
messages: {
dynamic: true
properties: {
sender: {
dynamic: true
properties: {
id: {
type: long
}
}
}
id: {
type: long
}
body: {
type: string
}
created_at: {
format: dateOptionalTime
type: date
}
recipient: {
dynamic: true
properties: {
id: {
type: long
}
}
}
}
}
version: {
type: long
}
}
}
}
Query:
{
"query": {
"bool": {
"must": [
{
"term": { "participants.id": 3785 }
},
{
"custom_score": {
"query": {
"filtered": {
"query": { "match_all": {} },
"filter": {
"term": { "participants.id": 3785 }
}
}
},
"params": { "sort_column": "participants.last_received_at" },
"script": "doc[sort_column].value"
}
}
]
}
},
"filter": {
"bool": {
"must": [
{
"term": { "messages.recipient.id": 3785 }
}
]
}
},
"sort": [ "_score" ]
}
Solution:
Thanks to #imotov, here is the final result. The participants were not properly nested in the index (while the messages didn't need to be). In addition, include_in_root was used for the participants to simplify the query (participants are small records and not a real size issue, although #imotov also provided an example without it). He then restructured the JSON request to use a dis_max query.
curl -XDELETE "localhost:9200/test-idx"
curl -XPUT "localhost:9200/test-idx" -d '{
"mappings": {
"message_thread": {
"properties": {
"id": {
"type": "long"
},
"messages": {
"properties": {
"body": {
"type": "string",
"analyzer": "standard"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd'\''T'\''HH:mm:ss'\''Z'\''"
},
"id": {
"type": "long"
},
"recipient": {
"dynamic": "true",
"properties": {
"id": {
"type": "long"
}
}
},
"sender": {
"dynamic": "true",
"properties": {
"id": {
"type": "long"
}
}
}
}
},
"messages_count": {
"type": "long"
},
"participants": {
"type": "nested",
"include_in_root": true,
"properties": {
"id": {
"type": "long"
},
"last_received_at": {
"type": "date",
"format": "yyyy-MM-dd'\''T'\''HH:mm:ss'\''Z'\''"
},
"last_sent_at": {
"type": "date",
"format": "yyyy-MM-dd'\''T'\''HH:mm:ss'\''Z'\''"
},
"name": {
"type": "string",
"analyzer": "standard"
}
}
},
"subject": {
"properties": {
"id": {
"type": "long"
},
"name": {
"type": "string"
}
}
}
}
}
}
}'
curl -XPUT "localhost:9200/test-idx/message_thread/1" -d '{
"id" : 1,
"subject" : {"name": "Test Thread"},
"participants" : [
{"id" : 87793, "name" : "John Smith", "last_received_at" : null, "last_sent_at" : "2010-10-27T17:26:58Z"},
{"id" : 3785, "name" : "David Jones", "last_received_at" : "2010-10-27T17:26:58Z", "last_sent_at" : null}
],
"messages" : [{
"id" : 1,
"body" : "This is a test.",
"sender" : { "id" : 87793 },
"recipient" : { "id" : 3785},
"created_at" : "2010-10-27T17:26:58Z"
}]
}'
curl -XPUT "localhost:9200/test-idx/message_thread/2" -d '{
"id" : 2,
"subject" : {"name": "Elastic"},
"participants" : [
{"id" : 57834, "name" : "Paul Johnson", "last_received_at" : "2010-11-25T17:26:58Z", "last_sent_at" : "2010-10-25T17:26:58Z"},
{"id" : 3785, "name" : "David Jones", "last_received_at" : "2010-10-25T17:26:58Z", "last_sent_at" : "2010-11-25T17:26:58Z"}
],
"messages" : [{
"id" : 2,
"body" : "More testing of elasticsearch.",
"sender" : { "id" : 57834 },
"recipient" : { "id" : 3785},
"created_at" : "2010-10-25T17:26:58Z"
},{
"id" : 3,
"body" : "Reply message.",
"sender" : { "id" : 3785 },
"recipient" : { "id" : 57834},
"created_at" : "2010-11-25T17:26:58Z"
}]
}'
curl -XPOST localhost:9200/test-idx/_refresh
echo
# Using include in root
curl "localhost:9200/test-idx/message_thread/_search?pretty=true" -d '{
"query": {
"filtered": {
"query": {
"nested": {
"path": "participants",
"score_mode": "max",
"query": {
"custom_score": {
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"participants.id": 3785
}
}
}
},
"params": {
"sort_column": "participants.last_received_at"
},
"script": "doc[sort_column].value"
}
}
}
},
"filter": {
"query": {
"multi_match": {
"query": "test",
"fields": ["subject.name", "participants.name", "messages.body"],
"operator": "and",
"use_dis_max": true
}
}
}
}
},
"sort": ["_score"],
"fields": []
}
'
# Not using include in root
curl "localhost:9200/test-idx/message_thread/_search?pretty=true" -d '{
"query": {
"filtered": {
"query": {
"nested": {
"path": "participants",
"score_mode": "max",
"query": {
"custom_score": {
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"participants.id": 3785
}
}
}
},
"params": {
"sort_column": "participants.last_received_at"
},
"script": "doc[sort_column].value"
}
}
}
},
"filter": {
"query": {
"bool": {
"should": [{
"match": {
"subject.name":"test"
}
}, {
"nested" : {
"path": "participants",
"query": {
"match": {
"name":"test"
}
}
}
}, {
"match": {
"messages.body":"test"
}
}
]
}
}
}
}
},
"sort": ["_score"],
"fields": []
}
'
There are a couple of issues here. You are asking about nested objects, but participants are not defined in your mapping as nested objects. The second possible issue is that score has type float, so it might not have enough precision to represent timestamp as is. If you can figure out how to fit this value into float, you can take a look at this example: Elastic search - tagging strength (nested/child document boosting). However, if you are developing a new system, it might be prudent to upgrade to 0.90.0.Beta1, which supports sorting on nested fields.
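For reference, once participants are mapped as nested (as in the solution above, with include_in_root), nested-field sorting looks roughly like the sketch below; nested_path and nested_filter are the sort options used by older Elasticsearch versions, and the participant id is reused from the question. This removes the need for the custom_score workaround:
{
  "query": {
    "term": { "participants.id": 3785 }
  },
  "sort": [
    {
      "participants.last_received_at": {
        "order": "desc",
        "nested_path": "participants",
        "nested_filter": {
          "term": { "participants.id": 3785 }
        }
      }
    }
  ]
}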
