ElasticSearch: Sorting a complex query - sorting

I have a complex query in Elasticsearch (below) and I need to sort by date_creation, ascending, within the "Activité" (activity) bucket. The query works, but the basic sort I added for date_creation has no effect. How can I sort the Activities by date_creation in ascending order? I've seen some posts on nested queries here on Stack Overflow, for example, and here in the Elasticsearch documentation, but they don't seem to address a query as complex as mine.
I am using ElasticSearch 2.3.5 with Lucene 5.5.0.
// Search request against the "resultats_<env>" index (Elasticsearch 2.x `filtered` query syntax).
// Builds nested terms aggregations on id_matiere > id_point_pedago > id_competence > id_activite,
// and splits every id_activite bucket into the T1/T2/T3 date ranges.
// `env` and `params` are defined outside this snippet.
var searchQuery = {
index: "resultats_" + env,
body: {
// size: 0 => no search hits are returned, only the aggregations.
size: 0,
// NOTE: a top-level sort orders search hits only; with size: 0 there are no hits,
// so this line does not influence the aggregation buckets below.
sort: [{ date_creation: { order: "asc", mode: "min" } }],
query: {
filtered: {
query: {
match_all: {}
},
filter: {
query: {
bool: {
// Empty placeholder clause.
should: [{}],
// Both term clauses are required: player_id and classes.
must: [
{
term: {
player_id: {
value: params.player_id
}
}
},
{
term: {
classes: {
value: params.grade
}
}
}
],
// Empty placeholder clause.
must_not: [{}]
}
}
}
}
},
aggs: {
// Level 1: one bucket per id_matiere (at most 10).
Matière: {
terms: {
field: "id_matiere",
size: 10
},
aggs: {
// A single document per bucket, fetched only to read titre_matiere.
"Titre matière": {
top_hits: {
_source: {
include: ["titre_matiere"]
},
size: 1
}
},
// Level 2: one bucket per id_point_pedago.
PP: {
terms: {
field: "id_point_pedago",
size: 10
},
aggs: {
// A single document per bucket, fetched only to read titre_point_pedago.
"Titre PP": {
top_hits: {
_source: {
include: ["titre_point_pedago"]
},
size: 1
}
},
// Level 3: one bucket per id_competence.
Compétence: {
terms: {
field: "id_competence",
size: 10
},
aggs: {
// A single document per bucket, fetched only to read titre_competence.
"Titre compétence": {
top_hits: {
_source: {
include: ["titre_competence"]
},
size: 1
}
},
// Level 4: one bucket per id_activite.
Activité: {
terms: {
field: "id_activite",
size: 10
},
aggs: {
// A single document per bucket; no sort is given here, so which document is
// returned does not depend on date_creation.
"Titre activité": {
top_hits: {
_source: {
include: [
"titre_activite",
"nombre_perimetre_occurrence"
]
},
size: 1
}
},
// One named bucket per trimester, bounded by params.tN_start / params.tN_end (inclusive).
Trimestres: {
filters: {
filters: {
T1: {
range: {
date_creation: {
gte: params.t1_start,
lte: params.t1_end
}
}
},
T2: {
range: {
date_creation: {
gte: params.t2_start,
lte: params.t2_end
}
}
},
T3: {
range: {
date_creation: {
gte: params.t3_start,
lte: params.t3_end
}
}
}
}
},
aggs: {
// Average of the "resultat" field per trimester.
Moyenne: {
avg: {
field: "resultat"
}
},
// Distinct count of id_occurrence per trimester.
Occurrences: {
cardinality: {
field: "id_occurrence",
precision_threshold: 1000
}
},
// Document count per "resultat" value; min_doc_count: 0 also returns empty buckets.
Résultat: {
terms: {
field: "resultat",
size: 10,
min_doc_count: 0
}
}
}
}
}
}
}
}
}
}
}
}
}
}
};

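The top-level sort only orders search hits, and with size: 0 none are returned. Put the sort inside the top_hits aggregation of the "Activité" bucket instead, like this: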
Activité: {
terms: {
field: "id_activite",
size: 10
},
aggs: {
"Titre activité": {
top_hits: {
_source: {
include: [
"titre_activite",
"nombre_perimetre_occurrence"
]
},
size: 1,
sort: [{ date_creation: { order: "asc", mode: "min" } }], // <-- add this line
}
},

Related

Elasticsearch query syntax update 1.0.14 to 7.5.2

I am attempting to update a rather sizable Elasticsearch query. I am very new to Elasticsearch and am having a hard time wrapping my head around everything that is happening here.
This is the original query:
elasticsearch_query search_models, {
query: {
filtered: {
query: {
function_score: {
query: {
bool: {
must: [
{
multi_match: {
operator: "and",
type: "cross_fields",
query: params[:term],
fuzziness: (params[:fuzzy] || 0),
fields: [
"name^2", "address", "email", "email2",
"primary_contact", "id", "lotname^7",
"lotname_keyword^9", "corp_name^4",
"vin^4", "serial_number^3", "dba_names",
"title_number", "title_tracking",
"full_name^3", "username", "reference", "user_name",
"phone", "text_number", "name_keyword^9"
],
},
}
],
must_not: [
{ term: { access: { value: 2 } } },
{ term: { ledger: { value: "payroll" } } },
{ term: { ledger: { value: "credit card" } } }
],
should: [
{ term: { active: { value: 1, boost: 100 } } },
{ term: { active: { value: 2, boost: 50 } } },
{ term: { active: { value: 3, boost: 0.05 } } },
{ term: { active: { value: 4, boost: 50 } } },
{ term: { active: { value: 5, boost: 50 } } },
{ term: { active: { value: 6, boost: 50 } } },
{ term: { branch_id: { value: current_branch.id, boost: 100 } } }
]
}
},
functions: [
{
filter: { term: { auto_declined: 1 } },
boost_factor: 0.3
},
{
filter: { term: { auto_declined: 0 } },
boost_factor: 0.0001
},
{
filter: { term: { access: 1 } }, # current employee
boost_factor: 10
},
{
filter: { term: { access: 0 } },
boost_factor: 0.2
},
{
filter: { term: { unit_status: 1 } }, # current unit
boost_factor: 2
},
{
filter: { type: {value: 'txn'} },
boost_factor: 0.4
}
]
}
},
filter: {
"or" =>
{ filters: [
{ term: { "branch_id" => current_branch.id }},
{ type: { "value" => "auction" }},
{ type: { "value" => "fee_schedule"}},
{ type: { "value" => "unit"}},
{
"and" => [
{ type: { "value" => "user" }},
{ "or" => [
{ term: { "access" => 1 }}
]}
]
}
]}
}
}
}
}
This is where I have gotten to thus far:
elasticsearch_query search_models, {
query: {
bool: {
must: {
function_score: {
query: {
bool: [
must: {
multi_match: {
query: params[:term],
type: "cross_fields",
operator: "and",
fields: [
"name^2", "address", "email", "email2",
"primary_contact", "id", "lotname^7",
"lotname_keyword^9", "corp_name^4",
"vin^4", "serial_number^3", "dba_names",
"title_number", "title_tracking",
"full_name^3", "username", "reference", "user_name",
"phone", "text_number", "name_keyword^9"
]
}
},
must_not: [
{ term: { access: { value: 2 } } },
{ term: { ledger: { value: "payroll" } } },
{ term: { ledger: { value: "credit card" } } }
],
should: [
{ term: { active: { value: 1, boost: 100 } } },
{ term: { active: { value: 2, boost: 50 } } },
{ term: { active: { value: 3, boost: 0.05 } } },
{ term: { active: { value: 4, boost: 50 } } },
{ term: { active: { value: 5, boost: 50 } } },
{ term: { active: { value: 6, boost: 50 } } },
{ term: { branch_id: { value: current_branch.id, boost: 100 } } }
]
]
},
functions: [
{
filter: { term: { auto_declined: 1 } },
weight: 0.3
},
{
filter: { term: { auto_declined: 0 } },
weight: 0.0001
},
{
filter: { term: { access: 1 } }, # current employee
weight: 10
},
{
filter: { term: { access: 0 } },
weight: 0.2
},
{
filter: { term: { unit_status: 1 } }, # current unit
weight: 2
},
{
filter: { type: {value: 'txn'} },
weight: 0.4
}
]
}
},
filter: [
{ term: { branch_id: current_branch.id } },
{ type: { value: "auction" } },
{ type: { value: "fee_schedule"} },
{ type: { value: "unit"} },
bool: {
must: {
bool: {
should: [
{ type: { value: "user" } },
{ term: { access: 1 } }
]
}
}
}
]
}
}
}
I have:
replaced 'filtered' with 'bool' and 'must'
replaced 'boost_factor' with 'weight'
removed 'fuzziness' from the 'cross_fields' type 'multi_match'
attempted to update the 'or' and 'and' logic with newer 'bool' syntax.
The first three changes seem to have resolved their respective errors, but I am getting hung up on the filter with the 'or' and 'and' logic. I would greatly appreciate some guidance!
This is the error I am receiving:
[400] {"error":{"root_cause":[{"type":"parsing_exception","reason":"[bool] query malformed, no start_object after query name","line":1,"col":61}],"type":"parsing_exception","reason":"[bool] query malformed, no start_object after query name","line":1,"col":61},"status":400}
If any further information would be helpful, please let me know.
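Your code generates a malformed request body, which is why you get the parsing exception: the inner `bool` must be an object (`bool: { ... }`), but you pass it an array (`bool: [ ... ]`). The query below is structurally valid JSON: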
{
"query": {
"bool": {
"must": {
"function_score": {
"query": {
"bool": {
"must": [{
"multi_match": {
"query": "params[:term]",
"type": "cross_fields",
"operator": "and",
"fields": [
"name^2", "address", "email", "email2",
"primary_contact", "id", "lotname^7",
"lotname_keyword^9", "corp_name^4",
"vin^4", "serial_number^3", "dba_names",
"title_number", "title_tracking",
"full_name^3", "username", "reference", "user_name",
"phone", "text_number", "name_keyword^9"
]
}
}],
"must_not": [
{ "term": { "access": { "value": 2 } } },
{ "term": { "ledger": { "value": "payroll" } } },
{ "term": { "ledger": { "value": "credit card" } } }
],
"should": [
{ "term": { "active": { "value": 1, "boost": 100 } } }
]
}
},
"functions": [
{
"filter": { "term": { "auto_declined": 1 } },
"weight": 0.3
},
{
"filter": { "term": { "auto_declined": 0 } },
"weight": 0.0001
},
{
"filter": { "term": { "access": 1 } },
"weight": 10
},
{
"filter": { "term": { "access": 0 } },
"weight": 0.2
},
{
"filter": { "term": { "unit_status": 1 } },
"weight": 2
},
{
"filter": { "type": {"value": "txn"} },
"weight": 0.4
}
]
}
},
"filter": [
{ "term": { "branch_id": "current_branch.id" } },
{ "type": { "value": "auction" } },
{ "type": { "value": "fee_schedule"} },
{ "type": { "value": "unit"} },
{"bool": {
"must": {
"bool": {
"should": [
{ "type": { "value": "user" } },
{ "term": { "access": 1 } }
]
}
}
}
}
]
}
}
}
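You can compare it with your version and fix the curly braces and brackets accordingly. Note that the JSON above is abbreviated: params[:term] and current_branch.id appear as placeholder strings, and only the first `should` clause is kept. Also be aware that all clauses in a `bool` `filter` array are required (AND), whereas the original `or` filter matched any one of them; to keep the old behaviour, put those clauses in a `should` array with `minimum_should_match: 1` inside the filter.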

Elasticsearch '[bool] failed to parse field [filter]' exception

I am trying to fix a parsing exception raised by my search query.
I would appreciate some help :)
The exception reason:
'[1:65] [bool] failed to parse field [filter]'
message:'x_content_parse_exception'
My search:
// Searches the Measurements index (up to 10000 hits) for MachineId "mo3"/"mo2" with
// '#timestamp' within the last `lastMinutes` minutes, sorted on '#timestamp' descending,
// and keeps only the _source of every hit.
data = (await this.elasticClient.search({
index: Indexes.Measurements,
size: 10000,
body: {
query: {
bool: {
// NOTE(review): this single filter object holds two query clauses (terms and range)
// side by side; this is the spot the reported "[bool] failed to parse field [filter]"
// exception refers to.
filter: {
terms: {
"MachineId": ["mo3", "mo2"]
},
range: {
'#timestamp': {
gte: `now-${lastMinutes}m`,
lte: 'now'
}
}
}
},
},
sort : [{ "#timestamp" : "desc" }]
}})).body.hits.hits.map(data => data._source);
A single filter object cannot hold two query clauses. Wrap each clause (terms, range) in its own object and put them in an array ([]) under filter. Try the query below:
// Searches the Measurements index (up to 10000 hits) for MachineId "mo3"/"mo2" with
// '#timestamp' within the last `lastMinutes` minutes, sorted on '#timestamp' descending,
// and keeps only the _source of every hit.
data = (await this.elasticClient.search({
  index: Indexes.Measurements,
  size: 10000,
  body: {
    query: {
      bool: {
        // Each query clause is its own object inside the filter array;
        // all clauses in the array have to match.
        filter: [
          {
            terms: {
              "MachineId": ["mo3", "mo2"]
            }
          },
          {
            range: {
              '#timestamp': {
                gte: `now-${lastMinutes}m`,
                lte: 'now'
              }
            }
          }
        ]
      }
    },
    // sort is part of the request body, next to query.
    sort: [{ "#timestamp": "desc" }]
  }
})).body.hits.hits.map(hit => hit._source);
In JSON format, the query part of the request body looks like this:
{
"query": {
"bool": {
"filter": [
{
"terms": {
"MachineId": [
"mo3",
"mo2"
]
}
},
{
"range": {
"#timestamp": {
"gte": "now-${lastMinutes}m",
"lte": "now"
}
}
}
]
}
}
}

Elasticsearch query and aggregation on indices

I have multiple elasticsearch indices and I would like to get the doc_count of how many documents matched the query for each index (even if no documents matched for an index). I tried this (using elasticsearch.js):
{
index: 'one,or,many,indices,here',
query: {
query_string: {
query: 'hello',
},
},
aggs: {
group_by_index: {
terms: {
field: '_index',
min_doc_count: 0,
},
},
},
from: 0,
size: 20,
};
This only works if I specify all indices on the index key. However, I don't always want to match documents across all indices in my search hits.
So I came up with:
{
index: 'all,indices,here',
query: {
bool: {
must: [
{
query_string: {
query: 'hello',
},
},
{
terms: {
_index: 'only,search,hits,indices,here',
},
},
],
},
},
aggs: {
group_by_index: {
terms: {
field: '_index',
min_doc_count: 0,
},
},
},
from: 0,
size: 20,
};
But then I get doc_count: 0 for indices where there are matches because the index is not in the bool query.
How can I get the doc_count for all indices but not get search hits from unwanted indices?
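You need to move your index constraint to post_filter. A post_filter is applied to the search hits only after the aggregations have been computed, so the aggregation still counts matches in every index while the hits are limited to the wanted indices. (In a real request, `terms` takes an array of index names; the comma-separated string below is just a placeholder.)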
{
index: 'all,indices,here',
query: {
bool: {
must: [
{
query_string: {
query: 'hello',
},
},
],
},
},
aggs: {
group_by_index: {
terms: {
field: '_index',
min_doc_count: 0,
},
},
},
post_filter: {
terms: {
_index: 'only,search,hits,indices,here',
},
},
from: 0,
size: 20,
};

ElasticSearch: Filtering a Complex Query

I have a complex query in Elasticsearch (below) and I need a pretty basic filter to filter out all "resultat" (results) where the value = 1. (resultat is a numeric field).
I've tried using the "must_not" term in the code below, which was found here and while it does not cause errors, it still does not seem to properly filter the results.
I am using ElasticSearch 2.3.5 with Lucene 5.5.0.
// Search request against the "resultats_<env>" index (Elasticsearch 2.x `filtered` query syntax).
// Builds nested terms aggregations on id_matiere > id_point_pedago > id_competence > id_activite,
// and splits every id_activite bucket into the T1/T2/T3 date ranges.
// `env` and `params` are defined outside this snippet.
var searchQuery = {
index: "resultats_" + env,
body: {
// size: 0 => no search hits are returned, only the aggregations.
size: 0,
query: {
filtered: {
query: {
match_all: {}
},
filter: {
query: {
bool: {
// Empty placeholder clause.
should: [{}],
// Both term clauses are required: player_id and classes.
must: [
{
term: {
player_id: {
value: params.player_id
}
}
},
{
term: {
classes: {
value: params.grade
}
}
}
],
// Intended to exclude documents whose resultat equals 1.
must_not: [
{
term: {
resultat: {
value: 1
}
}
}
]
}
}
}
}
},
aggs: {
// Level 1: one bucket per id_matiere (at most 10).
Matière: {
terms: {
field: "id_matiere",
size: 10
},
aggs: {
// A single document per bucket, fetched only to read titre_matiere.
"Titre matière": {
top_hits: {
_source: {
include: ["titre_matiere"]
},
size: 1
}
},
// Level 2: one bucket per id_point_pedago.
PP: {
terms: {
field: "id_point_pedago",
size: 10
},
aggs: {
// A single document per bucket, fetched only to read titre_point_pedago.
"Titre PP": {
top_hits: {
_source: {
include: ["titre_point_pedago"]
},
size: 1
}
},
// Level 3: one bucket per id_competence.
Compétence: {
terms: {
field: "id_competence",
size: 10
},
aggs: {
// A single document per bucket, fetched only to read titre_competence.
"Titre compétence": {
top_hits: {
_source: {
include: ["titre_competence"]
},
size: 1
}
},
// Level 4: one bucket per id_activite.
Activité: {
terms: {
field: "id_activite",
size: 10
},
aggs: {
// A single document per bucket; the sort makes it the one with the
// earliest date_creation.
"Titre activité": {
top_hits: {
_source: {
include: [
"titre_activite",
"nombre_perimetre_occurrence"
]
},
size: 1,
sort: [
{ date_creation: { order: "asc", mode: "min" } }
]
}
},
// One named bucket per trimester, bounded by params.tN_start / params.tN_end (inclusive).
Trimestres: {
filters: {
filters: {
T1: {
range: {
date_creation: {
gte: params.t1_start,
lte: params.t1_end
}
}
},
T2: {
range: {
date_creation: {
gte: params.t2_start,
lte: params.t2_end
}
}
},
T3: {
range: {
date_creation: {
gte: params.t3_start,
lte: params.t3_end
}
}
}
}
},
aggs: {
// Average of the "resultat" field per trimester.
Moyenne: {
avg: {
field: "resultat"
}
},
// Distinct count of id_occurrence per trimester.
Occurrences: {
cardinality: {
field: "id_occurrence",
precision_threshold: 1000
}
},
// Document count per "resultat" value; min_doc_count: 0 also returns empty buckets.
Résultat: {
terms: {
field: "resultat",
size: 10,
min_doc_count: 0
}
}
}
}
}
}
}
}
}
}
}
}
}
}
};

Inner hits on grandparents still not working

I have problems retrieving the inner_hits of my "grandparent" items.
Retrieving parents from a child query works fine, but I can't get it to also return the documents one level further up.
Any ideas?
The known issue for this should be fixed by now (2.3), and the workarounds are written for nested objects, not for parent/child hierarchies, so I can't get them to work for me.
Code in Sense-format:
POST /test/child/_search
{
"query": {
"has_parent": {
"type": "parent",
"query": {
"has_parent": {
"type": "grandparent",
"query": {
"match_all": {}
},
"inner_hits": {}
}
},
"inner_hits": {}
}
}
}
PUT /test/child/3?parent=2&routing=1
{
"id": 3,
"name": "child",
"parentid": 2
}
PUT /test/parent/2?parent=1&routing=1
{
"id": 2,
"name": "parent",
"parentid": 1
}
PUT /test/grandparent/1
{
"id": 1,
"name": "grandparent"
}
PUT /test
{
"mappings": {
"grandparent": {},
"parent": {
"_parent": {
"type": "grandparent"
}
},
"child": {
"_parent": {
"type": "parent"
}
}
}
}
Here is sample code that retrieves inner hits through nested has_parent queries (parent and grandparent):
// Response paths to keep, passed as `filter_path`. The entries are joined with a bare
// comma: a multi-line template literal would embed a newline in front of the second
// path, so that entry would no longer be a clean dotted path.
const filterPath = [
  'hits.hits.inner_hits.activity.hits.hits.inner_hits.user.hits.hits._source*',
  'hits.hits.inner_hits.activity.hits.hits.inner_hits.user.hits.hits.inner_hits.fofo.hits.hits._source*',
].join(',');
// Document fields requested through `_source`.
const source = ['id', 'name', 'thumbnail'];
// Innermost clause: has_child on the Follower relation where `follower` or `following`
// matches the current user's id (at least one of the two must match).
const followerClause = {
  has_child: {
    type: ElasticRelationType.Follower,
    query: {
      bool: {
        minimum_should_match: 1,
        should: [
          { match: { follower: req.currentUser?.id } },
          { match: { following: req.currentUser?.id } },
        ],
      },
    },
    inner_hits: { _source: ['follower', 'following', 'status'] },
  },
};

// Middle clause: has_parent on the User relation; the parent must have an `id` field,
// and the follower clause is attached as an optional `should`.
const userParentClause = {
  has_parent: {
    parent_type: ElasticRelationType.User,
    query: {
      bool: {
        must: [{ exists: { field: 'id' } }],
        should: [followerClause],
      },
    },
    inner_hits: { _source: ['id', 'name', 'thumbnail'] },
  },
};

// Outer clause: has_parent on the Post relation, restricted to the post id taken from
// the request parameters and to the user-parent clause.
const postParentClause = {
  has_parent: {
    parent_type: ElasticRelationType.Post,
    query: {
      bool: {
        must: [{ term: { id: { value: req.params.id } } }, userParentClause],
      },
    },
    inner_hits: {},
  },
};

// Run the search: documents whose relation_type matches Like and that satisfy the
// post-parent clause, paginated with from/size and sorted on createdAt descending.
const { body } = await elasticWrapper.client.search({
  index: ElasticIndex.UserDataFactory,
  filter_path: filterPath,
  _source: source,
  body: {
    from,
    size,
    query: {
      bool: {
        must: [
          { match: { relation_type: ElasticRelationType.Like } },
          postParentClause,
        ],
      },
    },
    sort: [{ createdAt: { order: 'desc' } }],
  },
});

Resources