ElasticSearch: Filtering a Complex Query - elasticsearch

I have a complex query in Elasticsearch (below) and I need a fairly basic filter that excludes every "resultat" (result) whose value is 1 (resultat is a numeric field).
I've tried adding a "must_not" clause (found here) to the code below. It does not cause any errors, but it still does not seem to filter the results properly.
I am using ElasticSearch 2.3.5 with Lucene 5.5.0.
// Elasticsearch search request. `index` is "resultats_" + env; `body` holds a
// filtered query followed by a nested aggregation tree.
var searchQuery = {
index: "resultats_" + env,
body: {
// size 0: no hits are requested at the request level.
size: 0,
query: {
filtered: {
query: {
match_all: {}
},
filter: {
query: {
bool: {
// A single empty clause.
should: [{}],
// Two term clauses: player_id and classes, both taken from params.
must: [
{
term: {
player_id: {
value: params.player_id
}
}
},
{
term: {
classes: {
value: params.grade
}
}
}
],
// Term clause on resultat = 1, placed under must_not.
must_not: [
{
term: {
resultat: {
value: 1
}
}
}
]
}
}
}
}
},
// Bucket nesting, outermost first: Matière (id_matiere) > PP (id_point_pedago)
// > Compétence (id_competence) > Activité (id_activite) > Trimestres (T1-T3).
aggs: {
Matière: {
terms: {
field: "id_matiere",
size: 10
},
aggs: {
// One hit per bucket, with _source limited to titre_matiere.
"Titre matière": {
top_hits: {
_source: {
include: ["titre_matiere"]
},
size: 1
}
},
PP: {
terms: {
field: "id_point_pedago",
size: 10
},
aggs: {
// One hit per bucket, with _source limited to titre_point_pedago.
"Titre PP": {
top_hits: {
_source: {
include: ["titre_point_pedago"]
},
size: 1
}
},
Compétence: {
terms: {
field: "id_competence",
size: 10
},
aggs: {
// One hit per bucket, with _source limited to titre_competence.
"Titre compétence": {
top_hits: {
_source: {
include: ["titre_competence"]
},
size: 1
}
},
Activité: {
terms: {
field: "id_activite",
size: 10
},
aggs: {
// One hit per bucket, chosen by ascending date_creation.
"Titre activité": {
top_hits: {
_source: {
include: [
"titre_activite",
"nombre_perimetre_occurrence"
]
},
size: 1,
sort: [
{ date_creation: { order: "asc", mode: "min" } }
]
}
},
// Three named filters, each a date_creation range whose bounds come from params.
Trimestres: {
filters: {
filters: {
T1: {
range: {
date_creation: {
gte: params.t1_start,
lte: params.t1_end
}
}
},
T2: {
range: {
date_creation: {
gte: params.t2_start,
lte: params.t2_end
}
}
},
T3: {
range: {
date_creation: {
gte: params.t3_start,
lte: params.t3_end
}
}
}
}
},
// Per-trimester metrics: average of resultat, distinct id_occurrence count,
// and a terms breakdown of resultat.
aggs: {
Moyenne: {
avg: {
field: "resultat"
}
},
Occurrences: {
cardinality: {
field: "id_occurrence",
precision_threshold: 1000
}
},
// NOTE(review): min_doc_count is 0 here. Per the Elasticsearch terms-aggregation
// docs this can emit buckets with a zero doc count, so a bucket for resultat = 1
// may still appear even when must_not excludes those documents -- confirm against
// the actual response.
Résultat: {
terms: {
field: "resultat",
size: 10,
min_doc_count: 0
}
}
}
}
}
}
}
}
}
}
}
}
}
}
};

Related

Elasticsearch query syntax update 1.0.14 to 7.5.2

I am attempting to update a rather sizable Elasticsearch query. I am very new to Elasticsearch and am having a hard time wrapping my head around everything that is happening here.
This is the original query:
elasticsearch_query search_models, {
query: {
filtered: {
query: {
function_score: {
query: {
bool: {
must: [
{
multi_match: {
operator: "and",
type: "cross_fields",
query: params[:term],
fuzziness: (params[:fuzzy] || 0),
fields: [
"name^2", "address", "email", "email2",
"primary_contact", "id", "lotname^7",
"lotname_keyword^9", "corp_name^4",
"vin^4", "serial_number^3", "dba_names",
"title_number", "title_tracking",
"full_name^3", "username", "reference", "user_name",
"phone", "text_number", "name_keyword^9"
],
},
}
],
must_not: [
{ term: { access: { value: 2 } } },
{ term: { ledger: { value: "payroll" } } },
{ term: { ledger: { value: "credit card" } } }
],
should: [
{ term: { active: { value: 1, boost: 100 } } },
{ term: { active: { value: 2, boost: 50 } } },
{ term: { active: { value: 3, boost: 0.05 } } },
{ term: { active: { value: 4, boost: 50 } } },
{ term: { active: { value: 5, boost: 50 } } },
{ term: { active: { value: 6, boost: 50 } } },
{ term: { branch_id: { value: current_branch.id, boost: 100 } } }
]
}
},
functions: [
{
filter: { term: { auto_declined: 1 } },
boost_factor: 0.3
},
{
filter: { term: { auto_declined: 0 } },
boost_factor: 0.0001
},
{
filter: { term: { access: 1 } }, # current employee
boost_factor: 10
},
{
filter: { term: { access: 0 } },
boost_factor: 0.2
},
{
filter: { term: { unit_status: 1 } }, # current unit
boost_factor: 2
},
{
filter: { type: {value: 'txn'} },
boost_factor: 0.4
}
]
}
},
filter: {
"or" =>
{ filters: [
{ term: { "branch_id" => current_branch.id }},
{ type: { "value" => "auction" }},
{ type: { "value" => "fee_schedule"}},
{ type: { "value" => "unit"}},
{
"and" => [
{ type: { "value" => "user" }},
{ "or" => [
{ term: { "access" => 1 }}
]}
]
}
]}
}
}
}
}
This is where I have gotten to thus far:
elasticsearch_query search_models, {
query: {
bool: {
must: {
function_score: {
query: {
bool: [
must: {
multi_match: {
query: params[:term],
type: "cross_fields",
operator: "and",
fields: [
"name^2", "address", "email", "email2",
"primary_contact", "id", "lotname^7",
"lotname_keyword^9", "corp_name^4",
"vin^4", "serial_number^3", "dba_names",
"title_number", "title_tracking",
"full_name^3", "username", "reference", "user_name",
"phone", "text_number", "name_keyword^9"
]
}
},
must_not: [
{ term: { access: { value: 2 } } },
{ term: { ledger: { value: "payroll" } } },
{ term: { ledger: { value: "credit card" } } }
],
should: [
{ term: { active: { value: 1, boost: 100 } } },
{ term: { active: { value: 2, boost: 50 } } },
{ term: { active: { value: 3, boost: 0.05 } } },
{ term: { active: { value: 4, boost: 50 } } },
{ term: { active: { value: 5, boost: 50 } } },
{ term: { active: { value: 6, boost: 50 } } },
{ term: { branch_id: { value: current_branch.id, boost: 100 } } }
]
]
},
functions: [
{
filter: { term: { auto_declined: 1 } },
weight: 0.3
},
{
filter: { term: { auto_declined: 0 } },
weight: 0.0001
},
{
filter: { term: { access: 1 } }, # current employee
weight: 10
},
{
filter: { term: { access: 0 } },
weight: 0.2
},
{
filter: { term: { unit_status: 1 } }, # current unit
weight: 2
},
{
filter: { type: {value: 'txn'} },
weight: 0.4
}
]
}
},
filter: [
{ term: { branch_id: current_branch.id } },
{ type: { value: "auction" } },
{ type: { value: "fee_schedule"} },
{ type: { value: "unit"} },
bool: {
must: {
bool: {
should: [
{ type: { value: "user" } },
{ term: { access: 1 } }
]
}
}
}
]
}
}
}
I have:
replaced 'filtered' with 'bool' and 'must'
replaced 'boost_factor' with 'weight'
removed 'fuzziness' from the 'cross_fields' type 'multi_match'
attempted to update the 'or' and 'and' logic with newer 'bool' syntax.
The first three changes seem to have resolved their respective errors, but I am getting hung up on the filter that uses the 'or' and 'and' logic. I would greatly appreciate some guidance!
This is the error I am receiving:
[400] {"error":{"root_cause":[{"type":"parsing_exception","reason":"[bool] query malformed, no start_object after query name","line":1,"col":61}],"type":"parsing_exception","reason":"[bool] query malformed, no start_object after query name","line":1,"col":61},"status":400}
If any further information would be helpful, please let me know.
Your query forms invalid JSON, which is why you get the exception. See the query below, which has no JSON errors.
{
"query": {
"bool": {
"must": {
"function_score": {
"query": {
"bool": {
"must": [{
"multi_match": {
"query": "params[:term]",
"type": "cross_fields",
"operator": "and",
"fields": [
"name^2", "address", "email", "email2",
"primary_contact", "id", "lotname^7",
"lotname_keyword^9", "corp_name^4",
"vin^4", "serial_number^3", "dba_names",
"title_number", "title_tracking",
"full_name^3", "username", "reference", "user_name",
"phone", "text_number", "name_keyword^9"
]
}
}],
"must_not": [
{ "term": { "access": { "value": 2 } } },
{ "term": { "ledger": { "value": "payroll" } } },
{ "term": { "ledger": { "value": "credit card" } } }
],
"should": [
{ "term": { "active": { "value": 1, "boost": 100 } } }
]
}
},
"functions": [
{
"filter": { "term": { "auto_declined": 1 } },
"weight": 0.3
},
{
"filter": { "term": { "auto_declined": 0 } },
"weight": 0.0001
},
{
"filter": { "term": { "access": 1 } },
"weight": 10
},
{
"filter": { "term": { "access": 0 } },
"weight": 0.2
},
{
"filter": { "term": { "unit_status": 1 } },
"weight": 2
},
{
"filter": { "type": {"value": "txn"} },
"weight": 0.4
}
]
}
},
"filter": [
{ "term": { "branch_id": "current_branch.id" } },
{ "type": { "value": "auction" } },
{ "type": { "value": "fee_schedule"} },
{ "type": { "value": "unit"} },
{"bool": {
"must": {
"bool": {
"should": [
{ "type": { "value": "user" } },
{ "term": { "access": 1 } }
]
}
}
}
}
]
}
}
}
Compare it with your query and adjust the curly braces and square brackets accordingly.

Elasticsearch '[bool] failed to parse field [filter]' exception

I am trying to resolve a parsing exception raised by my search query.
I would appreciate some help with it :)
The exception reason:
'[1:65] [bool] failed to parse field [filter]'
message:'x_content_parse_exception'
My search:
// Searches the Measurements index for machines "mo3"/"mo2" within the last
// `lastMinutes` minutes and maps every hit to its _source.
data = (await this.elasticClient.search({
index: Indexes.Measurements,
size: 10000,
body: {
query: {
bool: {
// NOTE(review): `terms` and `range` are both keys of this single `filter`
// object; this is the request reported to fail with
// "[bool] failed to parse field [filter]".
filter: {
terms: {
"MachineId": ["mo3", "mo2"]
},
range: {
'#timestamp': {
gte: `now-${lastMinutes}m`,
lte: 'now'
}
}
}
},
},
// sort is part of the request body, next to query.
sort : [{ "#timestamp" : "desc" }]
}})).body.hits.hits.map(data => data._source);
You are missing [] around the filter clauses: each clause (terms, range) must be its own object inside an array. Try the query below:
// Searches the Measurements index for machines "mo3"/"mo2" within the last
// `lastMinutes` minutes, sorted by #timestamp descending, and maps every hit
// to its _source.
data = (await this.elasticClient.search({
  index: Indexes.Measurements,
  size: 10000,
  body: {
    query: {
      bool: {
        // `filter` is an array so that every clause is its own object;
        // putting `terms` and `range` in one object is what the parser rejects.
        filter: [
          {
            terms: {
              "MachineId": ["mo3", "mo2"]
            }
          },
          {
            range: {
              '#timestamp': {
                gte: `now-${lastMinutes}m`,
                lte: 'now'
              }
            }
          }
        ]
      }
    },
    // sort stays inside the request body, next to query.
    sort: [{ "#timestamp": "desc" }]
  }
})).body.hits.hits.map((hit) => hit._source);
In JSON format, it looks like this:
{
"query": {
"bool": {
"filter": [
{
"terms": {
"MachineId": [
"mo3",
"mo2"
]
}
},
{
"range": {
"#timestamp": {
"gte": "now-${lastMinutes}m",
"lte": "now"
}
}
}
]
}
}
}

elasticsearch nested query returns only last 3 results

We have the following elasticsearch mapping
{
index: 'data',
body: {
settings: {
analysis: {
analyzer: {
lowerCase: {
tokenizer: 'whitespace',
filter: ['lowercase']
}
}
}
},
mappings: {
// used for _all field
_default_: {
index_analyzer: 'lowerCase'
},
entry: {
properties: {
id: { type: 'string', analyzer: 'lowerCase' },
type: { type: 'string', analyzer: 'lowerCase' },
name: { type: 'string', analyzer: 'lowerCase' },
blobIds: {
type: 'nested',
properties: {
id: { type: 'string' },
filename: { type: 'string', analyzer: 'lowerCase' }
}
}
}
}
}
}
}
and a sample document that looks like the following:
{
"id":"5f02e9dae252732912749e13",
"type":"test_type",
"name":"test_name",
"creationTimestamp":"2020-07-06T09:07:38.775Z",
"blobIds":[
{
"id":"5f02e9dbe252732912749e18",
"filename":"test1.csv"
},
{
"id":"5f02e9dbe252732912749e1c",
"filename":"test2.txt"
},
// removed in-between elements for simplicity
{
"id":"5f02e9dbe252732912749e1e",
"filename":"test3.csv"
},
{
"id":"5f02e9dbe252732912749e58",
"filename":"test4.txt"
},
{
"id":"5f02e9dbe252732912749e5a",
"filename":"test5.csv"
},
{
"id":"5f02e9dbe252732912749e5d",
"filename":"test6.txt"
}
]
}
I have the following ES query which is querying documents for a certain timerange based on the creationTimestamp field and then filtering the nested field blobIds based on a user query, that should match the blobIds.filename field.
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"range": {
"creationTimestamp": {
"gte": "2020-07-01T09:07:38.775Z",
"lte": "2020-07-07T09:07:40.147Z"
}
}
},
{
"nested": {
"path": [
"blobIds"
],
"query": {
"query_string": {
"fields": [
"blobIds.filename"
],
"query": "*"
}
},
// returns the actual blobId hit
// and not the whole array
"inner_hits": {}
}
},
{
"query": {
"query_string": {
"query": "+type:*test_type* +name:*test_name*"
}
}
}
]
}
}
}
},
"sort": [
{
"creationTimestamp": {
"order": "asc"
},
"id": {
"order": "asc"
}
}
]
}
The above entry is clearly matching the query. However, it seems like there is something wrong with the returned inner_hits, since I always get only the last 3 blobIds elements instead of the whole array that contains 24 elements, as can be seen below.
{
"name": "test_name",
"creationTimestamp": "2020-07-06T09:07:38.775Z",
"id": "5f02e9dae252732912749e13",
"type": "test_type",
"blobIds": [
{
"id": "5f02e9dbe252732912749e5d",
"filename": "test4.txt"
},
{
"id": "5f02e9dbe252732912749e5a",
"filename": "test5.csv"
},
{
"id": "5f02e9dbe252732912749e58",
"filename": "test6.txt"
}
]
}
I find it very strange since I'm only doing a simple * query.
I am using Elasticsearch v1.7 and cannot upgrade at the moment.

ElasticSearch: Sorting a complex query

I have a complex query in Elasticsearch (below) and I need to sort by date_creation ascending, within the "Activité" (activity) bucket. The query works, but the basic sort I have for date_creation does not. I am looking for how I would sort the Activities by date_creation, in ascending order. I've seen some posts on nested queries on Stack Overflow and in the Elasticsearch documentation, but they don't seem to answer how to address the complexity of my query.
I am using ElasticSearch 2.3.5 with Lucene 5.5.0.
// Elasticsearch search request. `index` is "resultats_" + env; `body` holds a
// request-level sort, a filtered query and a nested aggregation tree.
var searchQuery = {
index: "resultats_" + env,
body: {
// size 0: no hits are requested at the request level.
size: 0,
// NOTE(review): this sort sits at the request level while size is 0; the
// top_hits under Activité below has no sort of its own.
sort: [{ date_creation: { order: "asc", mode: "min" } }],
query: {
filtered: {
query: {
match_all: {}
},
filter: {
query: {
bool: {
// A single empty clause.
should: [{}],
// Two term clauses: player_id and classes, both taken from params.
must: [
{
term: {
player_id: {
value: params.player_id
}
}
},
{
term: {
classes: {
value: params.grade
}
}
}
],
// A single empty clause, same as `should`.
must_not: [{}]
}
}
}
}
},
// Bucket nesting, outermost first: Matière (id_matiere) > PP (id_point_pedago)
// > Compétence (id_competence) > Activité (id_activite) > Trimestres (T1-T3).
aggs: {
Matière: {
terms: {
field: "id_matiere",
size: 10
},
aggs: {
// One hit per bucket, with _source limited to titre_matiere.
"Titre matière": {
top_hits: {
_source: {
include: ["titre_matiere"]
},
size: 1
}
},
PP: {
terms: {
field: "id_point_pedago",
size: 10
},
aggs: {
// One hit per bucket, with _source limited to titre_point_pedago.
"Titre PP": {
top_hits: {
_source: {
include: ["titre_point_pedago"]
},
size: 1
}
},
Compétence: {
terms: {
field: "id_competence",
size: 10
},
aggs: {
// One hit per bucket, with _source limited to titre_competence.
"Titre compétence": {
top_hits: {
_source: {
include: ["titre_competence"]
},
size: 1
}
},
Activité: {
terms: {
field: "id_activite",
size: 10
},
aggs: {
// One hit per bucket, with _source limited to two fields; no sort is given here.
"Titre activité": {
top_hits: {
_source: {
include: [
"titre_activite",
"nombre_perimetre_occurrence"
]
},
size: 1
}
},
// Three named filters, each a date_creation range whose bounds come from params.
Trimestres: {
filters: {
filters: {
T1: {
range: {
date_creation: {
gte: params.t1_start,
lte: params.t1_end
}
}
},
T2: {
range: {
date_creation: {
gte: params.t2_start,
lte: params.t2_end
}
}
},
T3: {
range: {
date_creation: {
gte: params.t3_start,
lte: params.t3_end
}
}
}
}
},
// Per-trimester metrics: average of resultat, distinct id_occurrence count,
// and a terms breakdown of resultat.
aggs: {
Moyenne: {
avg: {
field: "resultat"
}
},
Occurrences: {
cardinality: {
field: "id_occurrence",
precision_threshold: 1000
}
},
Résultat: {
terms: {
field: "resultat",
size: 10,
min_doc_count: 0
}
}
}
}
}
}
}
}
}
}
}
}
}
}
};
You can do it like this:
Activité: {
terms: {
field: "id_activite",
size: 10
},
aggs: {
"Titre activité": {
top_hits: {
_source: {
include: [
"titre_activite",
"nombre_perimetre_occurrence"
]
},
size: 1,
sort: [{ date_creation: { order: "asc", mode: "min" } }], // <-- add this line
}
},

Filter documents where all values of an array meet some criteria in elasticsearch

I have documents that look like this:
{
times: [{start: "1461116454242"},{start:"1461116454242"}]
}
I want to get all documents where every start time is in the past. This query works when all times are in the future or all are in the past, but fails if only one time is in the future (still matches).
query: {
filtered: {
filter: {
bool: {
must: [
{
nested: {
path: "times",
filter: {
script : {
script: "doc['start'].value < now",
params: {
now: Date.now()
}
}
}
}
}
]
}
},
query: {
match_all: {}
}
}
}
One solution that I just realized:
query: {
filtered: {
filter: {
bool: {
must: [
{
nested: {
path: "times",
filter: {
"bool": {
"must": [
{
"range": {
"times.start": {
lt: new Date()
}
}
}
]
}
}
}
}
],
must_not: [
{
nested: {
path: "times",
filter: {
"bool": {
"must": [
{
"range": {
"times.start": {
gte: new Date()
}
}
}
]
}
}
}
}
]
}
},
query: {
match_all: {}
}
}
}
How about this one:
Set include_in_parent: true in your mapping:
"times": {
"type": "nested",
"include_in_parent": true,
"properties": {
"start": {
"type": "date"
}
}
}
and use the query_string syntax, without nested:
{
"query": {
"query": {
"query_string": {
"query": "times.start:<=now AND NOT times.start:>now"
}
}
}
}

Resources