Elasticsearch query syntax update 1.0.14 to 7.5.2 - elasticsearch

I am attempting to update a rather sizable Elasticsearch query. I am very new to Elasticsearch and am having a hard time wrapping my head around everything that is happening here.
This is the original query:
elasticsearch_query search_models, {
query: {
filtered: {
query: {
function_score: {
query: {
bool: {
must: [
{
multi_match: {
operator: "and",
type: "cross_fields",
query: params[:term],
fuzziness: (params[:fuzzy] || 0),
fields: [
"name^2", "address", "email", "email2",
"primary_contact", "id", "lotname^7",
"lotname_keyword^9", "corp_name^4",
"vin^4", "serial_number^3", "dba_names",
"title_number", "title_tracking",
"full_name^3", "username", "reference", "user_name",
"phone", "text_number", "name_keyword^9"
],
},
}
],
must_not: [
{ term: { access: { value: 2 } } },
{ term: { ledger: { value: "payroll" } } },
{ term: { ledger: { value: "credit card" } } }
],
should: [
{ term: { active: { value: 1, boost: 100 } } },
{ term: { active: { value: 2, boost: 50 } } },
{ term: { active: { value: 3, boost: 0.05 } } },
{ term: { active: { value: 4, boost: 50 } } },
{ term: { active: { value: 5, boost: 50 } } },
{ term: { active: { value: 6, boost: 50 } } },
{ term: { branch_id: { value: current_branch.id, boost: 100 } } }
]
}
},
functions: [
{
filter: { term: { auto_declined: 1 } },
boost_factor: 0.3
},
{
filter: { term: { auto_declined: 0 } },
boost_factor: 0.0001
},
{
filter: { term: { access: 1 } }, # current employee
boost_factor: 10
},
{
filter: { term: { access: 0 } },
boost_factor: 0.2
},
{
filter: { term: { unit_status: 1 } }, # current unit
boost_factor: 2
},
{
filter: { type: {value: 'txn'} },
boost_factor: 0.4
}
]
}
},
filter: {
"or" =>
{ filters: [
{ term: { "branch_id" => current_branch.id }},
{ type: { "value" => "auction" }},
{ type: { "value" => "fee_schedule"}},
{ type: { "value" => "unit"}},
{
"and" => [
{ type: { "value" => "user" }},
{ "or" => [
{ term: { "access" => 1 }}
]}
]
}
]}
}
}
}
}
This is where I have gotten to thus far:
elasticsearch_query search_models, {
query: {
bool: {
must: {
function_score: {
query: {
bool: [
must: {
multi_match: {
query: params[:term],
type: "cross_fields",
operator: "and",
fields: [
"name^2", "address", "email", "email2",
"primary_contact", "id", "lotname^7",
"lotname_keyword^9", "corp_name^4",
"vin^4", "serial_number^3", "dba_names",
"title_number", "title_tracking",
"full_name^3", "username", "reference", "user_name",
"phone", "text_number", "name_keyword^9"
]
}
},
must_not: [
{ term: { access: { value: 2 } } },
{ term: { ledger: { value: "payroll" } } },
{ term: { ledger: { value: "credit card" } } }
],
should: [
{ term: { active: { value: 1, boost: 100 } } },
{ term: { active: { value: 2, boost: 50 } } },
{ term: { active: { value: 3, boost: 0.05 } } },
{ term: { active: { value: 4, boost: 50 } } },
{ term: { active: { value: 5, boost: 50 } } },
{ term: { active: { value: 6, boost: 50 } } },
{ term: { branch_id: { value: current_branch.id, boost: 100 } } }
]
]
},
functions: [
{
filter: { term: { auto_declined: 1 } },
weight: 0.3
},
{
filter: { term: { auto_declined: 0 } },
weight: 0.0001
},
{
filter: { term: { access: 1 } }, # current employee
weight: 10
},
{
filter: { term: { access: 0 } },
weight: 0.2
},
{
filter: { term: { unit_status: 1 } }, # current unit
weight: 2
},
{
filter: { type: {value: 'txn'} },
weight: 0.4
}
]
}
},
filter: [
{ term: { branch_id: current_branch.id } },
{ type: { value: "auction" } },
{ type: { value: "fee_schedule"} },
{ type: { value: "unit"} },
bool: {
must: {
bool: {
should: [
{ type: { value: "user" } },
{ term: { access: 1 } }
]
}
}
}
]
}
}
}
I have:
replaced 'filtered' with 'bool' and 'must'
replaced 'boost_factor' with 'weight'
removed 'fuzziness' from the 'cross_fields' type 'multi_match'
attempted to update the 'or' and 'and' logic with newer 'bool' syntax.
The first three actions seemed to have done the trick with their respective errors, but I am getting hung up on this filter with the 'or' and 'and' logic. I would greatly appreciate some guidance!
This is the error I am receiving:
[400] {"error":{"root_cause":[{"type":"parsing_exception","reason":"[bool] query malformed, no start_object after query name","line":1,"col":61}],"type":"parsing_exception","reason":"[bool] query malformed, no start_object after query name","line":1,"col":61},"status":400}
If any further information would be helpful, please let me know.

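Your query is not well-formed, which is what causes the parsing exception: the inner `bool` is given an array (`bool: [ ... ]`) instead of an object, and the last `bool` clause inside `filter` is not wrapped in its own `{ }`. See the query below, which parses without error.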
{
  "query": {
    "bool": {
      "must": {
        "function_score": {
          "query": {
            "bool": {
              "must": [{
                "multi_match": {
                  "query": "params[:term]",
                  "type": "cross_fields",
                  "operator": "and",
                  "fields": [
                    "name^2", "address", "email", "email2",
                    "primary_contact", "id", "lotname^7",
                    "lotname_keyword^9", "corp_name^4",
                    "vin^4", "serial_number^3", "dba_names",
                    "title_number", "title_tracking",
                    "full_name^3", "username", "reference", "user_name",
                    "phone", "text_number", "name_keyword^9"
                  ]
                }
              }],
              "must_not": [
                { "term": { "access": { "value": 2 } } },
                { "term": { "ledger": { "value": "payroll" } } },
                { "term": { "ledger": { "value": "credit card" } } }
              ],
              "should": [
                { "term": { "active": { "value": 1, "boost": 100 } } },
                { "term": { "active": { "value": 2, "boost": 50 } } },
                { "term": { "active": { "value": 3, "boost": 0.05 } } },
                { "term": { "active": { "value": 4, "boost": 50 } } },
                { "term": { "active": { "value": 5, "boost": 50 } } },
                { "term": { "active": { "value": 6, "boost": 50 } } },
                { "term": { "branch_id": { "value": "current_branch.id", "boost": 100 } } }
              ]
            }
          },
          "functions": [
            {
              "filter": { "term": { "auto_declined": 1 } },
              "weight": 0.3
            },
            {
              "filter": { "term": { "auto_declined": 0 } },
              "weight": 0.0001
            },
            {
              "filter": { "term": { "access": 1 } },
              "weight": 10
            },
            {
              "filter": { "term": { "access": 0 } },
              "weight": 0.2
            },
            {
              "filter": { "term": { "unit_status": 1 } },
              "weight": 2
            },
            {
              "filter": { "type": {"value": "txn"} },
              "weight": 0.4
            }
          ]
        }
      },
      "filter": {
        "bool": {
          "should": [
            { "term": { "branch_id": "current_branch.id" } },
            { "type": { "value": "auction" } },
            { "type": { "value": "fee_schedule"} },
            { "type": { "value": "unit"} },
            {
              "bool": {
                "must": [
                  { "type": { "value": "user" } },
                  { "term": { "access": 1 } }
                ]
              }
            }
          ],
          "minimum_should_match": 1
        }
      }
    }
  }
}
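You can compare it with your query and adjust the curly braces and square brackets accordingly. Note that `params[:term]` and `current_branch.id` appear here as string placeholders; substitute the real values.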

Related

how to get document above average value in elasticsearch

After grouping by device ID, I want to count, for each device, the documents whose value is greater than or equal to that device's average value.
doc = [
{ deviceId: 1, data: {temp:1} },
{ deviceId: 1, data: {temp:2} },
{ deviceId: 1, data: {temp:3} },
{ deviceId: 1, data: {temp:4} },
{ deviceId: 1, data: {temp:5} },
{ deviceId: 2, data: {temp:1} },
{ deviceId: 2, data: {temp:2} },
{ deviceId: 2, data: {temp:3} },
{ deviceId: 2, data: {temp:4} },
{ deviceId: 2, data: {temp:5} },
{ deviceId: 3, data: {temp:1} },
{ deviceId: 3, data: {temp:2} },
{ deviceId: 3, data: {temp:3} },
{ deviceId: 3, data: {temp:4} },
{ deviceId: 3, data: {temp:5} },
];
"The desired result is"
result = aggregations :{
clusters:{
...
bucket:[
{ key:"1",
doc_count: 5,
avgData: {value: 3.0}
above_avgDataValue : {
doc_count : 2 // === data.temp > 3
}
}
]
}
}
Below is the aggregation I tried
_search {
size:0,
query:{
bool:{
filter:[
terms:{deviceId:[1,2]}
]
}
},
aggs:{
cluster:{
terms:{field:deviceId}
},
aggs:{
"avgData" : {"avg": {"field":"temp"}}
}
}
};
please help
TL;DR
I don't think this is possible with a single query.
But you could work around the issue with two queries:
1. Get the average per deviceId.
2. Get the number of documents at or above that average for each deviceId.
Workaround
To get the average per deviceId the following query should work.
GET /73034730/_search
{
"size": 0,
"aggs": {
"avg_per_fields": {
"terms": {
"field": "deviceId",
"size": 10
},
"aggs": {
"avg": {
"avg": {
"field": "data.temp"
}
}
}
}
}
}
Then you could do the following query
GET /73034730/_search
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"match": {
"deviceId": "1"
}
},
{
"range": {
"data.temp": {
"gte": 3
}
}
}
]
}
},
{
"bool": {
"must": [
{
"match": {
"deviceId": "2"
}
},
{
"range": {
"data.temp": {
"gte": 3
}
}
}
]
}
},
{
"bool": {
"must": [
{
"match": {
"deviceId": "3"
}
},
{
"range": {
"data.temp": {
"gte": 3
}
}
}
]
}
}
],
"minimum_should_match": 1
}
},
"size": 0,
"aggs": {
"avg_per_fields": {
"terms": {
"field": "deviceId",
"size": 10
}
}
}
}
Given the dataset you have, this should return:
{
...
"aggregations": {
"avg_per_fields": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 3
},
{
"key": 2,
"doc_count": 3
},
{
"key": 3,
"doc_count": 3
}
]
}
}
}

Elasticsearch '[bool] failed to parse field [filter]' exception

I am trying to solve parsing exception of my search query.
I would like to get some help there :)
The exception reason:
'[1:65] [bool] failed to parse field [filter]'
message:'x_content_parse_exception'
My search:
data = (await this.elasticClient.search({
index: Indexes.Measurements,
size: 10000,
body: {
query: {
bool: {
filter: {
terms: {
"MachineId": ["mo3", "mo2"]
},
range: {
'#timestamp': {
gte: `now-${lastMinutes}m`,
lte: 'now'
}
}
}
},
},
sort : [{ "#timestamp" : "desc" }]
}})).body.hits.hits.map(data => data._source);
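You are missing the [] around the filter clauses. Your `filter` object holds two query clauses (`terms` and `range`) as sibling keys, but each clause must be its own object inside an array. Try the query below: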
// Search the measurements index for documents whose MachineId is "mo3" or "mo2"
// and whose '#timestamp' lies within the last `lastMinutes` minutes, newest first,
// then keep only each hit's `_source`.
data = (await this.elasticClient.search({
  index: Indexes.Measurements,
  size: 10000,
  body: {
    query: {
      bool: {
        // `filter` is an array: every clause is wrapped in its own object,
        // and all clauses must match.
        filter: [
          { terms: { "MachineId": ["mo3", "mo2"] } },
          { range: { '#timestamp': { gte: `now-${lastMinutes}m`, lte: 'now' } } },
        ],
      },
    },
    // `sort` stays inside `body`, as in the question's original request.
    sort: [{ "#timestamp": "desc" }],
  },
})).body.hits.hits.map(data => data._source);
In JSON format, it will be like this
{
"query": {
"bool": {
"filter": [
{
"terms": {
"MachineId": [
"mo3",
"mo2"
]
}
},
{
"range": {
"#timestamp": {
"gte": "now-${lastMinutes}m",
"lte": "now"
}
}
}
]
}
}
}

elasticsearch nested query returns only last 3 results

We have the following elasticsearch mapping
{
index: 'data',
body: {
settings: {
analysis: {
analyzer: {
lowerCase: {
tokenizer: 'whitespace',
filter: ['lowercase']
}
}
}
},
mappings: {
// used for _all field
_default_: {
index_analyzer: 'lowerCase'
},
entry: {
properties: {
id: { type: 'string', analyzer: 'lowerCase' },
type: { type: 'string', analyzer: 'lowerCase' },
name: { type: 'string', analyzer: 'lowerCase' },
blobIds: {
type: 'nested',
properties: {
id: { type: 'string' },
filename: { type: 'string', analyzer: 'lowerCase' }
}
}
}
}
}
}
}
and a sample document that looks like the following:
{
"id":"5f02e9dae252732912749e13",
"type":"test_type",
"name":"test_name",
"creationTimestamp":"2020-07-06T09:07:38.775Z",
"blobIds":[
{
"id":"5f02e9dbe252732912749e18",
"filename":"test1.csv"
},
{
"id":"5f02e9dbe252732912749e1c",
"filename":"test2.txt"
},
// removed in-between elements for simplicity
{
"id":"5f02e9dbe252732912749e1e",
"filename":"test3.csv"
},
{
"id":"5f02e9dbe252732912749e58",
"filename":"test4.txt"
},
{
"id":"5f02e9dbe252732912749e5a",
"filename":"test5.csv"
},
{
"id":"5f02e9dbe252732912749e5d",
"filename":"test6.txt"
}
]
}
I have the following ES query which is querying documents for a certain timerange based on the creationTimestamp field and then filtering the nested field blobIds based on a user query, that should match the blobIds.filename field.
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"range": {
"creationTimestamp": {
"gte": "2020-07-01T09:07:38.775Z",
"lte": "2020-07-07T09:07:40.147Z"
}
}
},
{
"nested": {
"path": [
"blobIds"
],
"query": {
"query_string": {
"fields": [
"blobIds.filename"
],
"query": "*"
}
},
// returns the actual blobId hit
// and not the whole array
"inner_hits": {}
}
},
{
"query": {
"query_string": {
"query": "+type:*test_type* +name:*test_name*"
}
}
}
]
}
}
}
},
"sort": [
{
"creationTimestamp": {
"order": "asc"
},
"id": {
"order": "asc"
}
}
]
}
The above entry is clearly matching the query. However, it seems like there is something wrong with the returned inner_hits, since I always get only the last 3 blobIds elements instead of the whole array that contains 24 elements, as can be seen below.
{
"name": "test_name",
"creationTimestamp": "2020-07-06T09:07:38.775Z",
"id": "5f02e9dae252732912749e13",
"type": "test_type",
"blobIds": [
{
"id": "5f02e9dbe252732912749e5d",
"filename": "test4.txt"
},
{
"id": "5f02e9dbe252732912749e5a",
"filename": "test5.csv"
},
{
"id": "5f02e9dbe252732912749e58",
"filename": "test6.txt"
}
]
}
I find it very strange since I'm only doing a simple * query.
I am using Elasticsearch v1.7 and cannot upgrade at the moment.

ElasticSearch: Filtering a Complex Query

I have a complex query in Elasticsearch (below) and I need a pretty basic filter to filter out all "resultat" (results) where the value = 1. (resultat is a numeric field).
I've tried using the "must_not" term in the code below, which was found here and while it does not cause errors, it still does not seem to properly filter the results.
I am using ElasticSearch 2.3.5 with Lucene 5.5.0.
var searchQuery = {
index: "resultats_" + env,
body: {
size: 0,
query: {
filtered: {
query: {
match_all: {}
},
filter: {
query: {
bool: {
should: [{}],
must: [
{
term: {
player_id: {
value: params.player_id
}
}
},
{
term: {
classes: {
value: params.grade
}
}
}
],
must_not: [
{
term: {
resultat: {
value: 1
}
}
}
]
}
}
}
}
},
aggs: {
Matière: {
terms: {
field: "id_matiere",
size: 10
},
aggs: {
"Titre matière": {
top_hits: {
_source: {
include: ["titre_matiere"]
},
size: 1
}
},
PP: {
terms: {
field: "id_point_pedago",
size: 10
},
aggs: {
"Titre PP": {
top_hits: {
_source: {
include: ["titre_point_pedago"]
},
size: 1
}
},
Compétence: {
terms: {
field: "id_competence",
size: 10
},
aggs: {
"Titre compétence": {
top_hits: {
_source: {
include: ["titre_competence"]
},
size: 1
}
},
Activité: {
terms: {
field: "id_activite",
size: 10
},
aggs: {
"Titre activité": {
top_hits: {
_source: {
include: [
"titre_activite",
"nombre_perimetre_occurrence"
]
},
size: 1,
sort: [
{ date_creation: { order: "asc", mode: "min" } }
]
}
},
Trimestres: {
filters: {
filters: {
T1: {
range: {
date_creation: {
gte: params.t1_start,
lte: params.t1_end
}
}
},
T2: {
range: {
date_creation: {
gte: params.t2_start,
lte: params.t2_end
}
}
},
T3: {
range: {
date_creation: {
gte: params.t3_start,
lte: params.t3_end
}
}
}
}
},
aggs: {
Moyenne: {
avg: {
field: "resultat"
}
},
Occurrences: {
cardinality: {
field: "id_occurrence",
precision_threshold: 1000
}
},
Résultat: {
terms: {
field: "resultat",
size: 10,
min_doc_count: 0
}
}
}
}
}
}
}
}
}
}
}
}
}
}
};

Inner hits on grandparents still not working

I have problems retrieving the inner_hits of my "grandparent" items.
Retrieving parents from a child query works fine, but I can't get it to also return the items one level further up.
Any ideas?
The known issue for this should be fixed by now (2.3), and the workarounds are written for nested objects rather than parent/child hierarchy data, so I can't get them to work for me.
Code in Sense-format:
POST /test/child/_search
{
"query": {
"has_parent": {
"type": "parent",
"query": {
"has_parent": {
"type": "grandparent",
"query": {
"match_all": {}
},
"inner_hits": {}
}
},
"inner_hits": {}
}
}
}
PUT /test/child/3?parent=2&routing=1
{
"id": 3,
"name": "child",
"parentid": 2
}
PUT /test/parent/2?parent=1&routing=1
{
"id": 2,
"name": "parent",
"parentid": 1
}
PUT /test/grandparent/1
{
"id": 1,
"name": "grandparent"
}
PUT /test
{
"mappings": {
"grandparent": {},
"parent": {
"_parent": {
"type": "grandparent"
}
},
"child": {
"_parent": {
"type": "parent"
}
}
}
}
This is sample code for finding the grandparent, using nested has_parent queries that each request their own inner_hits:
const filterPath = `hits.hits.inner_hits.activity.hits.hits.inner_hits.user.hits.hits._source*,
hits.hits.inner_hits.activity.hits.hits.inner_hits.user.hits.hits.inner_hits.fofo.hits.hits._source*`;
const source = ['id', 'name', 'thumbnail'];
const { body } = await elasticWrapper.client.search({
index: ElasticIndex.UserDataFactory,
filter_path: filterPath,
_source: source,
body: {
from,
size,
query: {
bool: {
must: [
{
match: {
relation_type: ElasticRelationType.Like,
},
},
{
has_parent: {
parent_type: ElasticRelationType.Post,
query: {
bool: {
must: [
{
term: {
id: {
value: req.params.id,
},
},
},
{
has_parent: {
parent_type: ElasticRelationType.User,
query: {
bool: {
must: [
{
exists: {
field: 'id',
},
},
],
should: [
{
has_child: {
type: ElasticRelationType.Follower,
query: {
bool: {
minimum_should_match: 1,
should: [
{
match: {
follower:
req.currentUser?.id,
},
},
{
match: {
following:
req.currentUser?.id,
},
},
],
},
},
inner_hits: {
_source: [
'follower',
'following',
'status',
],
},
},
},
],
},
},
inner_hits: {
_source: ['id', 'name', 'thumbnail'],
},
},
},
],
},
},
inner_hits: {},
},
},
],
},
},
sort: [
{
createdAt: {
order: 'desc',
},
},
],
},
});

Resources