query return [parsing_exception] [size] query malformed, no start_object after query name, with { line=1 & col=264 } - elasticsearch

I'm new in elasticsearch, and i try to use dev tools to create filters.
here is what work and I want to use
POST /transform_alldomain/_search
{
"size":0,
"aggs": {
"group": {
"terms": {
"field": "Email.keyword"
},
"aggs": {
"group": {
"terms": {
"field": "bln.keyword"
},
"aggs": {
"group_docs": {
"top_hits": {
"size": 1,
"sort": [
{
"extract_date.max": {
"order": "desc"
}
}
]
}
}
}
}
}
}
}}
now i want to use this similiar stuff to filter as type this into filter, edit as query dsl
{
"size":0,
"aggs": {
"group": {
"terms": {
"field": "Email.keyword"
},
"aggs": {
"group": {
"terms": {
"field": "bln.keyword"
},
"aggs": {
"group_docs": {
"top_hits": {
"size": 1,
"sort": [
{
"extract_date.max": {
"order": "desc"
}
}
]
}
}
}
}
}
}
}}
it returns
[parsing_exception] [size] query malformed, no start_object after query name, with { line=1 & col=324 }
I don't know what is the difference and how to make it work
I need to create searched object from this
How I execute the filter:
it returns

The Discover app is not the right tool to use to make aggregations, the Discover app is only useful for queries and filters.
What you want to achieve can be done with a Data table visualization. So instead of Discover, go to Visualize, then pick "Create Visualization"
Then pick the "Data Table" Visualization
Then pick your index pattern
And finally you can define your two terms aggregations like this:

Related

How to define percentage of result items with specific field in Elasticsearch query?

I have a search query that returns all items matching users that have type manager or lead.
{
"from": 0,
"size": 20,
"query": {
"bool": {
"should": [
{
"terms": {
"type": ["manager", "lead"]
}
}
]
}
}
}
Is there a way to define what percentage of the results should be of type "manager"?
In other words, I want the results to have 80% of users with type manager and 20% with type lead.
I want to make a suggestion to use bucket_path aggregation. As I know this aggregation needs to be run in sub-aggs of a histogram aggregation. As you have such field in your mapping so I think this query should work for you:
{
"size": 0,
"aggs": {
"NAME": {
"date_histogram": {
"field": "my_datetime",
"interval": "month"
},
"aggs": {
"role_type": {
"terms": {
"field": "type",
"size": 10
},
"aggs": {
"count": {
"value_count": {
"field": "_id"
}
}
}
},
"role_1_ratio": {
"bucket_script": {
"buckets_path": {
"role_1": "role_type['manager']>count",
"role_2": "role_type['lead']>count"
},
"script": "params.role_1 / (params.role_1+params.role_2)*100"
}
},
"role_2_ratio": {
"bucket_script": {
"buckets_path": {
"role_1": "role_type['manager']>count",
"role_2": "role_type['lead']>count"
},
"script": "params.role_2 / (params.role_1+params.role_2)*100"
}
}
}
}
}
}
Please let me know if it didn't work well for you.

Filter out terms aggregation buckets in elasticsearch after applying aggregation

Below is snapshot of the dataset:
recordNo employeeId employeeStatus employeeAddr
1 employeeA Permanent
2 employeeA ABC
3 employeeB Contract
4 employeeB CDE
I want to get the list of employees along with employeeStatus and employeeAddr.
So I am using terms aggregation on employeeId and then using sub-aggregations of employeeStatus and employeeAddr to get these details.
Below query returns the results correctly.
{
"aggregations": {
"Employee": {
"terms": {
"field": "employeeID"
},
"aggregations": {
"employeeStatus": {
"terms": {"field": "employeeStatus"}
},
"employeeAddr": {
"terms": {"field": "employeeAddr"}
}
}
}
}
}
Now I want only the employees which are in Permanent status. So I am applying filter aggregation.
{
"aggregations": {
"filter_Employee_employeeID": {
"filter": {
"bool": {
"must": [
{
"match": {
"employeeStatus": {"query": "Permanent"}
}
}
]
}
},
"aggregations": {
"Employee": {
"terms": {
"field": "employeeID"
},
"aggregations": {
"employeeStatus": {
"terms": {"field": "employeeStatus"}
},
"employeeAddr": {
"terms": {"field": "employeeAddr"}
}
}
}
}
}
}
}
Now the problem is that the employeeAddr aggregation returns no buckets for employeeA because record 2 gets filtered out before the aggregation is done.
Assuming that I cannot modify the data set and I want to achieve the result with a single elastic query, how can I do it?
I checked the Bucket Selector pipeline aggregation but it only works for metric aggregations.
Is there a way to filter out term buckets after the aggregation is applied?
If I understood correctly you want to preserve the aggregations even if you use some kind of filter. To achieve that, try using the post_filter clause.
You can check the docs here
The clause is applied "outside" the aggregation. Using your example, it should look like this:
{
"aggregations": {
"filter_Employee_employeeID": {
"aggregations": {
"Employee": {
"terms": {
"field": "employeeID"
},
"aggregations": {
"employeeStatus": {
"terms": {
"field": "employeeStatus"
}
},
"employeeAddr": {
"terms": {
"field": "employeeAddr"
}
}
}
}
}
}
},
"post_filter": {
"bool": {
"must": [
{
"match": {
"employeeStatus": {
"query": "Permanent"
}
}
}
]
}
}
}
I tested a combination of the include field for the terms aggregation, plus using a bucket_selector with document count would give you the desired result.
Filtering term values is here.
Bucket selector using document count is here
the subtlety here is that, yes you need numeric values, but you can also reference meta/custom fields that elasticsearch has
{
"aggregations": {
"Employee": {
"terms": {
"field": "employeeId.keyword"
},
"aggregations": {
"employeeStatus": {
"terms": {"field": "employeeStatus", "include": "Permanent"}
},
"employeeAddr": {
"terms": {"field": "employeeAddr"}
},
"min_bucket_selector": {
"bucket_selector": {
"buckets_path": {
"count": "employeeStatus._bucket_count"
},
"script": {
"source": "params.count != 0"
}
}
}
}
}
}
}
I tested this on 7.10 and it worked, returning only employeeA, with the address included.

ELASTICSEARCH - How can i get an aggregation in a boolean field?

I want to get aggregation in a boolean field, but the out is a error:
query:
"""
{
"size": 0,
"aggs": {
"RecentCreated": {
"terms": {
"field": "created_at.keyword",
"order": {
"_key": "desc"
},
"size": 1
},
"aggs": {
"nestedData": {
"nested": {
"path": "data.add.serv"
},
"aggs": {
"NAME": {
"terms": {
"field": "data.add.serv.beast"
, "include": true
}
}
}
}
}
}
}
}
"""
error:
"type" : "x_content_parse_exception",
"reason" : "[terms] include doesn't support values of type: VALUE_BOOLEAN"
I have been reading that it is possible to transform the true values ​​into 1 through script to get count in the aggregation, but I cannot get the result of the true values
How could I get a count of the boolean field with true value?
I think what you might want to do is use a filter aggregation over your nested document rather than a terms aggregation. So in short change this bit of your query:
"aggs": {
"NAME": {
"terms": {
"field": "data.add.serv.beast",
"include": true
}
}
}
to
"aggs": {
"NAME": {
"filter": {
"term": {
"data.add.serv.beast": true
}
}
}
}
I'm not too familiar with nested aggregations, so there might still be an error with my syntax. The main point is to use a filter aggregation rather than terms, hopefully that should work for you.

Elasticsearch: Aggregate all unique values of a field and apply a condition or filter by another field

My documents look like this:
{
"ownID": "Val_123",
"parentID": "Val_456",
"someField": "Val_78",
"otherField": "Val_90",
...
}
I am trying to get all (unique, as in one instance) results for a list of ownID values, while filtering by a list of parentID values and vice-versa.
What I did so far is:
Get (separate!) unique values for ownID and parentID in key1 and key2
{
"size": 0,
"aggs": {
"key1": {
"terms": {
"field": "ownID",
"include": {
"partition": 0,
"num_partitions": 10
},
"size": 100
}
},
"key2": {
"terms": {
"field": "parentID",
"include": {
"partition": 0,
"num_partitions": 10
},
"size": 100
}
}
}
}
Use filter to get (some) results matching either ownID OR parentID
{
"size": 0,
"query": {
"bool": {
"should": [
{
"terms": {
"ownID": ["Val_1","Val_2","Val_3"]
}
},
{
"terms": {
"parentID": ["Val_8","Val_9"]
}
}
]
}
},
"aggs": {
"my_filter": {
"top_hits": {
"size": 30000,
"_source": {
"include": ["ownID", "parentID","otherField"]
}
}
}
}
}
However, I need to get separate results for each filter in the second query, and get:
(1) the parentID of the documents matching some value of ownID
(2) the ownID for the documents matching some value of parentID.
So far I managed to do it using two similar queries (see below for (1)), but I would ideally want to combine them and query only once.
{
"size": 0,
"query": {
"bool": {
"should": [
{
"terms": {
"ownID": [ "Val1", Val_2, Val_3 ]
}
}
]
}
},
"aggs": {
"my_filter": {
"top_hits": {
"size": 30000,
"_source": {
"include": "parentID"
}
}
}
}
}
I'm using Elasticsearch version 5.2
If I got your question correctly then you need to get all the aggregations count correct irrespective of the filter query but in search hits you want the filtered documents only, so for this elasticsearch has another type of filter : "post filter" : refer to this : https://www.elastic.co/guide/en/elasticsearch/reference/5.5/search-request-post-filter.html
its really simple, it will just filter the results after the aggregations have been computed.

Elasticsearch query shows more data than it has

In my contains field I have "xr" data and "xra","xrb","xrc" seperately. When I make query for the count of "xr" elasticsearch does not return me 1, it returns 4. How can I manage it?
This is my query
"aggs": {
"Group1": {
"terms": {
"field": "method.keyword",
"include": ".*POST.*",
},
"aggs": {
"Group3": {
"terms": {
"field": "contains.keyword",
"size": 11593,
}
}
},
}

Resources