Kibana and Vega with nested aggregations - elasticsearch

Given the below Vega JSON in Kibana, how can i make it dynamic such that when a user on searches for something in the search box it applies to both the query and the pos filter tags below. So let's say a user types in 'face*' in the search box then i want the values marked with <----- this to change below to dynamically change
{
"$schema": "https://vega.github.io/schema/vega/v4.3.0.json",
"autosize": "fit",
"title": "POS COUNT",
"data": [
{
"name": "data_table",
"url": {
"index": "sa_test_index_data",
"body": {
"size": 0,
"query": {
"nested": {
"path": "xforms.sentence.tokens",
"query": {
"bool": {
"should": [
{
"wildcard": {
"xforms.sentence.tokens.value.keyword": "24*" <----- this to change
}
}
]
}
}
}
},
"aggs": {
"sentence": {
"nested": {"path": "xforms.sentence.tokens"},
"aggs": {
"pos_filter": {
"filter": {
"wildcard": {"xforms.sentence.tokens.value.keyword": "24*"} <----- this to change
},
"aggs": {
"pos": {
"terms": {"field": "xforms.sentence.tokens.tag.keyword"}
}
}
}
}
}
}
}
},
"format": {"property": "aggregations.sentence.pos_filter.pos.buckets"},
"transform": [
{
"type": "collect",
"sort": {"field": ["doc_count"], "order": ["descending"]}
}
]
},
{
"name": "data_table_pie_inner",
"source": "data_table",
"transform": [
{
"type": "aggregate",
"groupby": ["key"],
"fields": ["doc_count"],
"ops": ["sum"],
"as": ["ff_sum_count"]
},
{
"type": "pie",
"field": "ff_sum_count",
"as": ["ff_inner_startAngle", "ff_inner_endAngle"]
}
]
}
],
"scales": [
{
"name": "scale_color",
"type": "ordinal",
"range": {"scheme": "category10"},
"domain": {"data": "data_table", "field": "key"}
}
],
"marks": [
{
"name": "mark_inner_ring",
"type": "arc",
"from": {"data": "data_table_pie_inner"},
"encode": {
"enter": {
"x": {"signal": "width / 2"},
"y": {"signal": "height / 2"},
"fill": {"scale": "scale_color", "field": "key"},
"fillOpacity": {"value": 0.8},
"stroke": {"value": "white"},
"startAngle": {"field": "ff_inner_startAngle"},
"endAngle": {"field": "ff_inner_endAngle"},
"innerRadius": {"value": 0},
"outerRadius": {"value": 100},
"tooltip": {
"signal": "datum['key'] + ': count ' + datum['ff_sum_count']"
}
}
}
}
],
"legends": [
{
"fill": "scale_color",
"title": "POS",
"orient": "right",
"encode": {
"symbols": {"enter": {"fillOpacity": {"value": 0.5}}},
"labels": {"update": {"text": {"field": "value"}}}
}
}
]
}

Slightly confused by the question, but I figure you are looking to incorporate the dashboard filters into the query body aswell. Make sure to use the %...% parameters in your query likeso.
I noticed in the comment you mention it going into the aggs. I don't think this is doable, maybe try using the %...% parameters in there?
{
body: {
query: {
bool: {
must: [
// This string will be replaced
// with the auto-generated "MUST" clause
"%dashboard_context-must_clause%"
{
range: {
// apply timefilter (upper right corner)
// to the #timestamp variable
#timestamp: {
// "%timefilter%" will be replaced with
// the current values of the time filter
// (from the upper right corner)
"%timefilter%": true
// Only work with %timefilter%
// Shift current timefilter by 10 units back
shift: 10
// week, day (default), hour, minute, second
unit: minute
}
}
}
]
must_not: [
// This string will be replaced with
// the auto-generated "MUST-NOT" clause
"%dashboard_context-must_not_clause%"
]
filter: [
// This string will be replaced
// with the auto-generated "FILTER" clause
"%dashboard_context-filter_clause%"
]
}
}
}
}

Related

Cannot seem to use must and must_not together in an elastic search query

If I run the following query:
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "boxing",
"fuzziness": 2,
"minimum_should_match": 2
}
}
],
"must_not": [
{
"terms_set": {
"allowedCountries": {
"terms": ["gb", "mx"],
"minimum_should_match_script": {
"source": "2"
}
}
}
}
],
"filter": [
{
"range": {
"expireTime": {
"gt": 1674061907954
}
}
},
{
"term": {
"region": {
"value": "row"
}
}
},
{
"term": {
"sourceType": {
"value": "article"
}
}
}
]
}
}
}
against an index with articles that look like:
{
"_index": "content-items-v10",
"_type": "_doc",
"_id": "e7hm75ui4dma1mm4j8q5v7914",
"_score": 4.3724976,
"_source": {
"allowedCountries": ["gb", "ie"],
"body": "Both Joshua Buatsi and Craig Richards join The DAZN Boxing Show ahead of their clash at London's O2 Arena. Matchroom's Eddie Hearn also gives his take on the night, as well as Chantelle Cameron previewing her contest with Victoria Noelia Bustos.",
"competitions": [
{
"id": "8lo6205qyio0fksjx9glqbdhj",
"name": "Buatsi v Richards"
}
],
"contestants": [
{
"id": "7rq59j3eiamxlm12vhxcsgujj",
"name": "Joshua Buatsi"
},
{
"id": "boby9oqe23g6qyuwphrxh8su5",
"name": "Craig Richards"
}
],
"countries": [
{
"id": "7yasa43laq1nb2e6f8bfuvxed",
"name": "World"
},
{
"id": "258l9t5sm55592i08mdpqzr3t",
"name": "United Kingdom"
}
],
"dotsLastUpdateTime": 1673979749396,
"expireTime": 4800000000000,
"fixtureDate": {},
"headline": "Buatsi vs. Richards: Preview",
"id": "e7hm75ui4dma1mm4j8q5v7914",
"importance": 0,
"languageKeys": ["en"],
"languages": ["en"],
"lastUpdateTime": {
"ts": 1653088281000,
"iso8601": "2022-05-20T23:11:21.000Z"
},
"promoImageUrl": null,
"publication": {
"typeId": "1plcw0iyhx9vn1fcanbm2ja3rf",
"typeName": "Shoulder"
},
"publishedTime": {
"ts": 1653088281000,
"iso8601": "2022-05-20T23:11:21.000Z"
},
"region": "row",
"shortHeadline": null,
"sourceType": "article",
"sports": [
{
"id": "2x2oqzx60orpoeugkd754ga17",
"name": "Boxing"
}
],
"teaser": "",
"thumbnailImageUrl": "https://images.daznservices.com/di/library/babcock_canada/45/3e/the-dazn-boxing-show-20052022_xc4jbfqi022l1shq9lu641h9e.png?t=-477976832",
"translations": {}
}
}
I get the following validation error from elasticsearch:
{
"ok": false,
"errors": {
"validation": [
{
"message": "\"query.bool.must_not\" is not allowed",
"path": [
"query",
"bool",
"must_not"
],
"type": "object.unknown",
"context": {
"child": "must_not",
"label": "query.bool.must_not",
"value": [
{
"terms_set": {
"allowedCountries": {
"terms": [
"gb",
"mx"
],
"minimum_should_match_script": {
"source": "2"
}
}
}
}
],
"key": "must_not"
}
}
]
},
"correlationId": "d29e9275-9ab3-4ff8-944d-852b98d4b503"
}
And I cannot figure out what the issue might be! From the elastic docs it should be OK.
I'm using ElasticSearch 7.9.3 running in a local docker container.
I'm hoping someone out there will give me a clue!
Cheers!
I would expect this to just work.
I'm trying to filter out articles that have both of the country codes gb and mx in the field allowedCountries.
I can include them easily enough in the results when I add the terms_set query to the bool.must section of the query.
It works well, you just need to enclose your query in the query section
{
"query": { <--- add this
"bool": { <--- your query starts here
"must": [
...
Thank you for responding!
I was helping with a system I did not have full context on - it turns out there is a proxy in the mix with validation that was blocking the must_not query. So, with the proxy fixed, it now works.

Elasticsearch: Alert on significant data change

I have an index with exchange rates change log. Documents inside index look like this
{
"sourceId": "gh-ghs",
"targetCountry": "gh",
"targetCurrency": "ghs",
"rate": 2.3,
"modified": "2021-04-07T12:00:57.2760000Z",
},
{
"sourceId": "gh-ghs",
"targetCountry": "gh",
"targetCurrency": "ghs",
"rate": 2.5,
"modified": "2021-04-06T12:00:57.2760000Z",
},
{
"sourceId": "mx-mxn",
"targetCountry": "mx",
"targetCurrency": "mxn",
"rate": 20.3,
"modified": "2021-04-08T12:00:57.2760000Z",
},
{
"sourceId": "mx-mxn",
"targetCountry": "mx",
"targetCurrency": "mxn",
"rate": 2.2,
"modified": "2021-04-07T12:00:57.2760000Z",
},
{
"sourceId": "mx-mxn",
"targetCountry": "mx",
"targetCurrency": "mxn",
"rate": 2.23,
"modified": "2021-04-06T12:00:57.2760000Z",
}
As you can see we have exchange rate change per destination (country+currency). Take a look mx-mxn documents. Last change was from 2.2 to 20.3. Probably it is human mistake, and we want to alert about such cases.
I tried to create following query to find significant changes
{
"query": {
"bool": {
"must": [],
{
"range": {
"modified": {
"format": "strict_date_optional_time",
"gte": "now",
"lte": "now - 5h"
}
}
}
],
"should": [],
"must_not": []
}
},
"aggs": {
"group": {
"terms": {
"field": "sourceId",
"size": 1000
},
"aggs": {
"2-metric": {
"top_metrics": {
"metrics": {
"field": "rate"
},
"size": 2,
"sort": {
"modified": "desc"
}
}
}
}
}
}
}
Using this query I managed to get to latest changes by destination. Response look like this
{
"aggregations": {
"group": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "mx-mxn",
"doc_count": 25,
"2-metric": {
"top": [
{
"sort": [
"2022-10-16T15:01:57.098Z"
],
"metrics": {
"rate": 12
}
},
{
"sort": [
"2022-10-16T13:01:57.098Z"
],
"metrics": {
"rate": 150
}
}
]
}
},
{
"key": "gh-ghs",
"doc_count": 18,
"2-metric": {
"top": [
{
"sort": [
"2022-10-14T11:28:38.995Z"
],
"metrics": {
"rate": 11.25
}
},
{
"sort": [
"2022-10-13T11:37:09.945Z"
],
"metrics": {
"rate": 10.9609375
}
}
]
}
}
]
}
}
}
So I managed to get two latest changes for each destination. But I want to setup alert for all buckets, where value changed more than on 10 percent. In this case it is mx-mxn. How can I do it in Elastic and Kibana ?
Tldr;
To gather the "detection" data:
anomaly detection jobs
derivative aggregation
To perform the alert:
Kibana alerting with machine learning job.
Watcher with elasticsearch query input.

Elasticsearch - Query to Determine All Unique IDs that are distance X away from a particular ID?

I have data in this format generated from a random walk (to simulate people walking around). It is set up in this manner { location : { lat: someLat, lon: someLong }, id: uniqueId, date:date }. I am trying to write a query given a users unique ID, find how many other unique IDs came within X distance of the given ID between a certain time range. Any hints on how to accomplish this?
My idea is to have a top level filter aggregration, with a nested geo-query of some sort. I think the geo-distance query is the way to go, but I am not sure how to include it into the below query to get all of unique IDs that come within X distance of the ID I am filtering on. The query below is where I am starting from, I am filtering all documents from now - 1 day to now, where the documents user Id is the provided value. How would I check all other documents for their distances against documents that match this query?
{
"aggs" : {
"range": {
"date_range": {
"field": "date",
"format": "MM-yyyy",
"ranges": [
{ "to": "now" },
{ "from": "now-1d" }
]
}
},
"locations" : {
"filter" : {
"term": { "id.keyword": "7a50ab18-886b-42a2-80ad-3d45112e3cfd" }
}
}
}
}
Your hunch is correct. All of this can be done using range & geo_distance filtering and _geo_distance sorting. You wanna filter on the query-level, not in the aggs though:
GET walking/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"date": {
"gte": "now-1d"
}
}
}
],
"filter": [
{
"geo_distance": {
"distance": "20m",
"location": {
"lat": 48.20150179951008,
"lon": 16.39111876487732
}
}
}
]
}
},
"aggs": {
"rings_around_loc": {
"geo_distance": {
"field": "location",
"origin": {
"lat": 48.20150179951008,
"lon": 16.39111876487732
},
"unit": "m",
"keyed": true,
"ranges": [
{
"to": 10
},
{
"from": 10,
"to": 50
},
{
"from": 50
}
]
}
},
"locations": {
"value_count": {
"field": "id.keyword"
}
}
},
"sort": [
{
"_geo_distance": {
"location": {
"lat": 48.20150179951008,
"lon": 16.39111876487732
},
"order": "asc",
"unit": "m",
"mode": "min",
"distance_type": "arc",
"ignore_unmapped": true
}
}
]
}
Not sure what you need the range buckets for so I left them out.
Full steps to replicate:
PUT walking
{
"mappings": {
"properties": {
"date": {
"type": "date"
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"location": {
"type": "geo_point"
}
}
}
}
And then POST _bulk this random walk data

Nested aggregation in nested aggregation query

Having the below (abbreviated) document in Elastic Search 7.1. Focusing on questions.influencerReponse.selectAllThatApplyResponses path.
{
"questions": [
{
"questionId": "79cfc6e7-731e-4d83-9dd6-82f4f39fff03",
"questionKind": "select_all_that_apply",
"questionText": "Have you heard of any of the following charities?",
"questionOptions": {
"1": "Plan International",
"2": "Young Women's Trust",
"3": "Women For Refugee Women",
"4": "The FPA"
},
"influencerReponse": {
"questionId": "79cfc6e7-731e-4d83-9dd6-82f4f39fff03",
"questionKind": "select_all_that_apply",
"text": null,
"questionOrder": 3,
"order": null,
"shortAnswerResponse": null,
"viewerSentimentResponse": null,
"yesNoResponse": null,
"selectAllThatApplyResponses": [
{
"key": "2",
"value": "Young Women's Trust"
}
]
}
}
]
}
I want to get the term aggregations for the key or the value, both are keyword type. I accomplished that before but not in the level of selectAllThatApplyResponses nested type.
Here's what I have so far and throwing the below error.
{
"query": {
"bool": {
"must": [
{
"term": {
"sponsorshipId": {
"value": "33c7140f-23ae-46f2-a0fe-49e2251114e4"
}
}
}
]
}
},
"track_total_hits": true,
"size": 0,
"aggs": {
"select_all_that_apply_responses": {
"nested": {
"path": "questions"
},
"aggs": {
"filter_types": {
"filter": {
"bool": {
"must": [
{
"match": {
"questions.questionId": "79cfc6e7-731e-4d83-9dd6-82f4f39fff03"
}
}
]
}
},
"aggs": {
"select_all_that_apply_nested": {
"nested": {
"path": "questions.influencerReponse.selectAllThatApplyResponses"
},
"aggs": {
"terms": {
"field": "questions.influencerReponse.selectAllThatApplyResponses.key"
}
}
}
}
}
}
}
}
}
I am receiving the below error.
{
"error": {
"root_cause": [
{
"type": "parsing_exception",
"reason": "Expected [START_OBJECT] under [field], but got a [VALUE_STRING] in [terms]",
"line": 42,
"col": 46
}
],
"type": "parsing_exception",
"reason": "Expected [START_OBJECT] under [field], but got a [VALUE_STRING] in [terms]",
"line": 42,
"col": 46
},
"status": 400
}
The final terms agg needs a name too -- I called it select_all_that_apply_nested_terms.
...
"select_all_that_apply_nested":{
"nested":{
"path":"questions.influencerReponse.selectAllThatApplyResponses"
},
"aggs":{
"select_all_that_apply_nested_terms":{
"terms":{
"field":"questions.influencerReponse.selectAllThatApplyResponses.key"
}
}
}
}
...

Function Score On Nested Object

I have this index blog with the following settings and mappings.
PUT /blog
{
"settings": {
"index": {
"number_of_shards": "1"
}
},
"mappings": {
"post": {
"_all": {
"enabled": false
},
"properties": {
"title": {
"type": "string"
},
"content": {
"type": "string"
},
"visitor": {
"type": "nested",
"properties": {
"id": {
"type": "string",
"index": "not_analyzed"
},
"last_visit": {
"type": "date",
"format": "yyyy-MM-dd"
}
}
}
}
}
}
}
I want to rank my posts based on relevancy and visitor's last visit. I tried this query without success. It seems like the gauss function cannot get the value of visitor's last_visit. How to get this worked?
POST /blog/post/_search
{
"query": {
"function_score": {
"functions": [
{
"gauss": {
"visitor.last_visit": {
"origin": "now/d",
"offset": "3d",
"scale": "4d",
"decay": 0.5
}
},
"filter": {
"nested": {
"path": "visitor",
"query": {
"term": {
"visitor.id": "1"
}
}
}
}
}
]
}
}
}
Here is a query with a match for a name that uses a nested object that I had for a particular use case. I didn't use any date fields, but as I said, it does use a nested object. I used relevancy of distance along with a text match, so it's similar.
I used the answer from this question to structure my query as it matched what I was trying to do. Scoring documents by text match and distance
GET dev_search_core_data/_search?size=200
{
"query": {
"bool": {
"should": [
{
"match": {
"NAME": "Amy Smith"
}
},
{
"bool": {
"must": [
{
"function_score": {
"query": {
"nested": {
"path": "LOCATION",
"query": {
"term": {
"LOCATION.SOME_IND": {
"value": true
}
}
}
}
},
"functions": [
{
"gauss": {
"LOCATION.COORDINATES": {
"origin": "-118.309, 34.041",
"scale": "50km",
"offset": "10km",
"decay": 0.5
}
}
}
]
}
}
]
}
}
]
}
}
}
I think the problem is with the structure of your query. I always run this command first to validate my queries if I'm having any problems to eliminate any syntax issues.
GET dev_search_core_data/_validate/query?explain
This was the result:
{
"valid": true,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"explanations": [
{
"index": "dev_search_core_data_b",
"valid": true,
"explanation": "filtered((NAME:amy NAME:smith) (+function score (ToParentBlockJoinQuery (filtered(LOCATION.SOME_IND:true)->random_access(_type:_LOCATION)),function=org.elasticsearch.index.query.functionscore.DecayFunctionParser$GeoFieldDataScoreFunction#274227b9)))->cache(org.elasticsearch.index.search.nested.NonNestedDocsFilter#1012ada6)"
}
]
}
I also looked at the docs for an in-depth explanation of how the function score worked. You don't mention your version, but I'm using ES 1.6.

Resources