I created an aggregation setup that includes a terms aggregation with a sum aggregation for each term bucket, and it sorts on the sum value. This works fine. However, if I add a nested terms aggregation
(which is the second commented block in the code snippet), it fails with this error message: "{"type":"aggregation_execution_exception","reason":"Invalid aggregation order path [AggCPUTimeMS]."}" — and after I remove the "Order" line it works just fine. Is it not possible to sort the buckets by a sum if there is a sub-aggregation?
To Clarify what I need:
I want top level bucket group by "System" and I pick top n (this is why I need to order on this level) then inside each "System" bucket I create second level buckets group by "QueryHash". So it is not 2 groups at the same level, it is nested groups.
Another question is about the first commented block: what is the difference between "Order" and "BucketSort"? My guess is that "Order" sorts the buckets and then picks the top n, while "BucketSort" only sorts the buckets that have already been picked (whether randomly or not).
// Top-N "System" buckets ordered by each bucket's total CPU time.
// Buckets: terms on "System.keyword" (top 5), each carrying a
// "AggCPUTimeMS" sum metric that the terms agg orders on.
// NOTE(review): ordering a terms aggregation via a sub-aggregation path
// is only valid when the path resolves to a single-value metric agg.
// Fixes vs. original snippet:
//  - inner lambda parameter renamed to subAggContainer (reusing
//    "aggContainer" in a nested lambda is a C# compile error, CS0136)
//  - statement terminated with a semicolon
var response = client.Search<SearchResultBucket>(search => search
    .Size(0) // aggregation-only response; no hits needed
    .RequestConfiguration(r => r.DisableDirectStreaming())
    .Aggregations(aggContainer => aggContainer
        .Terms("topLevelAggregation", termsAgg => termsAgg
            .Field(new Field("System.keyword"))
            .Size(5) // keep only the top 5 "System" buckets
            .Aggregations(subAggContainer => subAggContainer
                .Sum("AggCPUTimeMS", sumAgg => sumAgg
                    .Field(new Field("CpuTimeMilliseconds"))
                )
                // Variant discussed in the question: bucket_sort pipeline
                // aggregation instead of terms "order".
                //.BucketSort("sum_bucket_sort", bs => bs
                //    .Sort(s => s
                //        .Descending("AggCPUTimeMS")
                //    )
                //)
            )
            .Order(o => o.Descending("AggCPUTimeMS"))
            // Variant discussed in the question: nested second-level terms
            // aggregation grouping by QueryHash inside each System bucket.
            //.Aggregations(subAggContainer2 => subAggContainer2
            //    .Terms("SubAggregation1", nestedTermsAgg => nestedTermsAgg
            //        .Field(new Field("QueryHash.keyword"))
            //    )
            //)
        )
    )
);
Mapping here:
{
"idx-au2-prod-sqlcpumonitoring-prod-2022.06.17-000043" : {
"mappings" : {
"dynamic_templates" : [
{
"message_field" : {
"path_match" : "message",
"match_mapping_type" : "string",
"mapping" : {
"norms" : false,
"type" : "text"
}
}
},
{
"string_fields" : {
"match" : "*",
"match_mapping_type" : "string",
"mapping" : {
"fields" : {
"keyword" : {
"ignore_above" : 256,
"type" : "keyword"
}
},
"norms" : false,
"type" : "text"
}
}
}
],
"properties" : {
"#timestamp" : {
"type" : "date"
},
"#version" : {
"type" : "keyword"
},
"BatchResult" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"ClientAppName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"ClientHostname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"ClientProcessId" : {
"type" : "long"
},
"CollectSystemDateUtc" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"CollectSystemTimeUtc" : {
"type" : "date"
},
"CpuTimeMilliseconds" : {
"type" : "long"
},
"CurrentVersion" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"DatabaseName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"Duration" : {
"type" : "long"
},
"EventName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"ExeDate" : {
"type" : "date"
},
"IncrementalId" : {
"type" : "long"
},
"LogicalReads" : {
"type" : "long"
},
"Owner" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"OwnerId" : {
"type" : "long"
},
"PhysicalReads" : {
"type" : "long"
},
"Qty" : {
"type" : "long"
},
"QueryHash" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"QueryPlanHash" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"QueryType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"QueryTypeId" : {
"type" : "long"
},
"ResourceGroupID" : {
"type" : "long"
},
"ResourcePoolID" : {
"type" : "long"
},
"RowCount" : {
"type" : "long"
},
"ServerInstanceName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"ServerPrincipalName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"SessionID" : {
"type" : "long"
},
"SourceTableName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"SqlText" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 32766
}
},
"norms" : false
},
"System" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"norms" : false
},
"SystemId" : {
"type" : "long"
},
"TaskTime" : {
"type" : "long"
},
"Writes" : {
"type" : "long"
},
"geoip" : {
"dynamic" : "true",
"properties" : {
"ip" : {
"type" : "ip"
},
"latitude" : {
"type" : "half_float"
},
"location" : {
"type" : "geo_point"
},
"longitude" : {
"type" : "half_float"
}
}
}
}
}
}
}
document example:
{
"_index": "idx-au2-prod-sqlcpumonitoring-prod-2022.06.17-000043",
"_id": "lDoxiYEBssAtKosd_uKJM",
"_version": 1,
"_score": 1,
"_source": {
"QueryType": "Regular",
"SqlText": "BACKUP LOG #DbName TO DISK = #FilePath WITH INIT, NOSKIP, RETAINDAYS=1, NAME = 'LogBackup', NO_COMPRESSION;\n\t\t\t",
"LogicalReads": 417,
"QueryTypeId": 1,
"QueryHash": "0",
"CollectSystemDateUtc": "2022.06.22",
"DatabaseName": "FO8PRD",
"ResourceGroupID": 280,
"System": "Unknown",
"SessionID": 3027,
"#version": "1",
"Writes": 8,
"ClientAppName": "ServiceRunner",
"ClientProcessId": 40792,
"Owner": "CORE",
"CollectSystemTimeUtc": "2022-06-22T02:02:50.293",
"QueryPlanHash": "0",
"Duration": 1133,
"SystemId": 14,
"Qty": 1,
"#timestamp": "2022-06-22T02:15:45.711Z",
"CpuTimeMilliseconds": 16,
"EventName": "sp_statement_completed",
"PhysicalReads": 99,
"ResourcePoolID": 257,
"TaskTime": 43216568,
"CurrentVersion": "2.17.41",
"RowCount": 8,
"IncrementalId": 18894454167,
"ClientHostname": "SPRC-015",
"ServerInstanceName": "INSTANCE1",
"ExeDate": "2022-06-16T23:00:00+10",
"OwnerId": 3,
"ServerPrincipalName": "Admin"
}
Related
I was having a problem dropping replicas when transitioning to warm nodes and I found out that the replicas need to be dropped while hot because the ultrawarm nodes have read-only S3 backed storage. So, I then added another hot stage called "hot_no_replicas" and I was seeing that in fact the replicas were being dropped as planned, However, after the warm migration, the replicas are reappearing. You can see it clearly here in the screenshot. 000022 still has a replica and is on hot. 000021, being older, has had the replica dropped and is still on hot. But 000020 being older than 000021 and having migrated to warm, now has a replica again. I will paste the logging policy and attach screenshot. Does anyone know why this is happening?
P.S. I added another, probably superfluous, "number_of_replicas": 0 to the warm state to try one more time to get the replicas removed before the warm transition.
{
"_id" : "shortterm_logging_policy",
"_version" : 37,
"_seq_no" : 1640996,
"_primary_term" : 1,
"policy" : {
"policy_id" : "shortterm_logging_policy",
"description" : "hot-warm-delete",
"last_updated_time" : 1672416827543,
"schema_version" : 14,
"error_notification" : null,
"default_state" : "hot",
"states" : [
{
"name" : "hot",
"actions" : [
{
"retry" : {
"count" : 3,
"backoff" : "exponential",
"delay" : "1m"
},
"rollover" : {
"min_size" : "1gb",
"min_doc_count" : 6000000,
"min_index_age" : "1d"
}
}
],
"transitions" : [
{
"state_name" : "hot_no_replicas",
"conditions" : {
"min_index_age" : "2d"
}
}
]
},
{
"name" : "hot_no_replicas",
"actions" : [
{
"retry" : {
"count" : 3,
"backoff" : "exponential",
"delay" : "1m"
},
"replica_count" : {
"number_of_replicas" : 0
}
}
],
"transitions" : [
{
"state_name" : "warm",
"conditions" : {
"min_index_age" : "3d"
}
}
]
},
{
"name" : "warm",
"actions" : [
{
"retry" : {
"count" : 3,
"backoff" : "exponential",
"delay" : "1m"
},
"replica_count" : {
"number_of_replicas" : 0
}
},
{
"retry" : {
"count" : 3,
"backoff" : "exponential",
"delay" : "1m"
},
"warm_migration" : { }
}
],
"transitions" : [
{
"state_name" : "delete",
"conditions" : {
"min_index_age" : "7d"
}
}
]
},
{
"name" : "delete",
"actions" : [
{
"retry" : {
"count" : 3,
"backoff" : "exponential",
"delay" : "1m"
},
"delete" : { }
}
],
"transitions" : [ ]
}
],
"ism_template" : [
{
"index_patterns" : [
"filebeat-syslog-*"
],
"priority" : 1,
"last_updated_time" : 1652815954078
},
{
"index_patterns" : [
"filebeat-mapi_logs-*"
],
"priority" : 2,
"last_updated_time" : 1666124989125
},
{
"index_patterns" : [
"filebeat-proftpd-*"
],
"priority" : 3,
"last_updated_time" : 1666126038959
}
]
}
}
Here I have run explain on 000017 which is in the same state as 000020 was.
{
"filebeat-syslog-000017" : {
"aliases" : {
"all_logs" : { },
"filebeat-syslog-write" : {
"is_write_index" : false
}
},
"mappings" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"#version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"agent" : {
"properties" : {
"ephemeral_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"hostname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"apenv" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"app" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"beat" : {
"properties" : {
"hostname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"close_inactive" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ecs" : {
"properties" : {
"version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"host" : {
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"input" : {
"properties" : {
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"log" : {
"properties" : {
"file" : {
"properties" : {
"path" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"offset" : {
"type" : "long"
}
}
},
"log_category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"offset" : {
"type" : "long"
},
"prospector" : {
"properties" : {
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"received_at" : {
"type" : "date"
},
"received_from" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"site" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"source" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"syslog_hostname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"syslog_message" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"syslog_pid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"syslog_program" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"syslog_timestamp" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timezone" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"mapping" : {
"total_fields" : {
"limit" : "3000"
},
"ignore_malformed" : "true"
},
"refresh_interval" : "30s",
"translog" : {
"flush_threshold_size" : "1024mb"
},
"plugins" : {
"index_state_management" : {
"rollover_alias" : "filebeat-syslog-write"
}
},
"provided_name" : "filebeat-syslog-000017",
"query" : {
"default_field" : "message"
},
"creation_date" : "1672592177394",
"number_of_replicas" : "1",
"uuid" : "o6UaHr1oQaaDMQHDEe0bkA",
"version" : {
"created" : "135248027"
},
"routing" : {
"allocation" : {
"include" : {
"_tier_preference" : "data_hot"
}
}
},
"number_of_shards" : "3",
"merge" : {
"scheduler" : {
"max_thread_count" : "1"
}
}
}
}
}
}
I have a query that works only on the keyword type, and I can't figure out why.
However, if I use a Match query plus a fuzziness parameter I can make it work with the text type.
Why is this happening?
Please see find the queries below
(the working query should return Eddie's documents.)
1) FUZZY QUERY TEXT TYPE -> NOT WORKING
GET kibana_sample_data_ecommerce/_search
{
"query": {
"fuzzy": {
"customer_first_name": {
"value": "Eddi",
"fuzziness": "AUTO"
}
}
}
}
2) FUZZY QUERY KEYWORD TYPE - WORKING
GET kibana_sample_data_ecommerce/_search
{
"query": {
"fuzzy": {
"customer_first_name.keyword": {
"value": "Eddi",
"fuzziness": "AUTO"
}
}
}
}
3) MATCH QUERY + FUZINESS -> WORKING
GET kibana_sample_data_ecommerce/_search
{
"query": {
"match": {
"customer_first_name.keyword": {
"query": "Eddi",
"fuzziness": "Auto"
}
}
}
}
INDEX SETTINGS
{
"kibana_sample_data_ecommerce" : {
"aliases" : { },
"mappings" : {
"properties" : {
"category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
},
"currency" : {
"type" : "keyword"
},
"customer_birth_date" : {
"type" : "date"
},
"customer_first_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"customer_full_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"customer_gender" : {
"type" : "keyword"
},
"customer_id" : {
"type" : "keyword"
},
"customer_last_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"customer_phone" : {
"type" : "keyword"
},
"day_of_week" : {
"type" : "keyword"
},
"day_of_week_i" : {
"type" : "integer"
},
"email" : {
"type" : "keyword"
},
"geoip" : {
"properties" : {
"city_name" : {
"type" : "keyword"
},
"continent_name" : {
"type" : "keyword"
},
"country_iso_code" : {
"type" : "keyword"
},
"location" : {
"type" : "geo_point"
},
"region_name" : {
"type" : "keyword"
}
}
},
"manufacturer" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
},
"order_date" : {
"type" : "date"
},
"order_id" : {
"type" : "keyword"
},
"products" : {
"properties" : {
"_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"base_price" : {
"type" : "half_float"
},
"base_unit_price" : {
"type" : "half_float"
},
"category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
},
"created_on" : {
"type" : "date"
},
"discount_amount" : {
"type" : "half_float"
},
"discount_percentage" : {
"type" : "half_float"
},
"manufacturer" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
},
"min_price" : {
"type" : "half_float"
},
"price" : {
"type" : "half_float"
},
"product_id" : {
"type" : "long"
},
"product_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "english"
},
"quantity" : {
"type" : "integer"
},
"sku" : {
"type" : "keyword"
},
"tax_amount" : {
"type" : "half_float"
},
"taxful_price" : {
"type" : "half_float"
},
"taxless_price" : {
"type" : "half_float"
},
"unit_discount_amount" : {
"type" : "half_float"
}
}
},
"sku" : {
"type" : "keyword"
},
"taxful_total_price" : {
"type" : "half_float"
},
"taxless_total_price" : {
"type" : "half_float"
},
"total_quantity" : {
"type" : "integer"
},
"total_unique_products" : {
"type" : "integer"
},
"type" : {
"type" : "keyword"
},
"user" : {
"type" : "keyword"
}
}
},
"settings" : {
"index" : {
"number_of_shards" : "1",
"auto_expand_replicas" : "0-1",
"provided_name" : "kibana_sample_data_ecommerce",
"creation_date" : "1579684918696",
"number_of_replicas" : "0",
"uuid" : "Ga3UfyyAQjGpa5JDbJB7Sw",
"version" : {
"created" : "7050299"
}
}
}
}
}
Fuzzy Query is a Term Level Query. Which means that queries are not analyzed.
WHY QUERY 1) FAILS
If you query for "Eddi", it will be compared against the analyzed text — in this case the token "eddie".
And from "Eddi" to "eddie" the edit distance is 2.
Hence the query will not succeed, because with "fuzziness": "AUTO" the maximum edit distance for terms of length 3–5 is 1.
WHY QUERY 2) SUCCEEDS
On the other hand, if you use the keyword field: keywords are stored without being analyzed, and therefore "Eddi" is only 1 edit away from "Eddie".
I'm trying to implement Stop Token Filter in an index of Elasticsearch. I've following code taken from here.
PUT /test1
{
"settings": {
"analysis": {
"filter": {
"my_stop": {
"type": "stop",
"stopwords": "_english_"
}
}
}
}
}
I have my data stored in JSON format and have a field named as Ingredients which contains stopped words. I want to search through the whole index (containing almost 80k records) about the top 100 most appeared values in Ingredients tag. The query I'm using to retrieve the results is
GET test1/_search?size=0&pretty
{
"aggs": {
"genres": {
"terms": {
"field": "Ingredients",
"size": 100,
"exclude": "[0-9].*"
}
}
}
}
I need to exclude Numbers from it for which I'm using exclude.
But applying the above query using Kibana it doesn't remove the Stop Words and keeps them displayed while querying response.
As per the documentation, it should remove the stopped words but it isn't doing that. I'm unable to find the cause as I'm a newbie in Elasticsearch. Please help me figure it out.
I'm using elasticsearch-7.3.1 and Kibana-7.3.1.
I'm working over it for about two days but none of the methods is working. Thanks! Any help would be really appreciated.
If I try it using this way, it works but while putting the GET request as per the method defined above, it doesn't work at all.
POST test1/_analyze
{
"analyzer": "my_stop",
"text": "House of Dickson<br> corp"
}
My Mapping
{
"recipe_test" : {
"aliases" : { },
"mappings" : {
"properties" : {
"Author" : {
"properties" : {
"additionalInfo" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"eval" : {
"type" : "boolean"
},
"url" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"value" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Channel" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Cousine" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Ingredients" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"fielddata" : true
},
"Keywords" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"MakingMethod" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Publication" : {
"properties" : {
"additionalInfo" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"eval" : {
"type" : "boolean"
},
"published" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"url" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"value" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Rating" : {
"properties" : {
"bestRating" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ratingCount" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ratingValue" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"worstRating" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Servings" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Timings" : {
"properties" : {
"cookTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"prepTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"totalTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Title" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"recipe_url" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"number_of_shards" : "1",
"provided_name" : "recipe_test",
"creation_date" : "1567443878756",
"analysis" : {
"filter" : {
"english_stop" : {
"type" : "stop",
"stopwords" : "_english_"
}
},
"analyzer" : {
"rebuilt_stop" : {
"filter" : [
"asciifolding",
"lowercase",
"english_stop"
],
"tokenizer" : "standard"
}
}
},
"number_of_replicas" : "1",
"uuid" : "K-FrOyc6QlWokGQoN6HxCg",
"version" : {
"created" : "7030199"
}
}
}
}
}
My Example Data
{
"recipe_url": "http1742637/bean-and-pesto-mash",
"Channel": "waqas",
"recipe_id":"31",
"Title": "Bean & pesto mash",
"Rating": {
"ratingValue": "4.625",
"bestRating": "5",
"worstRating": "1",
"ratingCount": "8"
},
"Timings": {
"cookTime": "PT5M",
"prepTime": "PT5M",
"totalTime": "PT10M"
},
"Author": {
"eval": false,
"value": "dfgkkdfgdfgfmes",
"url": "https://www.example.com/",
"additionalInfo": "Recipe from Good Food magazine, ",
"description": "Substitute potatoes with pulses for a healthy alternative mash with a chunky texture",
"published": "November 2011"
},
"Publication": {
"eval": false,
"value": "",
"url": "",
"additionalInfo": "",
"published": ""
},
"Nutrition": "per serving",
"NutritionContents": {
"kcal": "183",
"fat": "5g",
"saturates": "1g",
"carbs": "25g",
"sugars": "3g",
"fibre": "7g",
"protein": "11g",
"salt": "0.84g"
},
"SkillLevel": "Easy",
"Ingredients": [
"drizzle", "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus"
],
"MakingMethod": [
"Heat the oil in a large saucepan. Add the beans and cook for 3-4 mins until hot through. Lightly mash with a potato masher for a chunky texture. Stir through the pesto and season. To serve, drizzle with a little olive oil, if you like."
],
"Keywords": [
"Cannellini bean",
"Cannellini beans",
"Mash",
"Beans",
"Super healthy",
"Pulses",
"5-a-day",
"Low fat",
"Diet",
"Dieting",
"Side dish",
"Bangers and mash",
"Sausage and mash",
"Texture",
"Fireworks",
"Pesto",
"Easy",
"Vegetarian",
"Healthy",
"Bonfire Night"
],
"Category": [
"Side dish",
"Dinner"
],
"Cousine": "British",
"Servings": "Serves 4"
}
There is no easy way to do this.
Option 1
Enable fielddata on the text field on which you applied the correct analyzer. Something like this:
{
"settings": {
"analysis": {
"filter": {
"english_stop": {
"type": "stop",
"stopwords": "_english_"
}
},
"analyzer": {
"rebuilt_stop": {
"filter": [
"asciifolding",
"lowercase",
"english_stop"
],
"tokenizer": "standard"
}
}
}
},
"mappings": {
"properties": {
"Ingredients": {
"type": "text",
"analyzer": "rebuilt_stop",
"fielddata": true
}
}
}
}
And then you run your terms aggregation.
Drawbacks: it could use a lot of memory because of fielddata usage.
Option 2
Use term vectors API. Since you are interested in the most used "values"/"terms" in Ingredients field you could call this API on one document in the index and have the total term frequency for each term in that specific document. The drawback: you need to specify a certain document ID and only the terms in that document will be reported back.
Something like this:
GET /test/_termvectors/1
{
"fields" : ["Ingredients"],
"offsets" : false,
"payloads" : false,
"positions" : false,
"term_statistics" : true,
"field_statistics" : false
}
Option 3
Probably the ugliest one. Something around these lines: Elasticsearch: index a field with keyword tokenizer but without stopwords
Advantage: is not using fielddata (heap memory). Drawback: you'd have to define the stopwords manually in the char_filter definition.
I'm currently trying to find all children of parents that match certain query using the following has_parent query:
GET my_index*/_search
{
"query": {
"has_parent": {
"parent_type": "threat",
"query": {
"term": {
"type.keyword": {
"value": "ip"
}
}
}
}
}
}
But it returns no hits, even with a match_all query.
The mapping of the index is as follows:
"my_index" : {
"mappings" : {
"doc" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"#version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"asn_info" : {
"properties" : {
"as_org" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"asn" : {
"type" : "long"
}
}
},
"campaign" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"category_description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"confidence" : {
"type" : "float"
},
"criticity" : {
"type" : "float"
},
"detection_date" : {
"type" : "float"
},
"feed" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"feeds" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"geo" : {
"properties" : {
"city_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"country_code2" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"country_code3" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"country_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"latitude" : {
"type" : "float"
},
"longitude" : {
"type" : "float"
}
}
},
"hierarchy" : {
"type" : "join",
"eager_global_ordinals" : true,
"relations" : {
"threat" : "date"
}
},
"host" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ip" : {
"type" : "long"
},
"ip_address" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"port" : {
"type" : "long"
},
"subcategory" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timestamp" : {
"type" : "date"
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
As you can see, the hierarchy field is a join field, with "threat" defined as parent of "date". I don't see any problem with this. Does anyone know what could be happening?
I have around 1.06 M docs formatted like this :
"geometry" : {
"type" : "Polygon",
"coordinates" : [
[
[
-0.3994018,
43.547069
],
[
-0.3994509,
43.5469605
],
[
-0.3995154,
43.5469097
],
[
-0.3995879,
43.5468503
],
[
-0.3996707,
43.5467618
],
[
-0.3997065,
43.546666
]
]
]
},
"properties" : {
"commune" : "64063",
"section" : "A",
"numero" : "105",
"id" : "640630000A0105",
"contenance" : 12280,
"prefixe" : "000",
"updated" : "2014-06-18",
"created" : "2013-11-14"
},
"id" : "640630000A0105",
"type" : "Feature"
I'd like to request all the records where the average of geometry.coordinates[0][X][0] (longitude) is between two values, and the same for geometry.coordinates[0][X][1] (latitude).
How can I ask Elasticsearch for this? With range + avg?
EDIT : here is the data mapping
{
"parcelles" : {
"mappings" : {
"parcelle" : {
"properties" : {
"geometry" : {
"properties" : {
"coordinates" : {
"type" : "float"
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"properties" : {
"properties" : {
"commune" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"contenance" : {
"type" : "long"
},
"created" : {
"type" : "date"
},
"id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"numero" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"prefixe" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"section" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"updated" : {
"type" : "date"
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
I'm pretty new to elastic so I don't have all the knowledge needed to spot some obvious mistakes.
Thanks