How do I query nested with normal match query together? - elasticsearch

I want to fire nestedQuery on addresses and multiMatchQuery on name in single query. I tried few ways but I am getting "[bool] query does not support [nested]". I don't know whether this is possible or not (ES version: 7.x).
When I separately querying (i.e.nestedQuery() & multiMatchQuery()) that time it is working fine.
Please help me with that.
This is the mapping I am using:
{
"employee" : {
"mappings" : {
"properties" : {
"addresses" : {
"type" : "nested",
"properties" : {
"permanentAddress" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"TemporaryAddress" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
1. {
"query": {
"nested": {
"path": "addresses",
"query": {
"bool": {
"must": [
{ "match": { "addresses.permanentAddress": "xxx" } }
]
}
},
"score_mode": "avg"
}
}
}
2. {
"query": {
"bool": {
"must" : [
{
"multi_match" : {
"query" : "xxx",
"fields" : [
"name^1.0"
],
"type" : "best_fields",
"boost" : 1.0
}
}
]
}
}
}
nestedQuery() = looking for xxx value in addresses.permanentAddress
multi_match() = looking for xxx value in name
If value of name or addresses.permanentAddress matches with xxx then returns the result.

"bool" : {
"should" : [
{
"bool" : {
"must" : [
{
"match" : {
"name" : {
"query" : "xxx",
"operator" : "AND",
"prefix_length" : 0,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"auto_generate_synonyms_phrase_query" : true,
"boost" : 1.0
}
}
}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
},
{
"nested" : {
"query" : {
"bool" : {
"must" : [
{
"match" : {
"employee.permanentAddress" : {
"query" : "xxx",
"operator" : "AND",
"prefix_length" : 0,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"auto_generate_synonyms_phrase_query" : true,
"boost" : 1.0
}
}
}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
},
"path" : "employee",
"ignore_unmapped" : false,
"score_mode" : "none",
"boost" : 1.0,
}
}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
This Bool query with nested worked for me and with that I can able to check parent's as well as nested property.

Related

ElasticSearch - how to get aggregation of aggregation

I am very new to elasticsearch
I work for a dating website that has data as follows:
Single - with fields: name, signUpDate, state, and other data fields.
Encounter - with fields: state, encounterDate, singlesInvolved, and other data fields.
These are my 2 indexes
Now I have to write a query that returns as follows:
For every state, how many singles, how many encounters, the longest time a single has been part of our website, and the average time a single has been part of our website
And also return one result that is that same average for all states
Like this example:
[
{ //this one is the average of all states
"singles": 45,
"dates": 18,
"minWaitingTime": 1644677979530,
"avgWaitingTime": 15603
},
{ //these are the averages of each state
"state": "MA",
"singles": 50,
"dates": 23,
"minWaitingTime": 1644677979530,
"avgWaitingTime": 15603
},
{
"state": "NY",
"singles": 39,
"dates": 13,
"minWaitingTime": 1644850558872,
"avgWaitingTime": 6033
}
]
I've been working on the query for each state individually but i dont know how to get an average of all states
so far what i have is this:
GET /single,encounter/_search
{
"size": 0,
"aggs": {
"bystate": {
"terms": {
"field": "state",
"size": 59
},
"aggs": {
"group-by-index": {
"terms": {
"field": "_index"
}
},
"min_date": {
"min": {
"field": "signedUpAt"
}
},
"avg_date": {
"avg": {
"field": "signedUpAt"
}
}
}
}
}
}
I don't know if there is a better way to do this, likewise I don't know how to calculate the average (singles, encounters, min_date and average_date average) for all states using this result
Every result of the previous query looks like this:
{
"key" : "MA",
"doc_count" : 164,
"avg_date" : {
"value" : 1.6457900076508965E12,
"value_as_string" : "2022-02-25T11:53:27.650"
},
"min_date" : {
"value" : 1.64467797953E12,
"value_as_string" : "2022-02-12T14:59:39.530"
},
"group-by-index" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "single",
"doc_count" : 135
},
{
"key" : "encounter",
"doc_count" : 29
}
]
}
},
I would really appreciate help on this one
Addition: index mapping.
Encounter:
{
"encounter" : {
"aliases" : { },
"mappings" : {
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"avgAge" : {
"type" : "integer",
"index" : false,
"doc_values" : false
},
"application" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"createdAt" : {
"type" : "date",
"format" : "date_hour_minute_second_millis"
},
"encounterId" : {
"type" : "keyword"
},
"locationType" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"singleOneId" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"singleTwoId" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"serviceLine" : {
"type" : "keyword"
},
"state" : {
"type" : "keyword"
},
"rating" : {
"type" : "keyword"
}
}
},
"settings" : {
"index" : {
"refresh_interval" : "1s",
"number_of_shards" : "1",
"provided_name" : "encounter",
"creation_date" : "1643704661932",
"number_of_replicas" : "1",
"uuid" : "MliXQL_bRBKDN7_d8G_BYw",
"version" : {
"created" : "7100299"
}
}
}
}
}
And Single:
{
"single" : {
"aliases" : { },
"mappings" : {
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"id" : {
"type" : "keyword"
},
"singleId" : {
"type" : "keyword"
},
"state" : {
"type" : "keyword"
},
"preferedGender" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"refresh_interval" : "1s",
"number_of_shards" : "1",
"provided_name" : "single",
"creation_date" : "1643704662136",
"number_of_replicas" : "1",
"uuid" : "Js_tqZfRRx-IxbjVRRN4wQ",
"version" : {
"created" : "7100299"
}
}
}
}
}
You can use avg bucket aggregation, where you can provide bucket_path and based on value it will calculate avg of entire aggregation.
Below is sample query:
{
"size": 0,
"aggs": {
"bystate": {
"terms": {
"field": "state",
"size": 59
},
"aggs": {
"group-by-index": {
"terms": {
"field": "_index"
}
},
"min_date": {
"min": {
"field": "signedUpAt"
}
},
"avg_date": {
"avg": {
"field": "signedUpAt"
}
}
}
},
"avg_all_state": {
"avg_bucket": {
"buckets_path": "bystate>avg_date"
}
}
}
}

Elasticsearch - Missing Field Value For Nested Field - Function Score Query | v7.10.2

I have already posted this on the ES group but I got no response and so posted it on SO. Link https://discuss.elastic.co/t/missing-field-value-for-nested-field-function-score-query-v7-10-2/291365
I have been trying for a long time now but the nested field value is always saying missing field value while calculating the score.
Mapping:
{
"doctor_idx" : {
"mappings" : {
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"service" : {
"type" : "nested",
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"serviceTypeEarliestAvailability" : {
"type" : "nested",
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"earliestAvailableDateTimeByType" : {
"type" : "date",
"format" : "date_hour_minute_second"
},
"serviceType" : {
"type" : "text"
},
"servicesMedium" : {
"type" : "keyword"
}
}
}
}
}
}
}
}
}
For simplicity, I have kept just one record. A glimpse of the record
"serviceTypeEarliestAvailability" : [
{
"serviceType" : "Service Type",
"earliestAvailableDateTimeByType" : "2021-12-09T19:39:16",
"servicesMedium" : [
"MED1",
"MED2",
"MED3",
"MED4"
]
}
],
The following query gives: "A document doesn't have a value for a field! Use doc[].size()==0 to check if a document is missing a field!"
I tried using field_value_factor instead of script_score but it's the same problem and it complains about the missing field value.
Query 1: with script_score
GET /doctor_idx/_search
{
"explain": true,
"query": {
"nested": {
"path": "service",
"query": {
"nested": {
"score_mode": "max",
"path": "service.serviceTypeEarliestAvailability",
"query": {
"function_score": {
"query": {
"match_all": {
"boost": 1
}
},
"functions": [
{
"filter": {
"match": {
"service.serviceTypeEarliestAvailability.serviceType": "type" // no complaints about this
}
},
"weight": 10
},
{
"script_score": {
"script": {
"source": "(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())"
}
}
}
]
}
}
}
}
}
}
}
Query 2 : With field_value_factor
GET /doctor_idx/_search
{
"explain": true,
"query": {
"nested": {
"path": "service",
"query": {
"nested": {
"score_mode": "max",
"path": "service.serviceTypeEarliestAvailability",
"query": {
"function_score": {
"query": {
"match_all": {
"boost": 1
}
},
"functions": [
{
"filter": {
"match": {
"service.serviceTypeEarliestAvailability.serviceType": "type"
}
},
"weight": 10
},
{
"field_value_factor": {
"field": "service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType"
}
}
]
}
}
}
}
}
}
}
Error From ES for Query 1:
{
"error" : {
"root_cause" : [
{
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.get(ScriptDocValues.java:160)",
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.getValue(ScriptDocValues.java:154)",
"(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
" ^---- HERE"
],
"script" : "(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
"lang" : "painless",
"position" : {
"offset" : 79,
"start" : 0,
"end" : 98
}
}
],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [
{
"shard" : 0,
"index" : "doctor_idx",
"node" : "mvh5k24dQPqM-d7JVeNomQ",
"reason" : {
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.get(ScriptDocValues.java:160)",
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.getValue(ScriptDocValues.java:154)",
"(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
" ^---- HERE"
],
"script" : "(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
"lang" : "painless",
"position" : {
"offset" : 79,
"start" : 0,
"end" : 98
},
"caused_by" : {
"type" : "illegal_state_exception",
"reason" : "A document doesn't have a value for a field! Use doc[<field>].size()==0 to check if a document is missing a field!"
}
}
}
]
},
"status" : 400
}
Error From ES for Query 2:
{
"error" : {
"root_cause" : [
{
"type" : "exception",
"reason" : "Missing value for field [service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType]"
}
],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [
{
"shard" : 0,
"index" : "doctor_idx",
"node" : "mvh5k24dQPqM-d7JVeNomQ",
"reason" : {
"type" : "exception",
"reason" : "Missing value for field [service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType]"
}
}
]
},
"status" : 500
}
The only record I have in the ES
{
"took" : 32,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "doctor_idx",
"_type" : "_doc",
"_id" : "xM20oH0Bmp1zsT0w8pQe",
"_score" : 1.0,
"_source" : {
"_class" : "com.insig.clinic_apps.services_server.booking.dao.DoctorSchema",
"id" : "xM20oH0Bmp1zsT0w8pQe",
"metadata" : {
"doctorId" : "xhnz2lGvXxelWyVekz82c2v6Srb2",
"fullName" : "Dave Insig Email Test",
"credentials" : [
"MD"
],
"languagesSpoken" : [
"EN",
"NOT_DEFINED"
],
"specialitiesServed" : [ ],
"city" : "VAUGHAN",
"provincesPermitted" : [ ],
"province" : "ON",
"country" : "CANADA"
},
"service" : {
"servicesMedium" : [
"IN_CLINIC",
"VIDEO",
"MESSAGING",
"PHONE"
],
"servicesTypeDuration" : [
{
"serviceType" : "Acne Symptoms",
"duration" : 5,
"servicesMedium" : [
"IN_CLINIC",
"MESSAGING",
"PHONE",
"VIDEO"
]
}
],
"serviceTypeEarliestAvailability" : [
{
"serviceType" : "Acne Symptoms",
"earliestAvailableDateTimeByType" : "2021-12-09T19:39:16",
"servicesMedium" : [
"IN_CLINIC",
"MESSAGING",
"PHONE",
"VIDEO"
]
}
],
"bufferTimeForNextAvailability" : 0
},
"earliestAvailableDateTime" : "2021-12-09T19:39:16",
"patientRating" : 4.384481,
"onTimeRating" : 3.171053
}
}
]
}
}
/!\ Partial solution /!\
Query 1
When I was trying to execute your query I got this error:
{
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "dynamic method [org.elasticsearch.script.JodaCompatibleZonedDateTime, toEpochMilli/0] not found"
}
}
So I did a slight change to your query .toInstant().toEpochMilli()
GET /so_custom_score/_search
{
"explain": true,
"query": {
"nested": {
"path": "service",
"query": {
"nested": {
"score_mode": "max",
"path": "service.serviceTypeEarliestAvailability",
"query": {
"function_score": {
"query": {
"match_all": {
"boost": 1
}
},
"functions": [
{
"filter": {
"match": {
"service.serviceTypeEarliestAvailability.serviceType": "type"
}
},
"weight": 10
},
{
"script_score": {
"script": {
"source": """
def availability = doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value;
return availability.toInstant().toEpochMilli();
"""
}
}
}
]
}
}
}
}
}
}
}
Well, wasted a lot of time here and figured that there was no problem with the queries. Elasticsearch v7.10.2 has an issue when I try to get the explanation of the query.
It works perfectly fine without the explain parameter.
See the at org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction$1.explainScore(FieldValueFactorFunction.java:103) ~[elasticsearch-7.10.2.jar:7.10.2]
PS: One of the comments in the OG question mentioned that the latest Elasticsearch v7.15 works fine even with explain parameter.

How to built AND condition between should and must elastic search bool query

Here is the sample USER document
{
"id" : "1234567",
"userId" : "testuser01",
"firstName" : "firstname",
"lastName" : "lastname",
"orgId" : "567890",
"phoneNumber" : "1234567890"
}
I want to build a search query where in I want to pull all those users which belong to particular orgId AND which matches the search text entered by user in any of the fields (userId, firstname, etc.)
ex. if search is made using text "first", I want to pull all those records which belong to particular orgId AND fields containing first in it.
Sample query I am trying is
"query" : {
"bool" : {
"must" : [
{
"term" : {
"orgId.keyword" : {
"value" : "567890",
"boost" : 1.0
}
}
}
],
"should" : [
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"lastName^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
},
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"userId^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
},
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"orgId^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
},
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"firstName^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
},
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"phoneNumber^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
},
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"id^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
},
"sort" : [
{
"userId.keyword" : {
"order" : "asc"
}
}
]
}
Issue I am facing is, I want to have AND condition between MUST and SHOULD.
You don't need to specify the query for each field in query_string query. Rather you can specify the list of fields as below:
{
"query": {
"bool": {
"must": [
{
"term": {
"orgId.keyword": {
"value": "567890",
"boost": 1
}
}
},
{
"simple_query_string": {
"query": "first*",
"fields": [
"lastName^1.0",
"userId^1.0",
"orgId^1.0",
"firstName^1.0",
"phoneNumber^1.0",
"id^1.0"
]
}
}
]
}
},
"sort": [
{
"userId.keyword": {
"order": "asc"
}
}
]
}
Also to answer
How to built AND condition between should and must elastic search bool query?
here is a sample query for this:
{
"query": {
"bool": {
"must": [
{
"term": {
"field1": "someval"
}
},
{
"bool": {
"should": [
{
"terms": {
"field2": [
"v1",
"v2"
]
}
},
{
"query_string": {
"query": "this AND that OR thus"
}
}
]
}
}
]
}
}
}

query to find all docs that match with exact terms with all the fields in the query

I have a simple doc structure as follows.
{
"did" : "1",
"uid" : "user1",
"mid" : "pc-linux1",
"path" : "/tmp/path1"
}
I need to query elastic ,that matches all fields exactly
GET index2/_search
{
"query": {
"bool":{
"must": [
{
"term" : { "uid" : "user1"}
},
{
"term" : { "mid" : "pc-linux1"}
},
{
"term" : { "did" : "1"}
},
{
"term" : { "path" : "/tmp/path1"}
}
]
}
}
}
The matching should happen without any kind of elastic 'analysis' on keywords, so that "/tmp/path1" is matched as a full term.
I tried to use a custom mapping: with
"index" : false
which does not work.
PUT /index2?include_type_name=true
{
"mappings" : {
"_doc": {
"properties" : {
"did" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
},
"mid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
},
"path" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
},
"uid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
}
}
}
}
}
I am using elastic7.0 and few posts suggesting a custom mapping with
"index" : "not_analysed"
does not get accepted as a valid mapping in elastic 7.0
Any suggestions?
If you want to match exact terms, try this query:
GET index2/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"uid": "user1"
}
},
{
"match": {
"mid": "pc-linux1"
}
},
{
"match": {
"did": "1"
}
},
{
"match": {
"path": "/tmp/path1"
}
}
]
}
}
}

How to do ES Moving Avearge Prediction with Logstash?

I am using Elasticsearch 2.3.2, and Logstash 2.3.3. I have found from https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-movavg-aggregation.html which states that moving average can do predictions. I know it is possible to only make query in ES, but I am not sure how should I do that with logstash.
I have a logstash file which reads a csv log file storing CPU usage for every 15 seconds. Should I just include the following into the logstash output json file for the related index as an output mapping?
{
"the_movavg":{
"moving_avg":{
"buckets_path": "the_sum",
"window" : 30,
"model" : "holt_winters",
"settings" : {
"type" : "mult",
"alpha" : 0.5,
"beta" : 0.5,
"gamma" : 0.5,
"period" : 7,
"pad" : true
}
}
}
This is my json file for logstash
{
"template" : "linux_cpu-*",
"settings" : {
"index.refresh_interval" : "5s"
},
"mappings" : {
"_default_" : {
"_all" : {"enabled" : true, "omit_norms" : true},
"dynamic_templates" : [ {
"message_field" : {
"match" : "message",
"match_mapping_type" : "string",
"mapping" : {
"type" : "string", "index" : "analyzed", "omit_norms" : true,
"fielddata" : { "format" : "disabled" }
}
}
}, {
"string_fields" : {
"match" : "*",
"match_mapping_type" : "string",
"mapping" : {
"type" : "string", "index" : "analyzed", "omit_norms" : true,
"fielddata" : { "format" : "disabled" },
"fields" : {
"raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
}
}
}
} ],
"properties" : {
"#timestamp": { "type": "date" },
"#version": { "type": "string", "index": "not_analyzed" },
"geoip" : {
"dynamic": true,
"properties" : {
"ip": { "type": "ip" },
"location" : { "type" : "geo_point" },
"latitude" : { "type" : "float" },
"longitude" : { "type" : "float" }
}
}
}
}
}
}
And is it possible to have it as a graph as to be shown in Kibana?

Resources