Elasticsearch inverse range overlap query - elasticsearch

I have the following document:
{
"blocked_availabilities": [
{
"start_time": "2016-05-26T19:30:00Z",
"end_time": "2016-05-26T20:30:00Z"
},
{
"start_time": "2017-05-26T16:00:00Z",
"end_time": "2017-05-26T17:00:00Z",
}
]
}
blocked_availabilities is a nested type in the mapping.
What I'm trying to do is match documents that do not overlap with a specified start and end time. I have the following query to do this (which doesn't work, of course):
{
"query":{
"bool":{
"filter":{
"nested":{
"path":"blocked_availabilities",
"query":{
"bool":{
"must_not":{
"bool":{
"must":[
{
"range":{
"blocked_availabilities.start_time":{
"from":null,
"include_lower":true,
"include_upper":true,
"to":"2016-05-26T20:00:00Z"
}
}
},
{
"range":{
"blocked_availabilities.end_time":{
"from":"2016-05-26T19:00:00Z",
"include_lower":true,
"include_upper":true,
"to":null
}
}
}
]
}
}
}
}
}
}
}
}
}
The problem seems to be that one of the nested documents doesn't match so the whole document is returned.
Is there a good way to do what I want? I expect this document to not be returned by my query since it overlaps with the first nested document.

One way to achieve this is to check if there is any nested object within the overlapping period and wrap the nested query in a must_not.
This ends up matching only documents that do not contain any blocked_availabilities overlapping the desired time period.
Example:
Setup Index
put test
put test/test/_mapping
{
"properties": {
"blocked_availabilities": {
"type": "nested",
"properties": {
"start_time": {
"type": "date"
},
"end_time": {
"type": "date"
}
}
}
}
}
}
put test/test/1
{
"blocked_availabilities": [
{
"start_time": "2016-05-26T19:30:00Z",
"end_time": "2016-05-26T20:30:00Z"
},
{
"start_time": "2017-05-26T16:00:00Z",
"end_time": "2017-05-26T17:00:00Z"
}
]
}
Query:
put test/test/_search
{
"query": {
"bool": {
"must_not": [
{
"nested": {
"path": "blocked_availabilities",
"query": {
"bool": {
"should": [
{
"range": {
"blocked_availabilities.end_time": {
"lte": "2016-05-26T20:00:00Z",
"gte": "2016-05-26T19:00:00Z"
}
}
},
{
"range": {
"blocked_availabilities.start_time": {
"lte": "2016-05-26T20:00:00Z",
"gte": "2016-05-26T19:00:00Z"
}
}
}
]
}
}
}
}
]
}
}
}

You're trying to express not ((start <= e) and (end >= s)), where s and e are the start and end of the requested window. Why not simply use the equivalent (start > e) or (end < s)? It looks like it should work if the data is consistent.

Related

elasticsearch need to add a must to a bool should query

I have the following query that works as expected:
GET <index_name>/_search
{
"sort": [
{
"irFileCreateTime": {
"order": "desc"
}
}
],
"query": {
"bool": {
"should": [
{
"match": {
"fileId": 46704
}
},
{
"match": {
"fileId": 46706
}
},
{
"match": {
"fileId": 46719
}
}
]
}
}
}
The problem is that I need to further filter the data, but the field I need to filter on is a text field. I have tried many different ways of putting a must match into my query but everything is either malformed or filters out all hits when I know it should only filter out half. How can I add a must match "irStatus":"COMPLETE" to this query? Thanks in advance.
What you're after is a term query on, preferably, the keyword of irStatus. That is to say:
GET index/_search
{
"sort": [
{
"irFileCreateTime": {
"order": "desc"
}
}
],
"query": {
"bool": {
"must": [
{
"term": {
"irStatus.keyword": {
"value": "COMPLETE"
}
}
}
],
"should": [
{
"match": {
"fileId": 46704
}
},
{
"match": {
"fileId": 46706
}
},
{
"match": {
"fileId": 46719
}
}
]
}
}
}
Assuming your mapping looks something like this:
{
"mappings": {
"properties": {
"irFileCreateTime": {
"type": "date"
},
"fileId": {
"type": "integer"
},
"irStatus": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
}
The reason it's apparently failing on your end is that "COMPLETE" has been lowercased by the standard analyzer.
Alternatively, you could do:
{
"must":[
{
"query_string":{
"query":"irStatus:COMPLETE AND (fileId:(46704 OR 46706 OR 46719))"
}
}
]
}

Filter nested sorting in elasticsearch

I have a document with a nested structure; each nested object has an assignment_name and a due_date:
The mapping
{
"goal": {
"mappings": {
"doc": {
"properties": {
"title": {
"type": "keyword"
},
// lot's of other fields here ...
"steps": {
"type": "nested",
"properties": {
"assignment_name": {
"type": "keyword"
},
"due_date": {
"type": "date"
}
// lots of other fields here
}
}
}
}
}
}
}
I want to:
Filter all documents that have a specific assignment_name (e.g. user_a)
Sort the results by the next due_date, not taking other assignments into account.
This query gives me results in random order (no sorting):
{
"query":{
"bool":{
"filter":[
{
"nested":{
"path":"steps",
"query":{
"term":{
"steps.assignment_name":"user_a"
}
}
}
}
]
}
},
"sort":[
{
"steps.due_date":{
"order":"asc",
"nested":{
"path":"steps",
"filter":{
"term":{
"steps.assignment_name":"user_a"
}
}
}
}
}
],
"from":0,
"size":25
}
Firstly you need to ensure that datatype for steps field is nested. Then you have to use nested sorting to sort documents based on a nested document field.
The query would be:
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "steps",
"query": {
"term": {
"steps.assignment_name": "user_a"
}
}
}
}
]
}
},
"sort": [
{
"steps.due_date": {
"order": "asc",
"nested": {
"path": "steps",
"filter": {
"term": {
"steps.assignment_name": "user_a"
}
}
}
}
}
]
}
The catch above is to use the same filter in sort as used in the main query to filter the documents. This ensures that the correct nested document's field value is considered to sort the documents.

How to join ElasticSearch query with multi_match, boosting, wildcard and filter?

I'm trying to achieve these goals:
Filter results by a bool query, like "status=1"
Filter results by a bool range query, like "distance: gte 10 AND lte 60"
Filter results by matching at least one int value from an int array
Search words in many fields while calculating the document score. Some fields need wildcards, some need boosting, like importantfield^2, somefield*, someotherfield^0.75
All of the above points are joined by the AND operator. All terms within one point are joined by the OR operator.
I have written something like this, but wildcards are not working. Searching for "abc" doesn't find "abcd" in the "name" field.
How can I solve this?
{
"filtered": {
"query": {
"multi_match": {
"query": "John Doe",
"fields": [
"*name*^1.75",
"someObject.name",
"tagsArray",
"*description*",
"ownerName"
]
}
},
"filter": {
"bool": {
"must": [
{
"term": {
"status": 2
}
},
{
"bool": {
"should": [
{
"term": {
"someIntsArray": 1
}
},
{
"term": {
"someIntsArray": 5
}
}
]
}
},
{
"range": {
"distanceA": {
"lte": 100
}
}
},
{
"range": {
"distanceB": {
"gte": 50,
"lte": 100
}
}
}
]
}
}
}
}
Mappings:
{
"documentId": {
"type": "integer"
},
"ownerName": {
"type": "string",
"index": "not_analyzed"
},
"description": {
"type": "string"
},
"status": {
"type": "byte"
},
"distanceA": {
"type": "short"
},
"createdAt": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"distanceB": {
"type": "short"
},
"someObject": {
"properties": {
"someObject_id": {
"type": "integer"
},
"name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"someIntsArray": {
"type": "integer"
},
"tags": {
"type": "string",
"index": "not_analyzed"
}
}
You can make use of Query String if you would want to apply wildcard for multiple fields and at the same time apply various boosting values for individual fields:
Below is how your query would be:
POST <your_index_name>/_search
{
"query":{
"bool":{
"must":[
{
"query_string":{
"query":"abc*",
"fields":[
"*name*^1.75",
"someObject.name",
"tagsArray",
"*description*",
"ownerName"
]
}
}
],
"filter":{
"bool":{
"must":[
{
"term":{
"status":"2"
}
},
{
"bool":{
"minimum_should_match":1,
"should":[
{
"term":{
"someIntsArray":1
}
},
{
"term":{
"someIntsArray":5
}
}
]
}
},
{
"range":{
"distanceA":{
"lte":100
}
}
},
{
"range":{
"distanceB":{
"gte": 50,
"lte":100
}
}
}
]
}
}
}
}
}
Note that for the field someIntsArray, I've made use of "minimum_should_match":1 so that you won't end up with documents that'd have neither of those values.
Updated Answer:
Going by the updated comment, you can have the fields that need wildcard search handled by query_string, and you can make use of a simple match query with boosting as shown below. Include both of these queries (you can even add more match queries depending on your requirements) in a combined should clause. That way you can control where the wildcard query is used and where it is not.
{
"query":{
"bool":{
"should":[
{
"query_string":{
"query":"joh*",
"fields":[
"name^2"
]
}
},
{
"match":{
"description":{
"query":"john",
"boost":15
}
}
}
],
"filter":{
"bool":{
"must":[
{
"term":{
"status":"2"
}
},
{
"bool":{
"minimum_should_match":1,
"should":[
{
"term":{
"someIntsArray":1
}
},
{
"term":{
"someIntsArray":5
}
}
]
}
},
{
"range":{
"distanceA":{
"lte":100
}
}
},
{
"range":{
"distanceB":{
"lte":100
}
}
}
]
}
}
}
}
}
Let me know if this helps

Elastic Search error in complex bool query

I am trying to write an Elasticsearch query that searches for a time frame in the Elasticsearch table. I have records which have a starttime and an endtime. From the UI I supply a starttime and an endtime, which define the time window for which I need to find files. Assuming the time window between starttime and endtime in the records is smaller than the time window entered by the user, I have created the following query:
{
"_source":["filename","starttime","endtime"],
"sort":[{
"starttime":{"order":"asc"}
}],
"query":{
"bool":{
"should":{
"bool":{
"must":[
"range":{
"starttime":{
"lte":1489602610000
}
},
"range":{
"endtime":{
"gte":1489602610000,
}
}
]
}
},
"should":{
"bool":{
"must":[
"range":{
"starttime":{
"gte":1489602610000
}
},
"range":{
"endtime":{
"lte":1489689000000
}
}
]
}
},
"should":{
"bool":{
"must":[
"range":{
"starttime":{
"lte":1489689000000
}
},
"range":{
"endtime":{
"gte":1489689000000
}
}
]
}
}
}
}
}
I am getting error
"Unexpected character (':' (code 58)): was expecting comma to separate
Array entries\n at [Source:
org.elasticsearch.transport.netty4.ByteBufStreamInput#29263f09; line:
11, column: 33]"
There are several issues with your query:
one dangling comma
more than one bool/should clauses
range queries not properly wrapped inside curly braces
You can find the correct query below:
{
"_source": [
"filename",
"starttime",
"endtime"
],
"sort": [
{
"starttime": {
"order": "asc"
}
}
],
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"starttime": {
"lte": 1489602610000
}
}
},
{
"range": {
"endtime": {
"gte": 1489602610000
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"starttime": {
"gte": 1489602610000
}
}
},
{
"range": {
"endtime": {
"lte": 1489689000000
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"starttime": {
"lte": 1489689000000
}
}
},
{
"range": {
"endtime": {
"gte": 1489689000000
}
}
}
]
}
}
]
}
}
}

Elastic search filtered query, query part being ignored?

I'm building up the following search in code, the idea being that it filters down the set of matches then queries this so I can add score based on certain fields. For some reason the filter part works but whatever I put in the query (i.e. in the below I have no index sdfsdfsdf) it still returns anything matching the filter.
Is the syntax wrong?
{
"query":{
"filtered":{
"query":{
"bool":{
"must":{
"match":{
"sdfsdfsdf":{
"query":"4",
"boost":2.0
}
}
}
},
"filter":{
"bool":{
"must":[
{
"terms":{
"_id":[
"55f93ead5df34f1900abc20b",
"55f8ab0226ec4bb216d7c938",
"55dc4e949dcf833308c63d6b"
]
}
},
{
"range":{
"published_date":{
"lte":"now"
}
}
}
],
"must_not":{
"terms":{
"_id":[
"55f0a799acccc28204a5058c"
]
}
}
}
}
}
}
}
}
Your filter is not at the right level. It should not be inside query but at the same level as query like this:
{
"query": {
"filtered": {
"query": { <--- query and filter at the same level
"bool": {
"must": {
"match": {
"sdfsdfsdf": {
"query": "4",
"boost": 2
}
}
}
}
},
"filter": { <--- query and filter at the same level
"bool": {
"must": [
{
"terms": {
"_id": [
"55f93ead5df34f1900abc20b",
"55f8ab0226ec4bb216d7c938",
"55dc4e949dcf833308c63d6b"
]
}
},
{
"range": {
"published_date": {
"lte": "now"
}
}
}
],
"must_not": {
"terms": {
"_id": [
"55f0a799acccc28204a5058c"
]
}
}
}
}
}
}
}
You need to replace sdfsdfsdf with an existing field name in your type, e.g. title; otherwise I think it will fall back to a match_all query.
"match":{
"title":{
"query": "some text here",
"boost":2.0
}
}

Resources