Query to get available dates using start and end date - elasticsearch

I’m trying to create a query which returns available products with no reservation at that date (or date range) or no reservations at all. It’s driving me crazy.
Here is my current mapping with index settings:
{
"development_product_instances" : {
"aliases" : { },
"mappings" : {
"product_instance" : {
"properties" : {
"reservations" : {
"type" : "nested",
"properties" : {
"end_date" : {
"type" : "date",
"format" : "yyyy-MM-dd"
},
"start_date" : {
"type" : "date",
"format" : "yyyy-MM-dd"
}
}
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1503327829680",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "9b9BhF-ITta2dlCKRLrnfA",
"version" : {
"created" : "2040499"
}
}
},
"warmers" : { }
}
}
And the query:
{
bool: {
should: [
{
nested: {
path: "reservations",
filter: {
bool: {
must_not: [
{
range:
{
"reservations.start_date":
{
gte: start_date,
lte: end_date
}
}
},
{
range:
{
"reservations.end_date":
{
gte: start_date,
lt: end_date
}
}
}
]
}
}
}
},
{
not: {
nested: {
path: "reservations",
filter: {
match_all: {}
}
}
}
}
]
}
}
When there is more than one reservation it returns all.
I hope someone can see the bug in there. Maybe I'm missing something in the bigger picture.

Your problem is that the must_not is inside the nested query. That means that if it matches for any of the nested reservations, then the parent document matches. So when there are multiple reservations, unless the range you're querying overlaps all the existing reservations, you get a match. You can rewrite it like this (note that this query also matches when reservations is empty):
{
"query": {
"bool": {
"must_not": {
"nested": {
"path": "reservations",
"query": {
"bool": {
"should": [
{
"range": {
"reservations.start_date": {
"gte": start_date,
"lt": end_date
}
}
},
{
"range": {
"reservations.end_date": {
"gte": start_date,
"lt": end_date
}
}
},
{
"bool": {
"must": [
{
"range": {
"reservations.start_date": {
"lt": start_date
}
}
},
{
"range": {
"reservations.end_date": {
"gt": end_date
}
}
}
]
}
}
]
}
}
}
}
}
}
}

Related

elastic - query multiple levels on nested object in inner_hits

I have a huge nested object which has many levels.
I want to create a query which will return only a leaf (or some object in the middle of the tree),
and the query needs to filter on multiple levels of the tree.
For example:
My DB stores the whole company structure:
company -> wards -> employees -> working hours
I want to write a query that will return only the working hours of the employees in ward 2 which started later than 3pm this month.
I tried to use inner_hits, but to no avail.
as requested, sample document and expected result:
company:[{
properties:{companyId: 112}
ward:[{
properties: {wardId: 223}
employee:{
properties: {employeeId: 334},
workingHours: [
{ date: "1.1.2021", numOfHours: 4},
{ date: "1.2.2021", numOfHours: 7}
]
}]
}]
}]
the query:
I need to return the working hours for date "1.1.2021" of employee 334 in ward 223 - and only the working hours, not the whole tree.
expected result:
4 or { date: "1.1.2021", numOfHours: 4} , whatever is simpler
hope its clear now
You need to add inner_hits to all nested queries.
You can either parse the entire result to get the matched working hours (from the inner hits) or use response filtering to remove the additional data.
Mapping
PUT index123
{
"mappings": {
"properties": {
"company": {
"type": "nested",
"properties": {
"ward": {
"type": "nested",
"properties": {
"employee": {
"type": "nested",
"properties": {
"workingHours": {
"type": "nested",
"properties": {
"date": {
"type": "date"
}
}
}
}
}
}
}
}
}
}
}
}
Data
"_index" : "index123",
"_type" : "_doc",
"_id" : "9gGYI3oBt-MOenya6BcN",
"_score" : 1.0,
"_source" : {
"company" : [
{
"companyId" : 112,
"ward" : [
{
"wardId" : 223,
"employee" : {
"employeeId" : 334,
"workingHours" : [
{
"date" : "2021-01-01",
"numOfHours" : 4
},
{
"date" : "2021-01-02",
"numOfHours" : 7
}
]
}
}
]
}
]
}
}
Query
GET index123/_search?filter_path=hits.hits.inner_hits.ward.hits.hits.inner_hits.employee.hits.hits.inner_hits.workingHours.hits.hits._source
{
"query": {
"nested": {
"inner_hits": {
"name":"ward"
},
"path": "company.ward",
"query": {
"bool": {
"must": [
{
"term": {
"company.ward.wardId": {
"value": 223
}
}
},
{
"nested": {
"inner_hits": {
"name":"employee"
},
"path": "company.ward.employee",
"query": {
"bool": {
"must": [
{
"term": {
"company.ward.employee.employeeId": {
"value":334
}
}
},
{
"nested": {
"inner_hits": {
"name":"workingHours"
},
"path": "company.ward.employee.workingHours",
"query": {
"range": {
"company.ward.employee.workingHours.date": {
"gte": "2021-01-01",
"lte": "2021-01-01"
}
}
}
}
}
]
}
}
}
}
]
}
}
}
}
}
Result
{
"hits" : {
"hits" : [
{
"inner_hits" : {
"ward" : {
"hits" : {
"hits" : [
{
"inner_hits" : {
"employee" : {
"hits" : {
"hits" : [
{
"inner_hits" : {
"workingHours" : {
"hits" : {
"hits" : [
{
"_source" : {
"date" : "2021-01-01",
"numOfHours" : 4
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
Update:
Query with company ID
GET index123/_search?filter_path=hits.hits.inner_hits.company.hits.hits.inner_hits.ward.hits.hits.inner_hits.employee.hits.hits.inner_hits.workingHours.hits.hits._source
{
"query": {
"nested": {
"path": "company",
"inner_hits": {
"name": "company"
},
"query": {
"bool": {
"must": [
{
"term": {
"company.companyId": {
"value": 112
}
}
},
{
"nested": {
"inner_hits": {
"name": "ward"
},
"path": "company.ward",
"query": {
"bool": {
"must": [
{
"term": {
"company.ward.wardId": {
"value": 223
}
}
},
{
"nested": {
"inner_hits": {
"name": "employee"
},
"path": "company.ward.employee",
"query": {
"bool": {
"must": [
{
"term": {
"company.ward.employee.employeeId": {
"value": 334
}
}
},
{
"nested": {
"inner_hits": {
"name": "workingHours"
},
"path": "company.ward.employee.workingHours",
"query": {
"range": {
"company.ward.employee.workingHours.date": {
"gte": "2021-01-01",
"lte": "2021-01-01"
}
}
}
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
}
}
}

Elasticsearch - Range search by price and date does not work

Range search by price and by date does not work for me, and I don't understand whether to look for the error in the mapping or in the request.
I have mapping for my fields in document:
"mappings": {
"properties": {
"sales" : {
"type" : "nested",
"properties" : {
"from" : {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss"
},
"price" : {
"type" : "double"
},
"to" : {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}
This is my request by date:
"query": {
"bool": {
"must": [
{
"nested": {
"path": "sales",
"query": {
"bool": {
"must": [
{
"range": {
"date": {
"gte": "2019-09-01 12:37:55",
"lte": "2019-09-02 13:38:04"
}
}
}
]
}
}
}
}
]
}
}
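You need to specify the field you want to search against. Your request ranges on "date", but the mapping has no such field; fields inside a nested object must be referenced by their full path. In your case, you want to filter by range on the from and to fields under the sales nested object, i.e. sales.from and sales.to.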
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "sales",
"query": {
"bool": {
"filter": [
{
"range": {
"sales.from": { #field for from date
"gte": "2019-09-01 12:37:55"
}
}
},
{
"range": {
"sales.to": { #field for to date
"lte": "2019-09-02 13:38:04"
}
}
}
]
}
}
}
}
]
}
}
}

Elasticsearch querying number of dates in array matching query

I have documents in the following form
PUT test_index/_doc/1
{
"dates" : [
"2018-07-15T14:12:12",
"2018-09-15T14:12:12",
"2018-11-15T14:12:12",
"2019-01-15T14:12:12",
"2019-03-15T14:12:12",
"2019-04-15T14:12:12",
"2019-05-15T14:12:12"],
"message" : "hello world"
}
How do I query for documents such that there are n number of dates within the dates array falling in between two specified dates?
For example: Find all documents with 3 dates in the dates array falling in between "2018-05-15T14:12:12" and "2018-12-15T14:12:12" -- this should return the above document as "2018-07-15T14:12:12", "2018-09-15T14:12:12" and "2018-11-15T14:12:12" fall between "2018-05-15T14:12:12" and "2018-12-15T14:12:12".
I recently faced the same problem and came up with two solutions.
1) If you do not want to change your current mapping, you could query for the documents using query_string. Note that you will have to build the query string yourself from the range that you have. ("\"2019-04-08\" OR \"2019-04-09\" OR \"2019-04-10\" ")
{
"query": {
"query_string": {
"default_field": "dates",
"query": "\"2019-04-08\" OR \"2019-04-09\" OR \"2019-04-10\" "
}
}
}
However, this type of query only makes sense if the range is short.
2) The second way is the nested method, but you will have to change your current mapping as follows.
{
"properties": {
"dates": {
"type": "nested",
"properties": {
"key": {
"type": "date",
"format": "YYYY-MM-dd"
}
}
}
}
}
So your query will look something like this :-
{
"query": {
"nested": {
"path": "dates",
"query": {
"bool": {
"must": [
{
"range": {
"dates.key": {
"gte": "2018-04-01",
"lte": "2018-12-31"
}
}
}
]
}
}
}
}
}
You can create dates as a nested document and use bucket selector aggregation.
{
"empId":1,
"dates":[
{
"Days":"2019-01-01"
},
{
"Days":"2019-01-02"
}
]
}
Mapping:
"mappings" : {
"properties" : {
"empId" : {
"type" : "keyword"
},
"dates" : {
"type" : "nested",
"properties" : {
"Days" : {
"type" : "date"
}
}
}
}
}
GET profile/_search
{
"query": {
"bool": {
"filter": {
"nested": {
"path": "dates",
"query": {
"range": {
"dates.Days": {
"format": "yyyy-MM-dd",
"gte": "2019-05-01",
"lte": "2019-05-30"
}
}
}
}
}
}
},
"aggs": {
"terms_parent_id": {
"terms": {
"field": "empId"
},
"aggs": {
"availabilities": {
"nested": {
"path": "dates"
},
"aggs": {
"avail": {
"range": {
"field": "dates.Days",
"ranges": [
{
"from": "2019-05-01",
"to": "2019-05-30"
}
]
},
"aggs": {
"count_Total": {
"value_count": {
"field": "dates.Days"
}
}
}
},
"max_hourly_inner": {
"max_bucket": {
"buckets_path": "avail>count_Total"
}
}
}
},
"bucket_selector_page_id_term_count": {
"bucket_selector": {
"buckets_path": {
"children_count": "availabilities>max_hourly_inner"
},
"script": "params.children_count>=19;" ---> give the number of days that should match
}
},
"hits": {
"top_hits": {
"size": 10
}
}
}
}
}
}
I found my own answer to this, although I'm not sure how efficient it is compared to the other answers:
GET test_index/_search
{
"query":{
"bool" : {
"filter" : {
"script" : {
"script" : {"source":"""
int count = 0;
for (int i=0; i<doc['dates'].length; ++i) {
if (params.first_date < doc['dates'][i].toInstant().toEpochMilli() && doc['dates'][i].toInstant().toEpochMilli() < params.second_date) {
count += 1;
}
}
if (count >= 2) {
return true
} else {
return false
}
""",
"lang":"painless",
"params": {
"first_date": 1554818400000,
"second_date": 1583020800000
}
}
}
}
}
}
}
where the parameters are the two dates in epoch time. I've chosen 2 matches here, but obviously you can generalise to any number.

Get available apartments query

Overview
I have apartments which have reservations. My index has the reservations as nested fields with date fields for start_date and end_date.
I'm using the chewy ruby gem - but this doesn't matter at this time, I think. I just need to get my query right.
Goal
I want to fetch all available apartments which have no reservation at the given date or no reservations at all.
Current query
Unfortunately returns all apartments:
:query => {
:bool => {
:must_not => [
{
:range => {:"reservations.start_date" => {:gte => "2017-02-10"}}
},
{
:range => {:"reservations.end_date" => {:lte => "2017-02-12"}}
}
]
}
}
Index Settings
{
"apartments" : {
"aliases" : { },
"mappings" : {
"apartment" : {
"properties" : {
"city" : {
"type" : "string"
},
"coordinates" : {
"type" : "geo_point"
},
"email" : {
"type" : "string"
},
"reservations" : {
"type" : "nested",
"properties" : {
"end_date" : {
"type" : "date",
"format" : "yyyy-MM-dd"
},
"start_date" : {
"type" : "date",
"format" : "yyyy-MM-dd"
}
}
},
"street" : {
"type" : "string"
},
"zip" : {
"type" : "string"
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1487289727161",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "-rM79OUvQ3qkkLJmQCsoCg",
"version" : {
"created" : "2040499"
}
}
},
"warmers" : { }
}
}
We have to list free apartments as well as those apartments that will be available in the desired period (start_date, end_date variables).
So it should be an OR query: free_apartments or available_apartments.
The free apartments (those that don't have any value in the reservations field) should be easy to query with a missing filter, but this is a nested field and we have to deal with that.
If we perform the query with a missing filter, all docs will be returned. It's weird, but it happens. The solution is explained here: https://gist.github.com/Erni/7484095 and here is the issue: https://github.com/elastic/elasticsearch/issues/3495 The gist snippet works with all Elasticsearch versions.
The other part of the OR query is the available apartments. I've solved this part by performing a NOT query: return those apartments that do NOT have a reservation, by building a list of ranges that match the apartments that do have a reservation and then negating the result using a must_not filter.
elasticsearch_query = {
"query": {
"filtered": {
"filter": {
"bool": {
"should": [
{
"nested": {
"filter": {
"bool": {
"must_not" : [
{
"range": {
"start_date": {
"gte" : start_date,
"lt" :end_date
}
}
},
{
"range": {
"end_date": {
"gte" : end_date,
#"lte" :end_date
}
}
}
]
}
},
"path": "reservations"
}
},
{
#{ "missing" : { "field" : "reservations"} }
"not": {
"nested": {
"path": "reservations",
"filter": {
"match_all": {}
}
}
}
}
],
}
}
},
},
"sort" : {"id":"desc"}
}
You can have a look at my solution in this notebook.
I've created an example, populating a sample index and searching for the desired apartments with this query.
Answers to comments:
Prefix: Since the nested filter is performed with path set, the prefix is not needed at all (at least in the version I tested). And yes, you can add a field named start_date at document level or in another nested field.
Apartment matches: Yes, it matches 91 sample apartments, but since I did the search with the default size parameter, only 10 are returned (I didn't specify its value, so the default is used). If you need to get ALL of them, use a scroll search.
(The notebook has been modified to clarify these points.)
First of all, I think you must use the nested query.
I am not familiar with chewy-gem but the query would look something like:
:query => {
:nested: => {
:path: => "reservations",
:query => {
:bool => {
:must_not => [
{
:range => {:"reservations.start_date" => {:gte => "2017-02-10"}}
},
{
:range => {:"reservations.end_date" => {:lte => "2017-02-12"}}
}
]
}
}
}
}
But it might also not work: if there is a reservation in 2018, the first range clause will be true (as the start date will be later than 2017-02-10), therefore the apartment will not be returned, if I'm correct.
I would do something like:
:query => {
:nested: => {
:path: => "reservations",
:query => {
:bool => {
:must_not => [
{
:range => {:"reservations.start_date" => {:gte => "2017-02-10", :lte => "2017-02-12"}}
},
{
:range => {:"reservations.end_date" => {:gte => "2017-02-10", :lte => "2017-02-12"}}
}
]
}
}
}
}
which means no start date within the range you want and no end date within the range you want.
This is the query I came up with which is supposed to take into account all conditions, namely:
either there are no reservations (1st top-level bool/should)
or there is at least one reservation and the reservation start and end dates do not overlap with the requested dates.
Here, we're asking for free apartments between 2017-02-10 and 2017-02-12
{
"bool": {
"minimum_should_match": 1,
"should": [
{
"nested": {
"path": "reservations",
"query": {
"bool": {
"must_not": {
"exists": {
"field": "reservations.start_date"
}
}
}
}
}
},
{
"bool": {
"must": [
{
"nested": {
"path": "reservations",
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"range": {
"reservations.start_date": {
"gt": "2017-02-10"
}
}
},
{
"range": {
"reservations.end_date": {
"lt": "2017-02-10"
}
}
}
]
}
}
}
},
{
"nested": {
"path": "reservations",
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"range": {
"reservations.start_date": {
"gt": "2017-02-12"
}
}
},
{
"range": {
"reservations.end_date": {
"lt": "2017-02-12"
}
}
}
]
}
}
}
}
]
}
}
]
}
}

Groupby functionality on multiple fields in elastic search

I have a requirement where I need to group status_value by region, and regions by a given date. I have written a query for this, but it is not working as expected in ES. It would be a great help if someone could look into this and provide me with a solution.
Note: I would like to get the result for the last day (i.e. previous day).
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"UsagePoint_Asset_lifecycle_installationDate": "2014-07-13T16:55:00.0-07:00"
}
}
}
},
"aggs" : {
"product" : {
"terms" : {
"field" : "UsagePoint_ServiceLocation_region"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "UsagePoint_status_value"
}
}
}
}
}
}
my sql query may be like below:
select count(status_value)
from products
where date = "yesterday"
group by region , date
The query below is working, but I would like to get the values for a specific day or dates.
{
"agg1": {
"terms": {
"field":"UsagePoint_Asset_lifecycle_installationDate"
},
"aggs" : {
"product" : {
"terms" : {
"field" : "UsagePoint_ServiceLocation_region"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "UsagePoint_status_value"
}
}
}
}
}
}
}
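If you need the group-by info for yesterday, the following is a solution.
For any other custom dates, change the values of gte (greater than or equal to) and lt (less than). Note that "now-1d" to "now" covers the last 24 hours; to cover exactly the previous calendar day, round with date math, e.g. gte "now-1d/d" and lt "now/d".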
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"range": {
"UsagePoint_Asset_lifecycle_installationDate": {
"gte": "now-1d",
"lt": "now"
}
}
}
}
},
"aggs": {
"product": {
"terms": {
"field": "UsagePoint_ServiceLocation_region"
},
"aggs": {
"material": {
"terms": {
"field": "UsagePoint_status_value"
}
}
}
}
}
}

Resources