How to access elasticsearch nested objects - elasticsearch

I tried to get my head wrapped around nested queries but I can't get this to work.
I have 2 items in ES that look like this
{
"_index": "catalog",
"_type": "products",
"_source": {
"product": {
"ean": "abc",
"features": {
"Product Type": "DVD player",
},
"color": "Black",
"manufacturer": "Sony",
"sitedetails": [
{
"name": "amazon.com",
"sku": "zzz",
"url": "http://www.amazon.com/dp/zzz"
}
],
"category": "Portable DVD Players"
}
}
},
{
"_index": "catalog",
"_type": "products",
"_source": {
"product": {
"ean": "def",
"features": {
"Product Type": "MP3 player",
},
"color": "Black",
"manufacturer": "LG",
"sitedetails": [
{
"name": "amazon.com",
"sku": "aaa",
"url": "http://www.amazon.com/dp/aaa"
}
],
"category": "MP3 Players"
}
}
}
2 questions:
What is the curl to get sku = zzz?
What is the curl to get both items on a search for "players"?
tnx!

Hey, let's do the magic.
First, you need a mapping that declares your nested objects, like this:
curl -XPUT "http://192.168.99.100:9200/catalog" -d'
{
"mappings": {
"products": {
"properties": {
"product": {
"type": "nested",
"properties": {
"features": {
"type":"nested"
},
"sitedetails": {
"type": "nested"
}
}
}
}
}
}
}'
After that, let's insert your data (rename your "Product Type" field to product_type):
curl -XPOST "http://192.168.99.100:9200/catalog/products" -d'
{
"product": {
"ean": "abc",
"features": {
"product_type": "DVD player"
},
"color": "Black",
"manufacturer": "Sony",
"sitedetails": [
{
"name": "amazon.com",
"sku": "zzz",
"url": "http://www.amazon.com/dp/zzz"
}
],
"category": "Portable DVD Players"
}
}'
Now, let's run the query:
curl -XPOST "http://192.168.99.100:9200/catalog/products/_search" -d'
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "product.features",
"query": {
"match": {
"product.features.product_type": "player"
}
}
}
},
{
"nested": {
"path": "product.sitedetails",
"query": {
"match": {
"product.sitedetails.sku": "zzz"
}
}
}
}
]
}
}
}'
And the response will be:
"hits": {
"total": 1,
"max_score": 1.4054651,
"hits": [
{
"_index": "catalog",
"_type": "products",
"_id": "AVM_fcYgvVoSi3OfqPTX",
"_score": 1.4054651,
"_source": {
"product": {
"ean": "abc",
"features": {
"Product Type": "DVD player"
},
"color": "Black",
"manufacturer": "Sony",
"sitedetails": [
{
"name": "amazon.com",
"sku": "zzz",
"url": "http://www.amazon.com/dp/zzz"
}
],
"category": "Portable DVD Players"
}
}
}
]
}
Hope it helps :D

Use:
curl 'http://localhost:9200/catalog/products/_search?q=sku:"zzz"&pretty=true'
curl 'http://localhost:9200/catalog/products/_search?q=sku:*&pretty=true'
The second command is for what I think you want: getting the data for both sku:"zzz" and sku:"aaa".
References:
http://joelabrahamsson.com/elasticsearch-101/
http://www.elasticsearchtutorial.com/elasticsearch-in-5-minutes.html

Related

Elastic Search Aggregation and Complex Query

I have created the index
PUT ten2
{
"mappings": {
"documents": {
"properties": {
"title": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},"uid": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"publish_details": {
"type": "nested",
"properties": {
"environment": {
"type": "keyword"
},
"locale": {
"type": "keyword"
},
"time": {
"type": "date"
},
"version": {
"type": "integer"
}
}
}
}
}
}
}
and added documents to it. Here is the list of documents:
[{
"_index": "ten2",
"_type": "documents",
"_id": "blt69b62b48bbed1fb6_en-us",
"_source": {
"publish_details": [{
"environment": "blt603fe91adbdcff66",
"time": "2020-06-24T12:11:25.276Z",
"locale": "en-us",
"user": "bltaadab2f531206e9d",
"version": 1
},
{
"environment": "blt603fe91adbdcff66",
"time": "2020-06-24T12:11:25.276Z",
"locale": "hi-in",
"user": "bltaadab2f531206e9d",
"version": 1
}
],
"title": "Entry 1",
"uid": "blt69b62b48bbed1fb6"
}
},
{
"_index": "ten2",
"_type": "documents",
"_id": "blt69b62b48bbed1fb6_mr-in",
"_source": {
"publish_details": [{
"environment": "blt603fe91adbdcff66",
"time": "2020-06-24T12:12:35.467Z",
"locale": "mr-in",
"user": "bltaadab2f531206e9d",
"version": 1
}],
"title": "Entry 3",
"uid": "blt69b62b48bbed1fb6"
}
},
{
"_index": "ten2",
"_type": "documents",
"_id": "blt4044c5198122a3ed_en-us",
"_source": {
"publish_details": [{
"environment": "blt603fe91adbdcff66",
"time": "2020-06-24T12:10:46.430Z",
"locale": "en-us",
"user": "bltaadab2f531206e9d",
"version": 1
},{
"environment": "blt603fe91adbdcff6690",
"time": "2020-06-24T12:10:46.430Z",
"locale": "en-us",
"user": "bltaadab2f531206e9d",
"version": 1
}],
"title": "Entry 10",
"uid": "blt4044c5198122a3ed"
}
}
]
and I want the following result
[
{
"_index": "ten2",
"_type": "documents",
"_id": "blt4044c5198122a3ed_en-us",
"_source": {
"publish_details": [{
"environment": "blt603fe91adbdcff66",
"time": "2020-06-24T12:10:46.430Z",
"locale": "en-us",
"user": "bltaadab2f531206e9d",
"version": 1
},{
"environment": "blt603fe91adbdcff6690",
"time": "2020-06-24T12:10:46.430Z",
"locale": "en-us",
"user": "bltaadab2f531206e9d",
"version": 1
}],
"title": "Entry 10",
"uid": "blt4044c5198122a3ed"
}
}
]
I am using the following query to get the result
GET ten2/_search
{
"query": {
"bool": {
"must": [{
"bool": {
"must_not": [{
"bool": {
"must": [{
"nested": {
"path": "publish_details",
"query": {
"term": {
"publish_details.environment": "blt603fe91adbdcff66"
}
}
}
}, {
"nested": {
"path": "publish_details",
"query": {
"term": {
"publish_details.locale": "en-us"
}
}
}
}, {
"nested": {
"path": "publish_details",
"query": {
"term": {
"publish_details.locale": "hi-in"
}
}
}
}, {
"nested": {
"path": "publish_details",
"query": {
"term": {
"publish_details.locale": "mr-in"
}
}
}
}]
}
}]
}
}
}
}
}
Kindly help me with a query to get the expected result. The first two documents have the same uid; only publish_details.locale is different. I am using a must query within must_not to get the result; currently I am getting all three documents, but I want only the last one. I have millions of documents.
To know more about Bool queries refer to this official documentation
Adding a working example with your mapping, index data, and with the search query
Search Query:
{
"query": {
"nested": {
"path": "publish_details",
"query": {
"bool": {
"must": [
{
"match": {
"publish_details.locale": "en-us"
}
}
],
"must_not": [
{
"match": {
"publish_details.environment": "blt603fe91adbdcff66"
}
},
{
"match": {
"publish_details.locale": "hi-in"
}
},
{
"match": {
"publish_details.locale": "mr-in"
}
}
]
}
},
"inner_hits": {
}
}
}
}
Search Result :
"hits": [
{
"_index": "test",
"_type": "_doc",
"_id": "3",
"_score": 0.53899646,
"_source": {
"publish_details": [
{
"environment": "blt603fe91adbdcff66",
"time": "2020-06-24T12:10:46.430Z",
"locale": "en-us",
"user": "bltaadab2f531206e9d",
"version": 1
},
{
"environment": "blt603fe91adbdcff6690",
"time": "2020-06-24T12:10:46.430Z",
"locale": "en-us",
"user": "bltaadab2f531206e9d",
"version": 1
}
],
"title": "Entry 10",
"uid": "blt4044c5198122a3ed"
},
"inner_hits": {
"publish_details": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.53899646,
"hits": [
{
"_index": "test",
"_type": "_doc",
"_id": "3",
"_nested": {
"field": "publish_details",
"offset": 1
},
"_score": 0.53899646,
"_source": {
"environment": "blt603fe91adbdcff6690",
"time": "2020-06-24T12:10:46.430Z",
"locale": "en-us",
"user": "bltaadab2f531206e9d",
"version": 1
}
}
]
}
}
}
}
]
To know more about inner hits refer to this documentation
The above query returns only the third document, which satisfies the conditions of the search query. In the inner hits, only one nested part of the third document is returned; the part that matches blt603fe91adbdcff66 is discarded.

Elasticsearch - Return a subset of nested results

Elasticsearch 7.7 and I'm using the official php client to interact with the server.
My issue was somewhat solved here: https://discuss.elastic.co/t/need-to-return-part-of-a-doc-from-a-search-query-filter-is-parent-child-the-way-to-go/64514/2
However "Types are deprecated in APIs in 7.0+" https://www.elastic.co/guide/en/elasticsearch/reference/7.x/removal-of-types.html
Here is my document:
{
"offering_id": "1190",
"account_id": "362353",
"service_id": "20087",
"title": "Quick Brown Mammal",
"slug": "Quick Brown Fox",
"summary": "Quick Brown Fox"
"header_thumb_path": "uploads/test/test.png",
"duration": "30",
"alter_ids": [
"59151",
"58796",
"58613",
"54286",
"51812",
"50052",
"48387",
"37927",
"36685",
"36554",
"28807",
"23154",
"22356",
"21480",
"220",
"1201",
"1192"
],
"premium": "f",
"featured": "f",
"events": [
{
"event_id": "9999",
"start_date": "2020-07-01 14:00:00",
"registration_count": "22",
"description": "boo"
},
{
"event_id": "9999",
"start_date": "2020-07-01 14:00:00",
"registration_count": "22",
"description": "xyz"
},
{
"event_id": "9999",
"start_date": "2020-08-11 11:30:00",
"registration_count": "41",
"description": "test"
}
]
}
Notice how the object may have one or many "events"
Searching based on event data is the most common use case.
For example:
Find events that start before 12pm
Find events with a description of "xyz"
Find events with a start date in the next 10 days.
I would like to NOT return any events that didn't match the query!
So, for example Find events with a description of "xyz" for a given service
{
"query": {
"bool": {
"must": {
"match": {
"events.description": "xyz"
}
},
"filter": {
"bool": {
"must": [
{
"term": {
"service_id": 20087
}
}
]
}
}
}
}
}
I would want the result to look like this:
{
"offering_id": "1190",
"account_id": "362353",
"service_id": "20087",
"title": "Quick Brown Mammal",
"slug": "Quick Brown Fox",
"summary": "Quick Brown Fox"
"header_thumb_path": "uploads/test/test.png",
"duration": "30",
"alter_ids": [
"59151",
"58796",
"58613",
"54286",
"51812",
"50052",
"48387",
"37927",
"36685",
"36554",
"28807",
"23154",
"22356",
"21480",
"220",
"1201",
"1192"
],
"premium": "f",
"featured": "f",
"events": [
{
"event_id": "9999",
"start_date": "2020-07-01 14:00:00",
"registration_count": "22",
"description": "xyz"
}
]
}
However, instead it just returns the ENTIRE document, with all events.
Is it even possible to return only a subset of the data? Maybe with Aggregations?
Right now, we're doing an "extra" set of filtering on the result set in the application (php in this case) to strip out event blocks that don't match the desired results.
It would be nice to just have elastic give directly what's needed instead of doing extra processing on the result to pull out the applicable event.
Thought about restructuring the data to instead have it based around "events" but then I would be duplicating data since every offering will have the parent data too.
This used to be in SQL, where there was a relation instead of having the data nested like this.
A subset of the nested data can be returned using Nested Aggregations along with Filter Aggregations
To know more about these aggregations, refer to the official documentation:
Filter Aggregation
Nested Aggregation
Index Mapping:
{
"mappings": {
"properties": {
"offering_id": {
"type": "integer"
},
"account_id": {
"type": "integer"
},
"service_id": {
"type": "integer"
},
"title": {
"type": "text"
},
"slug": {
"type": "text"
},
"summary": {
"type": "text"
},
"header_thumb_path": {
"type": "keyword"
},
"duration": {
"type": "integer"
},
"alter_ids": {
"type": "integer"
},
"premium": {
"type": "text"
},
"featured": {
"type": "text"
},
"events": {
"type": "nested",
"properties": {
"event_id": {
"type": "integer"
},
"registration_count": {
"type": "integer"
},
"description": {
"type": "text"
}
}
}
}
}
}
Search Query :
{
"size": 0,
"aggs": {
"nested": {
"nested": {
"path": "events"
},
"aggs": {
"filter": {
"filter": {
"match": { "events.description": "xyz" }
},
"aggs": {
"total": {
"top_hits": {
"size": 10
}
}
}
}
}
}
}
}
Search Result :
"hits": [
{
"_index": "foo21",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "events",
"offset": 1
},
"_score": 1.0,
"_source": {
"event_id": "9999",
"start_date": "2020-07-01 14:00:00",
"registration_count": "22",
"description": "xyz"
}
}
]
Second Method :
{
"query": {
"bool": {
"must": [
{
"match": {
"service_id": "20087"
}
},
{
"nested": {
"path": "events",
"query": {
"bool": {
"must": [
{
"match": {
"events.description": "xyz"
}
}
]
}
},
"inner_hits": {
}
}
}
]
}
}
}
You can also go through these SO answers:
How to filter nested aggregation bucket?
Returning a partial nested document in ElasticSearch

Get only the matching values and corresponding fields from ElasticSearch

In elasticsearch, let's say I have documents like
{
"name": "John",
"department": "Biology",
"address": "445 Mount Eden Road"
},
{
"name": "Jane",
"department": "Chemistry",
"address": "32 Wilson Street"
},
{
"name": "Laura",
"department": "BioTechnology",
"address": "21 Greens Road"
},
{
"name": "Mark",
"department": "Physics",
"address": "Random UNESCO Bio-reserve"
}
There is a use-case where, if I type "bio" in a search bar, I should get the matching field-value(s) from elasticsearch along with the field name.
For this example,
Input: "bio"
Expected Output:
{
"field": "department",
"value": "Biology"
},
{
"field": "department",
"value": "BioTechnology"
},
{
"field": "address",
"value": "Random UNESCO Bio-reserve"
}
What type of query should I use? I can think of using NGram Tokenizer and then use match query. But, I am not sure how shall I get only the matching field value (not the entire document) and the corresponding field name as the output.
After reading further about Completion Suggesters and Context Suggesters, I could solve this problem in the following way:
1) Keep a separate "suggest" field for each record with type "completion" with context-mapping of type "category". The mapping I created looks as follows:
{
"properties": {
"suggest": {
"type": "completion",
"contexts": [
{
"name": "field_type",
"type": "category",
"path": "cat"
}
]
},
"name": {
"type": "text"
},
"department": {
"type": "text"
},
"address": {
"type": "text"
}
}
}
2) Then I insert the records as shown below (adding search metadata to the "suggest" field with proper "context").
For example, to insert the first record, I execute the following:
POST: localhost:9200/test_index/test_type/1
{
"suggest": [
{
"input": ["john"],
"contexts": {
"field_type": ["name"]
}
},
{
"input": ["biology"],
"contexts": {
"field_type": ["department"]
}
},
{
"input": ["445 mount eden road"],
"contexts": {
"field_type": ["address"]
}
}
],
"name": "john",
"department": "biology",
"address": "445 mount eden road"
}
3) If we want to search for terms occurring in the middle of a sentence (as the search term "bio" occurs in the middle of the address field in the 4th record), we can index the entry as follows:
POST: localhost:9200/test_index/test_type/4
{
"suggest": [
{
"input": ["mark"],
"contexts": {
"field_type": ["name"]
}
},
{
"input": ["physics"],
"contexts": {
"field_type": ["department"]
}
},
{
"input": ["random unesco bio-reserve", "bio-reserve"],
"contexts": {
"field_type": ["address"]
}
}
],
"name": "mark",
"department": "physics",
"address": "random unesco bio-reserve"
}
4) Then search for the keyword "bio" like this:
localhost:9200/test_index/test_type/_search
{
"_source": false,
"suggest": {
"suggestion" : {
"text" : "bio",
"completion" : {
"field" : "suggest",
"size": 10,
"contexts": {
"field_type": [ "name", "department", "address" ]
}
}
}
}
}
The response:
{
"hits": {
"total": 0,
"max_score": 0,
"hits": []
},
"suggest": {
"suggestion": [
{
"text": "bio",
"offset": 0,
"length": 3,
"options": [
{
"text": "bio-reserve",
"_index": "test_index",
"_type": "test_type",
"_id": "4",
"_score": 1,
"contexts": {
"field_type": [
"address"
]
}
},
{
"text": "biology",
"_index": "test_index",
"_type": "test_type",
"_id": "1",
"_score": 1,
"contexts": {
"field_type": [
"department"
]
}
},
{
"text": "biotechnology",
"_index": "test_index",
"_type": "test_type",
"_id": "3",
"_score": 1,
"contexts": {
"field_type": [
"department"
]
}
}
]
}
]
}
}
Can anyone please suggest any better approach?

How to aggregate on nested objects in elasticsearch

I have the following mapping in ES:
"mappings": {
"products": {
"properties": {
"product": {
"type" : "nested",
"properties": {
"features": {
"type": "nested"
},
"sitedetails": {
"type": "nested"
}
}
}
}
}
}
and then 3 products like this:
"hits": [
{
"_index": "catalog",
"_type": "products",
"_id": "AVNE8F4mFYOWvB4rMqdO",
"_score": 1,
"_source": {
"product": {
"ean": "abc",
"features": {
"productType": "DVD player"
},
"color": "Black",
"manufacturer": "Sony",
"sitedetails": [
{
"name": "amazon.com",
"sku": "zzz",
"url": "http://www.amazon.com/dp/zzz"
}
],
"category": "Portable DVD Players"
}
}
},
{
"_index": "catalog",
"_type": "products",
"_id": "AVNE8XkXFYOWvB4rMqdQ",
"_score": 1,
"_source": {
"product": {
"ean": "def",
"features": {
"ProductType": "MP3 player"
},
"color": "Black",
"manufacturer": "LG",
"sitedetails": [
{
"name": "amazon.com",
"sku": "aaa",
"url": "http://www.amazon.com/dp/aaa"
}
],
"category": "MP3 Players"
}
}
},
{
"_index": "catalog",
"_type": "products",
"_id": "AVNIh-xVWwxj6Cz_r8AT",
"_score": 1,
"_source": {
"product": {
"ean": "abc",
"features": {
"productType": "DVD player"
},
"color": "White",
"manufacturer": "Sony",
"sitedetails": [
{
"name": "amazon.com",
"sku": "ggg",
"url": "http://www.amazon.com/dp/ggg"
}
],
"category": "Portable DVD Players"
}
}
}
]
I need to display on the UI side 2 filters, one for Manufacturer and one for website.
How can I aggregate on product.manufacturer and product.sitedetails.name?
tnx!
Figured it out:
GET /catalog/products/_search
{
"aggs": {
"byManufacturer": {
"nested": {
"path": "product"
},
"aggs": {
"byManufacturer": {
"terms": {
"field": "product.manufacturer"
}
}
}
},
"bySeller": {
"nested": {
"path": "product.sitedetails"
},
"aggs": {
"bySeller": {
"terms": {
"field": "product.sitedetails.name"
}
}
}
}
}
}

ElasticSearch query sub-objects

I wandered through the docs a lot today, but can't find the answer; probably because I'm new to Elastic and don't really know the entire ES-terminology yet.
Say I have a books type containing a bunch of, well - books. Each book has a nested author.
{
"name": "Me and Jane",
"rating": "10",
"author": {
"name": "John Doe",
"alias":"Mark Twain"
}
}
Now, I know we can query the authors fields like this:
"match": {
"author.name": "Doe"
}
But what if I want to search across all the author fields? I tried author._all, which doesn't work.
Another approach is multi_match with wildcard field names: https://www.elastic.co/guide/en/elasticsearch/guide/current/multi-match-query.html#_using_wildcards_in_field_names
Something like this, I think:
"query": {
"nested": {
"path": "author",
"query": {
"multi_match": {
"query": "doe",
"fields": [
"author.*"
]
}
}
}
}
UPDATE: full sample provided
PUT /books
{
"mappings": {
"paper": {
"properties": {
"author": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"alias": {
"type": "string"
}
}
}
}
}
}
}
POST /books/paper/_bulk
{"index":{"_id":1}}
{"author":[{"name":"john doe","alias":"doe"},{"name":"mark twain","alias":"twain"}]}
{"index":{"_id":2}}
{"author":[{"name":"mark doe","alias":"john"}]}
{"index":{"_id":3}}
{"author":[{"name":"whatever","alias":"whatever"}]}
GET /books/paper/_search
{
"query": {
"nested": {
"path": "author",
"query": {
"multi_match": {
"query": "john",
"fields": [
"author.*"
]
}
}
}
}
}
Result is:
"hits": {
"total": 2,
"max_score": 0.5906161,
"hits": [
{
"_index": "books",
"_type": "paper",
"_id": "2",
"_score": 0.5906161,
"_source": {
"author": [
{
"name": "mark doe",
"alias": "john"
}
]
}
},
{
"_index": "books",
"_type": "paper",
"_id": "1",
"_score": 0.5882852,
"_source": {
"author": [
{
"name": "john doe",
"alias": "doe"
},
{
"name": "mark twain",
"alias": "twain"
}
]
}
}
]
}
You can use a Query String Query. For example:
{
"query": {
"query_string": {
"fields": ["author.*"],
"query": "doe",
"use_dis_max": true
}
}
}

Resources