I'm using ElasticSearch 2.4
I need to get all purchases that match all of the queries.
I'm currently using the inner_hits feature, but it doesn't work as expected: it only shows the matches of each nested query on its own, and the problem is that it is not combined with the main document query.
I have this mapping, and below I created an example with my comments:
PUT /example_contact_purchases
{
"mappings": {
"contact": {
"dynamic": false,
"properties": {
"name": {
"type": "string"
},
"country": {
"type": "string"
},
"purchases": {
"type": "nested",
"properties": {
"uuid":{
"type":"string"
},
"brand":{
"type":"string"
}
}
}
}
}
}
}
POST example_contact_purchases/contact
{
"name" : "Fran",
"country": "ES",
"purchases" : [
{
"uuid" : "23",
"brand":"Sony"
},
{
"uuid":"23",
"brand":"Sony"
}
]
}
POST example_contact_purchases/contact
{
"name" : "Jhon",
"country": "UK",
"purchases" : [
{
"uuid" : "45",
"brand": "Lenovo"
},
{
"uuid":"23",
"brand":"Sony"
},
{
"uuid":"77",
"brand":"HP"
}
]
}
POST example_contact_purchases/contact
{
"name" : "Lucas",
"country": "ES",
"purchases" : [
{
"uuid" : "45",
"brand": "Lenovo"
},
{
"uuid":"23",
"brand":"Sony"
},
{
"uuid":"77",
"brand":"HP"
}
]
}
GET example_contact_purchases/contact/_search
{
"query": {
"bool": {
"should": [
{"bool": {
"must": [
{
"query_string": {
"query": "country:ES"
}
},
{
"nested": {
"path": "purchases",
"inner_hits":{
"name":"0"
},
"filter": {
"query": {
"query_string": {
"query": "(purchases.brand:Sony)"
}
}
}
}
}
]
}},
{"bool": {
"must": [
{
"query_string": {
"query": "country:UK"
}
},
{
"nested": {
"path": "purchases",
"inner_hits":{
"name":"1"
},
"filter": {
"query": {
"query_string": {
"query": "(purchases.uuid:45)"
}
}
}
}
}
]
}
}
]
}
}
}
I am using simple query like this:
"(country.raw:ES AND purchases.brand:Sony) OR (country:UK AND purchases.uuid:45)"
And the result of the search query is:
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0.5949223,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJJdZXthyTIlmcERM",
"_score": 0.5949223,
"_source": {
"name": "Jhon",
"country": "UK",
"purchases": [
{
"uuid": "45",
"brand": "Lenovo"
},
{
"uuid": "23",
"brand": "Sony"
},
{
"uuid": "77",
"brand": "HP"
}
]
},
"inner_hits": {
"0": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJJdZXthyTIlmcERM",
"_nested": {
"field": "purchases",
"offset": 1
},
"_score": 1,
"_source": {
"uuid": "23",
"brand": "Sony"
}
}
]
}
},
"1": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJJdZXthyTIlmcERM",
"_nested": {
"field": "purchases",
"offset": 0
},
"_score": 1,
"_source": {
"uuid": "45",
"brand": "Lenovo"
}
}
]
}
}
}
},
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJKBHXthyTIlmcERN",
"_score": 0.5949223,
"_source": {
"name": "Lucas",
"country": "ES",
"purchases": [
{
"uuid": "45",
"brand": "Lenovo"
},
{
"uuid": "23",
"brand": "Sony"
},
{
"uuid": "77",
"brand": "HP"
}
]
},
"inner_hits": {
"0": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJKBHXthyTIlmcERN",
"_nested": {
"field": "purchases",
"offset": 1
},
"_score": 1,
"_source": {
"uuid": "23",
"brand": "Sony"
}
}
]
}
},
"1": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJKBHXthyTIlmcERN",
"_nested": {
"field": "purchases",
"offset": 0
},
"_score": 1,
"_source": {
"uuid": "45",
"brand": "Lenovo"
}
}
]
}
}
}
},
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJI1SXthyTIlmcERL",
"_score": 0.5139209,
"_source": {
"name": "Fran",
"country": "ES",
"purchases": [
{
"uuid": "23",
"brand": "Sony"
},
{
"uuid": "23",
"brand": "Sony"
}
]
},
"inner_hits": {
"0": {
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJI1SXthyTIlmcERL",
"_nested": {
"field": "purchases",
"offset": 1
},
"_score": 1,
"_source": {
"uuid": "23",
"brand": "Sony"
}
},
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJI1SXthyTIlmcERL",
"_nested": {
"field": "purchases",
"offset": 0
},
"_score": 1,
"_source": {
"uuid": "23",
"brand": "Sony"
}
}
]
}
},
"1": {
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
}
}
]
}
}
Unfortunately, the first result is wrong:
"inner_hits": {
"0": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJJdZXthyTIlmcERM",
"_nested": {
"field": "purchases",
"offset": 1
},
"_score": 1,
"_source": {
"uuid": "23",
"brand": "Sony"
}
}
]
}
},
"1": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJJdZXthyTIlmcERM",
"_nested": {
"field": "purchases",
"offset": 0
},
"_score": 1,
"_source": {
"uuid": "45",
"brand": "Lenovo"
}
}
]
}
}
}
It should show only the purchase for Jhon (UK) with these parameters:
{"uuid": "45","brand":"Lenovo"} (the inner_hits with name "1")
Thanks
Related
My stop.txt contains messi.
My settings are below:
{
"settings": {
"index": {
"analysis": {
"filter": {
"synonym_en": {
"type": "synonym",
"synonyms_path": "synom.txt"
},
"english_stop": {
"type": "stop",
"stopwords_path": "stop.txt"
}
},
"analyzer": {
"english_analyzer": {
"tokenizer": "standard",
"filter": ["english_stop", "synonym_en"]
}
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "text",
"analyzer": "english_analyzer"
}
}
}
}
My dictionary is below
[
{ "id": 0, "name": "Messiis player" },
{ "id": 1, "name": "Messi player" },
{ "id": 2, "name": "Messi and Rono player" },
{ "id": 3, "name": "Rono and Messi player" },
{ "id": 4, "name": "messiis and Messi player" }
]
DSL query is below
{
"query": {
"bool": {
"must": {
"query_string": {
"query": "messi*",
"fields": ["name^128"]
}
}
}
}
}
My output is below; I am getting all of the documents:
{
"took": 3,
"timed_out": false,
"_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 },
"hits": {
"total": { "value": 5, "relation": "eq" },
"max_score": 128.0,
"hits": [
{
"_index": "player",
"_type": "_doc",
"_id": "0",
"_score": 128.0,
"_source": { "id": 0, "name": "Messiis player" }
},
{
"_index": "player",
"_type": "_doc",
"_id": "1",
"_score": 128.0,
"_source": { "id": 1, "name": "Messi player" }
},
{
"_index": "player",
"_type": "_doc",
"_id": "2",
"_score": 128.0,
"_source": { "id": 2, "name": "Messi and Rono player" }
},
{
"_index": "player",
"_type": "_doc",
"_id": "3",
"_score": 128.0,
"_source": { "id": 3, "name": "Rono and Messi player" }
},
{
"_index": "player",
"_type": "_doc",
"_id": "4",
"_score": 128.0,
"_source": { "id": 4, "name": "messiis and Messi player" }
}
]
}
}
My query contains a wildcard (*).
If I search for "query": "messi*", I get the output {'id': 4, 'name': 'messiis and Messi player'}.
If I search for "query": "messi*", I need the expected output shown below.
If I search for "query": "Messi*", I also need the expected output shown below (basically, the search has to be case-insensitive).
I can't figure out where the error occurs.
{
"took": 8,
"timed_out": false,
"_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 },
"hits": {
"total": { "value": 2, "relation": "eq" },
"max_score": 128.0,
"hits": [
{
"_index": "player",
"_type": "_doc",
"_id": "0",
"_score": 128.0,
"_source": { "id": 0, "name": "Messiis player" }
},
{
"_index": "player",
"_type": "_doc",
"_id": "4",
"_score": 128.0,
"_source": { "id": 4, "name": "messiis and Messi player" }
}
]
}
}
The problem is that your stop.txt file probably contains messi in lowercase and your english_analyzer doesn't lowercase your tokens.
So you have two options:
A. you can add Messi in your stop.txt file
B. you can add a lowercase token filter
"analyzer": {
"english_analyzer": {
"tokenizer": "standard",
"filter": ["lowercase", "english_stop", "synonym_en"]
^
|
add this
}
}
Then it will work and remove all messi tokens (whatever the case)
you can try this:
{
"query": {
"bool": {
"must": {
"query_string": {
"query": "messi",
"default_field": "name",
"default_operator":"OR"
}
}
}
}
}
Here, I have a indexed document like:
doc = {
"id": 1,
"content": [
{
"txt": "I",
"time": 0,
},
{
"txt": "have",
"time": 1,
},
{
"txt": "a book",
"time": 2,
},
{
"txt": "do not match this block",
"time": 3,
},
]
}
And I want to match "I have a book", and return the matched time: 0,1,2. Is there anyone who knows how to build the index and the query for this situation?
I think the "content.txt" should be flattened but "content.time" should be nested?
want to match "I have a book", and return the matched time: 0,1,2.
Adding a working example with index mapping, search query, and search result.
Index Mapping:
{
"mappings": {
"properties": {
"content": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"nested": {
"path": "content",
"query": {
"bool": {
"must": [
{
"match": {
"content.txt": "I have a book"
}
}
]
}
},
"inner_hits": {}
}
}
}
Search Result:
"inner_hits": {
"content": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 2.5226097,
"hits": [
{
"_index": "64752029",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "content",
"offset": 2
},
"_score": 2.5226097,
"_source": {
"txt": "a book",
"time": 2
}
},
{
"_index": "64752029",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "content",
"offset": 0
},
"_score": 1.5580825,
"_source": {
"txt": "I",
"time": 0
}
},
{
"_index": "64752029",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "content",
"offset": 1
},
"_score": 1.5580825,
"_source": {
"txt": "have",
"time": 1
}
}
]
}
}
}
}
I have the following documents in index products
{ "product_name": "prod-1", "meta": [ { "tag": "tag1", "score": "12" }, { "tag": "tag2", "score": "24" } ] }
{ "product_name": "prod-2", "meta": [ { "tag": "tag1", "score": "36" } ] }
{ "product_name": "prod-2", "meta": [ { "tag": "tag2", "score": "44" } ] }
{ "product_name": "prod-3", "meta": [ { "tag": "tag3", "score": "54" } ] }
I know how to group by product_name in es
POST /products/_search
{
"size": 0,
"aggs": {
"by_product": {
"terms": {
"field": "product_name"
}
}
}
}
After grouping by product_name, I want a field called meta in each bucket which has a union of meta from all documents in that bucket like this
[
{
"key": "prod-1",
"meta": [{ "tag": "tag1", "score": "12" }, { "tag": "tag2", "score": "24" }]
},
{
"key": "prod-2",
"meta": [{ "tag": "tag1", "score": "36" }, { "tag": "tag2", "score": "44" }]
},
{
"key": "prod-3",
"meta": [ { "tag": "tag3", "score": "54" } ]
}
]
How can I achieve this in Elasticsearch?
The best way to get your expected search result is to use the top hits
aggregation, with which you can add additional fields to each bucket of the terms
aggregation.
Search Query:
{
"size": 0,
"aggs": {
"by_product": {
"terms": {
"field": "product_name.keyword"
},
"aggs": {
"top_sales_hits": {
"top_hits": {
"_source": {
"includes": [
"meta.tag",
"meta.score"
]
}
}
}
}
}
}
}
Search Result:
"aggregations": {
"by_product": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "prod-2",
"doc_count": 2,
"top_sales_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "64801386",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "36",
"tag": "tag1"
}
]
}
},
{
"_index": "64801386",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "44",
"tag": "tag2"
}
]
}
}
]
}
}
},
{
"key": "prod-1",
"doc_count": 1,
"top_sales_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "64801386",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "12",
"tag": "tag1"
},
{
"score": "24",
"tag": "tag2"
}
]
}
}
]
}
}
},
{
"key": "prod-3",
"doc_count": 1,
"top_sales_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "64801386",
"_type": "_doc",
"_id": "4",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "54",
"tag": "tag3"
}
]
}
}
]
}
}
}
]
}
How can I make Elasticsearch return nested values in the format of hits {value1:..., value2..., value3..., etc..}
This is my request:
{
"_source": 0,
"query": {
"bool": {
"must": [
{
"nested": {
"path": "photo",
"query": {
"bool": {
"must": [
{
"match": {
"photo.hello": "true"
}
}
]
}
},
"inner_hits" : {}
}
}
]
}}}
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1.2231436,
"hits": [
{
"_index": ".3eautiful",
"_type": "profile",
"_id": "6UAaCls5iSgavEtFE2qMX902Xmb2",
"_score": 1.2231436,
"inner_hits": {
"photo": {
"hits": {
"total": 1,
"max_score": 1.2231436,
"hits": [
{
"_index": ".3eautiful",
"_type": "profile",
"_id": "6UAaCls5iSgavEtFE2qMX902Xmb2",
"_nested": {
"field": "photo",
"offset": 0
},
"_score": 1.2231436,
"_source": {
"hello": "true",
"i_am_superCOOL": "true",
"xoxox": "true",
"id": "-KSDRx5BN54JHitoq7Wb"
}
}
]
}
}
}
},
{
"_index": ".3eautiful",
"_type": "profile",
"_id": "KDFbeXrOedf7b6NVRGMO0HDIFgx1",
"_score": 1.2231436,
"inner_hits": {
"photo": {
"hits": {
"total": 2,
"max_score": 1.2231436,
"hits": [
{
"_index": ".3eautiful",
"_type": "profile",
"_id": "KDFbeXrOedf7b6NVRGMO0HDIFgx1",
"_nested": {
"field": "photo",
"offset": 1
},
"_score": 1.2231436,
"_source": {
"alahu": "true",
"hello": "true",
"same": "true",
"smukais": "true",
"id": "-KSDJzyUC_N5je-cR2aT"
}
},
{
"_index": ".3eautiful",
"_type": "profile",
"_id": "KDFbeXrOedf7b6NVRGMO0HDIFgx1",
"_nested": {
"field": "photo",
"offset": 0
},
"_score": 1.2231436,
"_source": {
"hello": "true",
"same": "true",
"selfyyy": "true",
"superSexy": "true",
"id": "-KPn4p7spS8NO7IVSLdF"
}
}
]
}
}
}
}
]
}
}
I am using a 2-dimensional dynamic attribute search. The problem with this approach is that there can be 20 results from 1 user, but I need to make it propriety based.
Just sticked to the same format.
How to sort by match, prioritising matches in the leftmost words
Explanation
Sort the results of the prefix query by the word it matches, prioritising matches in words further to the left.
Tests I've made
Data
DELETE /test
PUT /test
PUT /test/person/_mapping
{
"properties": {
"name": {
"type": "multi_field",
"fields": {
"name": {"type": "string"},
"original": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
PUT /test/person/1
{"name": "Berta Kassulke"}
PUT /test/person/2
{"name": "Kaley Bartoletti"}
PUT /test/person/3
{"name": "Kali Hahn"}
PUT /test/person/4
{"name": "Karolann Klein"}
PUT /test/person/5
{"name": "Sofia Mandez Kaloo"}
The mapping was added for the 'sort on original value' test.
Simple query
Query
POST /test/person/_search
{
"query": {
"prefix": {"name": {"value": "ka"}}
}
}
Result
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "person",
"_id": "4",
"_score": 1,
"_source": {
"name": "Karolann Klein"
}
},
{
"_index": "test",
"_type": "person",
"_id": "5",
"_score": 1,
"_source": {
"name": "Sofia Mandez Kaloo"
}
},
{
"_index": "test",
"_type": "person",
"_id": "1",
"_score": 1,
"_source": {
"name": "Berta Kassulke"
}
},
{
"_index": "test",
"_type": "person",
"_id": "2",
"_score": 1,
"_source": {
"name": "Kaley Bartoletti"
}
},
{
"_index": "test",
"_type": "person",
"_id": "3",
"_score": 1,
"_source": {
"name": "Kali Hahn"
}
}
]
}
}
With sorting
Request
POST /test/person/_search
{
"query": {
"prefix": {"name": {"value": "ka"}}
},
"sort": {"name": {"order": "asc"}}
}
Result
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": null,
"hits": [
{
"_index": "test",
"_type": "person",
"_id": "2",
"_score": null,
"_source": {
"name": "Kaley Bartoletti"
},
"sort": [
"bartoletti"
]
},
{
"_index": "test",
"_type": "person",
"_id": "1",
"_score": null,
"_source": {
"name": "Berta Kassulke"
},
"sort": [
"berta"
]
},
{
"_index": "test",
"_type": "person",
"_id": "3",
"_score": null,
"_source": {
"name": "Kali Hahn"
},
"sort": [
"hahn"
]
},
{
"_index": "test",
"_type": "person",
"_id": "5",
"_score": null,
"_source": {
"name": "Sofia Mandez Kaloo"
},
"sort": [
"kaloo"
]
},
{
"_index": "test",
"_type": "person",
"_id": "4",
"_score": null,
"_source": {
"name": "Karolann Klein"
},
"sort": [
"karolann"
]
}
]
}
}
With sort on original value
Query
POST /test/person/_search
{
"query": {
"prefix": {"name": {"value": "ka"}}
},
"sort": {"name.original": {"order": "asc"}}
}
Result
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": null,
"hits": [
{
"_index": "test",
"_type": "person",
"_id": "1",
"_score": null,
"_source": {
"name": "Berta Kassulke"
},
"sort": [
"Berta Kassulke"
]
},
{
"_index": "test",
"_type": "person",
"_id": "2",
"_score": null,
"_source": {
"name": "Kaley Bartoletti"
},
"sort": [
"Kaley Bartoletti"
]
},
{
"_index": "test",
"_type": "person",
"_id": "3",
"_score": null,
"_source": {
"name": "Kali Hahn"
},
"sort": [
"Kali Hahn"
]
},
{
"_index": "test",
"_type": "person",
"_id": "4",
"_score": null,
"_source": {
"name": "Karolann Klein"
},
"sort": [
"Karolann Klein"
]
},
{
"_index": "test",
"_type": "person",
"_id": "5",
"_score": null,
"_source": {
"name": "Sofia Mandez Kaloo"
},
"sort": [
"Sofia Mandez Kaloo"
]
}
]
}
}
Intended result
Sorted by name ASC, but prioritising matches in the leftmost words
Kaley Bartoletti
Kali Hahn
Karolann Klein
Berta Kassulke
Sofia Mandez Kaloo
Good question. One way to achieve this would be a combination of the edge ngram filter and the span first query.
This is my setting
{
"settings": {
"analysis": {
"analyzer": {
"my_custom_analyzer": {
"tokenizer": "standard",
"filter": ["lowercase",
"edge_filter",
"asciifolding"
]
}
},
"filter": {
"edge_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 8
}
}
}
},
"mappings": {
"person": {
"properties": {
"name": {
"type": "string",
"analyzer": "my_custom_analyzer",
"search_analyzer": "standard",
"fields": {
"standard": {
"type": "string"
}
}
}
}
}
}
}
After that I inserted your sample documents. Then I wrote the following query with dis_max. Notice that the end parameter of the first span_first query is 1, so it prioritizes (gives a higher score to) the leftmost match. I am first sorting by score and then by name.
{
"query": {
"dis_max": {
"tie_breaker": 0.7,
"boost": 1.2,
"queries": [
{
"match": {
"name": "ka"
}
},
{
"span_first": {
"match": {
"span_term": {
"name": "ka"
}
},
"end": 1
}
},
{
"span_first": {
"match": {
"span_term": {
"name": "ka"
}
},
"end": 2
}
}
]
}
},
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"name.standard": {
"order": "asc"
}
}
]
}
The result I get
"hits": [
{
"_index": "esedge",
"_type": "policy_data",
"_id": "2",
"_score": 0.72272325,
"_source": {
"name": "Kaley Bartoletti"
},
"sort": [
0.72272325,
"bartoletti"
]
},
{
"_index": "esedge",
"_type": "policy_data",
"_id": "3",
"_score": 0.72272325,
"_source": {
"name": "Kali Hahn"
},
"sort": [
0.72272325,
"hahn"
]
},
{
"_index": "esedge",
"_type": "policy_data",
"_id": "4",
"_score": 0.72272325,
"_source": {
"name": "Karolann Klein"
},
"sort": [
0.72272325,
"karolann"
]
},
{
"_index": "esedge",
"_type": "policy_data",
"_id": "1",
"_score": 0.54295504,
"_source": {
"name": "Berta Kassulke"
},
"sort": [
0.54295504,
"berta"
]
},
{
"_index": "esedge",
"_type": "policy_data",
"_id": "5",
"_score": 0.2905494,
"_source": {
"name": "Sofia Mandez Kaloo"
},
"sort": [
0.2905494,
"kaloo"
]
}
]
I hope this helps.