Elasticsearch nested query and filter - elasticsearch

I have the following Elasticsearch mapping:
"show": {
"properties": {
"startsAt": {
"type": "date"
},
"venue": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"address": {
"type": "string",
"index": "no"
},
"location": {
"type": "geo_point",
"lat_lon": true
},
"section": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
I want to find an exact match using the show.startsAt, show.venue.location, and show.venue.section properties. I've been trying the following query, but it's not taking show.venue.section into account.
bool: {
must: [
{match: {startsAt: starts}},
{
nested: {
path: 'venue',
query: {
match: {'venue.section': section}
},
filter: {
geo_distance: {
distance: '1m',
'venue.location': location
}
}
}
}
]
}

This query worked for me:
query: {
bool: {
must: [
{match: {startsAt: starts}},
{
nested: {
path: 'venue',
filter: {
bool: {
must: [
{
geo_distance: {
distance: '1m',
'venue.location': location
}
},
{
term: {'venue.section': section}
}
]
}
}
}
}
]
}
}

Related

ElasticSearch query nested path filter OR

I have following index:
PUT /ab11
{
"mappings": {
"properties": {
"product_id": {
"type": "keyword"
},
"data": {
"type": "nested",
"properties": {
"p_id": {
"type": "keyword"
}
}
}
}
}
}
PUT /ab11/_doc/1
{
"product_id": "123",
"data": [
{
"p_id": "a"
},
{
"p_id": "b"
},
{
"p_id": "c"
}
]
}
I want to run a query equivalent to the following SQL (NOTE: I want a filter rather than a query, because I don't care about the score):
select * from abc11 where data.pid = "a" or data.pid = "b"
You can do it like this because the terms query has OR semantics by default:
{
"query": {
"nested": {
"path": "data",
"query": {
"terms": {
"data.p_id": [
"a",
"b"
]
}
}
}
}
}
Basically, select all documents which have either "a" or "b" in their data.p_id nested docs.

Elasticsearch index type doesn't change after updating the mapping

I've done some _bulk inserts successfully, and now I'm trying to run a query with a date range and a filter, something like:
{
"query": {
"bool": {
"must": [{
"terms": {
"mt_id": [613]
}
},
{
"range": {
"time": {
"gt": 1470009600000,
"lt": 1470009600000
}
}
}]
}
}
Unfortunately, I got no results. I then noticed that the index mapping created after the bulk insert is as follows:
{
"agg__ex_2016_8_3": {
"mappings": {
"player": {
"properties": {
"adLoad": {
"type": "long"
},
"mt_id": {
"type": "long"
},
"time": {
"type": "string"
}
}
},
As a solution I tried to change the index mapping with:
PUT /agg__ex_2016_8_3/_mapping/player
{
"properties" : {
"mt_id" : {
"type" : "long",
"index": "not_analyzed"
}
}
}
got
{
"acknowledged": true
}
and PUT /agg__ex_2016_8_3/_mapping/player
{
"properties" : {
"time" : {
"type" : "date",
"format" : "yyyy/MM/dd HH:mm:ss"
}
}
}
got:
{
"error": {
"root_cause": [
{
"type": "remote_transport_exception",
"reason": "[vj_es_c1-esc13][10.132.69.145:9300][indices:admin/mapping/put]"
}
],
"type": "illegal_argument_exception",
"reason": "mapper [time] of different type, current_type [string], merged_type [date]"
},
"status": 400
}
but nothing happened, and I still don't get any results.
What am I doing wrong? (I must work over plain HTTP, not using curl.)
Thanks!!
Try this:
# 1. delete index
DELETE agg__ex_2016_8_3
# 2. recreate it with the proper mapping
PUT agg__ex_2016_8_3
{
"mappings": {
"player": {
"properties": {
"adLoad": {
"type": "long"
},
"mt_id": {
"type": "long"
},
"time": {
"type": "date"
}
}
}
}
}
# 3. create doc
PUT agg__ex_2016_8_3/player/104
{
"time": "1470009600000",
"domain": "organisemyhouse.com",
"master_domain": "613###organisemyhouse.com",
"playerRequets": 4,
"playerLoads": 0,
"c_Id": 0,
"cb_Id": 0,
"mt_Id": 613
}
# 4. search
POST agg__ex_2016_8_3/_search
{
"query": {
"bool": {
"must": [
{
"terms": {
"mt_Id": [
613
]
}
},
{
"range": {
"time": {
"gte": 1470009600000,
"lte": 1470009600000
}
}
}
]
}
}
}

ElasticSearch Snowball Analyzer not working with nested query

I have created an index with the following mapping
PUT http://localhost:9200/test1
{
"mappings": {
"searchText": {
"properties": {
"catalogue_product": {
"type":"nested",
"properties": {
"id": {
"type": "string",
"index":"not_analyzed"
},
"long_desc": {
"type":"nested",
"properties": {
"translation": {
"type":"nested",
"properties": {
"en-GB": {
"type": "string",
"anlayzer": "snowball"
},
"fr-FR": {
"type": "string",
"anlayzer": "snowball"
}
}
}
}
}
}
}
}
}
}
}
I have put one record using
PUT http://localhost:9200/test1/searchText/1
{
"catalogue_product": {
"id": "18437",
"long_desc": {
"translation": {
"en-GB": "C120 - circuit breaker - C120H - 4P - 125A - B curve",
"fr-FR": "Disjoncteur C120H 4P 125A courbe B 15000A"
}
}
}
}
Then if I do a search for the word
breaker
inside
catalogue_product.long_desc.translation.en-GB
I get the added record
POST http://localhost:9200/test1/searchText/_search
{
"query": {
"nested": {
"path": "catalogue_product.long_desc.translation",
"query": {
"match": {
"catalogue_product.long_desc.translation.en-GB": "breaker"
}
}
}
}
}
If I replace the word
breaker
with
breakers
, I don't get any records, in spite of the en-GB field having analyzer=snowball in the mapping
POST http://localhost:9200/test1/searchText/_search
{
"query": {
"nested": {
"path": "catalogue_product.long_desc.translation",
"query": {
"match": {
"catalogue_product.long_desc.translation.en-GB": "breakers"
}
}
}
}
}
I am going crazy with this. Where am I going wrong?
I tried a new mapping with analyzer as english instead of snowball, but that did not work either :(
Any help is appreciated
Dude, it's a typo. It's analyzer and not anlayzer
PUT http://localhost:9200/test1
{
"mappings": {
"searchText": {
"properties": {
"catalogue_product": {
"type":"nested",
"properties": {
"id": {
"type": "string",
"index":"not_analyzed"
},
"long_desc": {
"type":"nested",
"properties": {
"translation": {
"type":"nested",
"properties": {
"en-GB": {
"type": "string",
"analyzer": "snowball"
},
"fr-FR": {
"type": "string",
"analyzer": "snowball"
}
}
}
}
}
}
}
}
}
}
}

Elasticsearch how to use multi_match with wildcard

I have a User object with properties Name and Surname. I want to search these fields using one query, and I found multi_match in the documentation, but I don't know how to properly use that with a wildcard. Is it possible?
I tried with a multi_match query but it didn't work:
{
"query": {
"multi_match": {
"query": "*mar*",
"fields": [
"user.name",
"user.surname"
]
}
}
}
Alternatively you could use a query_string query with wildcards.
"query": {
"query_string": {
"query": "*mar*",
"fields": ["user.name", "user.surname"]
}
}
This will be slower than using an nGram filter at index-time (see my other answer), but if you are looking for a quick and dirty solution...
Also I am not sure about your mapping, but if you are using user.name instead of name your mapping needs to look like this:
"your_type_name_here": {
"properties": {
"user": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"surname": {
"type": "string"
}
}
}
}
}
Such a query worked for me:
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"should": [
{"query": {"wildcard": {"user.name": {"value": "*mar*"}}}},
{"query": {"wildcard": {"user.surname": {"value": "*mar*"}}}}
]
}
}
}
}
}
Similar to what you are doing, except that in my case there could be different masks for different fields.
I just did this now:
GET _search {
"query": {
"bool": {
"must": [
{
"range": {
"theDate": {
"gte": "2014-01-01",
"lte": "2014-12-31"
}
}
},
{
"match" : {
"Country": "USA"
}
}
],
"should": [
{
"wildcard" : { "Id_A" : "0*" }
},
{
"wildcard" : { "Id_B" : "0*" }
}
],"minimum_number_should_match": 1
}
}
}
Similar to the suggestion above, but this is simple and worked for me:
{
"query": {
"bool": {
"must":
[
{
"wildcard" : { "processname.keyword" : "*system*" }
},
{
"wildcard" : { "username" : "*admin*" }
},
{
"wildcard" : { "device_name" : "*10*" }
}
]
}
}
}
I would not use wildcards; they will not scale well. You are asking a lot of the search engine at query time. You can use the nGram filter to do the processing at index time rather than at search time.
See this discussion on the nGram filter.
After indexing the name and surname correctly (change your mapping, there are examples in the above link) you can use multi-match but without wildcards and get the expected results.
description: {
type: 'keyword',
normalizer: 'useLowercase',
},
product: {
type: 'object',
properties: {
name: {
type: 'keyword',
normalizer: 'useLowercase',
},
},
},
activity: {
type: 'object',
properties: {
name: {
type: 'keyword',
normalizer: 'useLowercase',
},
},
},
query:
query: {
bool: {
must: [
{
bool: {
should: [
{
wildcard: {
description: {
value: `*${value ? value : ''}*`,
boost: 1.0,
rewrite: 'constant_score',
},
},
},
{
wildcard: {
'product.name': {
value: `*${value ? value : ''}*`,
boost: 1.0,
rewrite: 'constant_score',
},
},
},
{
wildcard: {
'activity.name': {
value: `*${value ? value : ''}*`,
boost: 1.0,
rewrite: 'constant_score',
},
},
},
],
},
},
{
match: {
recordStatus: RecordStatus.Active,
},
},
{
bool: {
must_not: [
{
term: {
'user.id': req.currentUser?.id,
},
},
],
},
},
{
bool: {
should: tags
? tags.map((name: string) => {
return {
nested: {
path: 'tags',
query: {
match: {
'tags.name': name,
},
},
},
};
})
: [],
},
},
],
filter: {
bool: {
must_not: {
terms: {
id: existingIds ? existingIds : [],
},
},
},
},
},
},
sort: [
{
updatedAt: {
order: 'desc',
},
},
],

Using a custom_score to sort by a nested child's timestamp

I'm pretty new to elasticsearch and have been banging my head trying to get this sorting to work. The general idea is to search email message threads with nested messages and nested participants. The goal is to display search results at the thread level, sorting by the participant who is doing the search and either the last_received_at or last_sent_at column depending on which mailbox they are in.
My understanding is that you can't sort by a single child's value among many nested children. So in order to do this I saw a couple of suggestions for using a custom_score with a script, then sorting on the score. My plan is to dynamically change the sort column and then run a nested custom_score query that will return the date of one of the participants as the score. I've been noticing some issues with both the score format being strange (e.g., it always has 4 zeros at the end) and it may not be returning the date that I was expecting.
Below are simplified versions of the index and the query in question. If anyone has any suggestions, I'd be very grateful. (FYI - I am using elasticsearch version 0.20.6.)
Index:
mappings: {
message_thread: {
properties: {
id: {
type: long
}
subject: {
dynamic: true
properties: {
id: {
type: long
}
name: {
type: string
}
}
}
participants: {
dynamic: true
properties: {
id: {
type: long
}
name: {
type: string
}
last_sent_at: {
format: dateOptionalTime
type: date
}
last_received_at: {
format: dateOptionalTime
type: date
}
}
}
messages: {
dynamic: true
properties: {
sender: {
dynamic: true
properties: {
id: {
type: long
}
}
}
id: {
type: long
}
body: {
type: string
}
created_at: {
format: dateOptionalTime
type: date
}
recipient: {
dynamic: true
properties: {
id: {
type: long
}
}
}
}
}
version: {
type: long
}
}
}
}
Query:
{
"query": {
"bool": {
"must": [
{
"term": { "participants.id": 3785 }
},
{
"custom_score": {
"query": {
"filtered": {
"query": { "match_all": {} },
"filter": {
"term": { "participants.id": 3785 }
}
}
},
"params": { "sort_column": "participants.last_received_at" },
"script": "doc[sort_column].value"
}
}
]
}
},
"filter": {
"bool": {
"must": [
{
"term": { "messages.recipient.id": 3785 }
}
]
}
},
"sort": [ "_score" ]
}
Solution:
Thanks to @imotov, here is the final result. The participants were not properly nested in the index (while the messages didn't need to be). In addition, include_in_root was used for the participants to simplify the query (participants are small records and not a real size issue, although @imotov also provided an example without it). He then restructured the JSON request to use a dis_max query.
curl -XDELETE "localhost:9200/test-idx"
curl -XPUT "localhost:9200/test-idx" -d '{
"mappings": {
"message_thread": {
"properties": {
"id": {
"type": "long"
},
"messages": {
"properties": {
"body": {
"type": "string",
"analyzer": "standard"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd'\''T'\''HH:mm:ss'\''Z'\''"
},
"id": {
"type": "long"
},
"recipient": {
"dynamic": "true",
"properties": {
"id": {
"type": "long"
}
}
},
"sender": {
"dynamic": "true",
"properties": {
"id": {
"type": "long"
}
}
}
}
},
"messages_count": {
"type": "long"
},
"participants": {
"type": "nested",
"include_in_root": true,
"properties": {
"id": {
"type": "long"
},
"last_received_at": {
"type": "date",
"format": "yyyy-MM-dd'\''T'\''HH:mm:ss'\''Z'\''"
},
"last_sent_at": {
"type": "date",
"format": "yyyy-MM-dd'\''T'\''HH:mm:ss'\''Z'\''"
},
"name": {
"type": "string",
"analyzer": "standard"
}
}
},
"subject": {
"properties": {
"id": {
"type": "long"
},
"name": {
"type": "string"
}
}
}
}
}
}
}'
curl -XPUT "localhost:9200/test-idx/message_thread/1" -d '{
"id" : 1,
"subject" : {"name": "Test Thread"},
"participants" : [
{"id" : 87793, "name" : "John Smith", "last_received_at" : null, "last_sent_at" : "2010-10-27T17:26:58Z"},
{"id" : 3785, "name" : "David Jones", "last_received_at" : "2010-10-27T17:26:58Z", "last_sent_at" : null}
],
"messages" : [{
"id" : 1,
"body" : "This is a test.",
"sender" : { "id" : 87793 },
"recipient" : { "id" : 3785},
"created_at" : "2010-10-27T17:26:58Z"
}]
}'
curl -XPUT "localhost:9200/test-idx/message_thread/2" -d '{
"id" : 2,
"subject" : {"name": "Elastic"},
"participants" : [
{"id" : 57834, "name" : "Paul Johnson", "last_received_at" : "2010-11-25T17:26:58Z", "last_sent_at" : "2010-10-25T17:26:58Z"},
{"id" : 3785, "name" : "David Jones", "last_received_at" : "2010-10-25T17:26:58Z", "last_sent_at" : "2010-11-25T17:26:58Z"}
],
"messages" : [{
"id" : 2,
"body" : "More testing of elasticsearch.",
"sender" : { "id" : 57834 },
"recipient" : { "id" : 3785},
"created_at" : "2010-10-25T17:26:58Z"
},{
"id" : 3,
"body" : "Reply message.",
"sender" : { "id" : 3785 },
"recipient" : { "id" : 57834},
"created_at" : "2010-11-25T17:26:58Z"
}]
}'
curl -XPOST localhost:9200/test-idx/_refresh
echo
# Using include in root
curl "localhost:9200/test-idx/message_thread/_search?pretty=true" -d '{
"query": {
"filtered": {
"query": {
"nested": {
"path": "participants",
"score_mode": "max",
"query": {
"custom_score": {
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"participants.id": 3785
}
}
}
},
"params": {
"sort_column": "participants.last_received_at"
},
"script": "doc[sort_column].value"
}
}
}
},
"filter": {
"query": {
"multi_match": {
"query": "test",
"fields": ["subject.name", "participants.name", "messages.body"],
"operator": "and",
"use_dis_max": true
}
}
}
}
},
"sort": ["_score"],
"fields": []
}
'
# Not using include in root
curl "localhost:9200/test-idx/message_thread/_search?pretty=true" -d '{
"query": {
"filtered": {
"query": {
"nested": {
"path": "participants",
"score_mode": "max",
"query": {
"custom_score": {
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"participants.id": 3785
}
}
}
},
"params": {
"sort_column": "participants.last_received_at"
},
"script": "doc[sort_column].value"
}
}
}
},
"filter": {
"query": {
"bool": {
"should": [{
"match": {
"subject.name":"test"
}
}, {
"nested" : {
"path": "participants",
"query": {
"match": {
"name":"test"
}
}
}
}, {
"match": {
"messages.body":"test"
}
}
]
}
}
}
}
},
"sort": ["_score"],
"fields": []
}
'
There are a couple of issues here. You are asking about nested objects, but participants are not defined in your mapping as nested objects. The second possible issue is that the score has type float, so it might not have enough precision to represent a timestamp as is. If you can figure out how to fit this value into a float, you can take a look at this example: Elastic search - tagging strength (nested/child document boosting). However, if you are developing a new system, it might be prudent to upgrade to 0.90.0.Beta1, which supports sorting on nested fields.

Resources