Discard all results of the same user if one of them matches in the result set - Elasticsearch

My requirement is that if a user has performed e2, then none of that user's records should appear in the result. For example, user_id 15371082 has performed e2 and e3; because this user has performed e2, both of their records (e2 and e3) should be discarded from the result.
[
  {
    "id": 1,
    "name": "a",
    "user_id": 15371080,
    "event": "e1"
  },
  {
    "id": 1,
    "name": "a",
    "user_id": 15371082,
    "event": "e2"
  },
  {
    "id": 1,
    "name": "a",
    "user_id": 15371081,
    "event": "e3"
  },
  {
    "id": 1,
    "name": "a",
    "user_id": 15371082,
    "event": "e3"
  }
]
Expected result
[
  {
    "id": 1,
    "name": "a",
    "user_id": 15371080,
    "event": "e1"
  },
  {
    "id": 1,
    "name": "a",
    "user_id": 15371081,
    "event": "e3"
  }
]
My result should look like the above.

The right way would be to create one unique document per user_id and store that user's events as nested documents. A single must_not clause on the nested events then excludes the whole user.
Mapping:
PUT eventindex/_mappings
{
  "properties": {
    "name": {
      "type": "text"
    },
    "user_id": {
      "type": "integer"
    },
    "event": {
      "type": "nested",
      "properties": {
        "name": {
          "type": "keyword"
        }
      }
    }
  }
}
Data:
````
[
{
"_index" : "eventindex",
"_type" : "_doc",
"_id" : "tT1IL20Bcyz1xvxninMr",
"_score" : 1.0,
"_source" : {
"name" : "b",
"user_id" : 2,
"event" : [
{
"name" : "e1"
},
{
"name" : "e3"
}
]
}
},
{
"_index" : "eventindex",
"_type" : "_doc",
"_id" : "tj1ML20Bcyz1xvxngXNn",
"_score" : 1.0,
"_source" : {
"name" : "a",
"user_id" : 1,
"event" : [
{
"name" : "e2"
},
{
"name" : "e3"
}
]
}
}
]
````
Query:
````
GET eventindex/_search
{
"query": {
"bool": {
"must_not": [
{
"nested": {
"path": "event",
"query": {
"term": {
"event.name": {
"value": "e2"
}
}
}
}
}
]
}
}
}
````
Result:
````
[
{
"_index" : "eventindex",
"_type" : "_doc",
"_id" : "tT1IL20Bcyz1xvxninMr",
"_score" : 0.0,
"_source" : {
"name" : "b",
"user_id" : 2,
"event" : [
{
"name" : "e1"
},
{
"name" : "e3"
}
]
}
}
]
````

Related

Elasticsearch: sorted nested array

Is it possible to configure the mapping of an index, or the Discover view of this index, in such a way that an array inside the documents is (or will be) sorted?
Background: I have an ES index with documents that contain an array.
This array is updated from time to time with new entries (objects containing a timestamp), and I would like the array to be sorted according to the timestamp inside the objects.
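If your field is defined as a nested type, you can use inner_hits to sort the array of objects. It returns the sorted nested objects inside inner_hits for each document; the array in _source itself keeps its original order. Note that inner_hits returns only the first 3 nested hits by default, so set its size option if an array can hold more entries.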
You can define field as nested like below:
{
"mappings": {
"properties": {
"name": {
"type": "keyword"
},
"openTimes": {
"type": "nested",
"properties": {
"date": {
"type": "date"
},
"name": {
"type": "keyword"
}
}
}
}
}
}
Let's consider the sample data below (bulk request body):
{"index": { } }
{ "name": "second on 6th (3rd on the 5th)", "openTimes": [ { "date": "2018-12-05T12:00:00" ,"name":"abc"}, { "date": "2018-12-06T11:00:00","name":"xyz" }] }
{"index": { } }
{ "name": "third on 6th (1st on the 5th)", "openTimes": [ {"date": "2018-12-05T10:00:00","name":"abc"}, { "date": "2018-12-06T12:00:00","name":"xyz" }] }
{"index": { } }
{ "name": "first on the 6th (2nd on the 5th)", "openTimes": [ {"date": "2018-12-05T11:00:00","name":"abc" }, { "date": "2018-12-06T10:00:00","name":"xyz" }] }
Below is Query:
{
"query": {
"nested": {
"path": "openTimes",
"query": {
"match_all": {}
},
"inner_hits": {
"sort": {
"openTimes.date": "desc"
}
}
}
}
}
Sample Response:
{
"_index" : "nested-listings",
"_type" : "_doc",
"_id" : "u0fw338BMCbs63yKkqi0",
"_score" : 1.0,
"_source" : {
"name" : "second on 6th (3rd on the 5th)",
"openTimes" : [
{
"date" : "2018-12-05T12:00:00",
"name" : "abc"
},
{
"date" : "2018-12-06T11:00:00",
"name" : "xyz"
}
]
},
"inner_hits" : {
"openTimes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "nested-listings",
"_type" : "_doc",
"_id" : "u0fw338BMCbs63yKkqi0",
"_nested" : {
"field" : "openTimes",
"offset" : 1
},
"_score" : null,
"_source" : {
"date" : "2018-12-06T11:00:00",
"name" : "xyz"
},
"sort" : [
1544094000000
]
},
{
"_index" : "nested-listings",
"_type" : "_doc",
"_id" : "u0fw338BMCbs63yKkqi0",
"_nested" : {
"field" : "openTimes",
"offset" : 0
},
"_score" : null,
"_source" : {
"date" : "2018-12-05T12:00:00",
"name" : "abc"
},
"sort" : [
1544011200000
]
}
]
}
}
}
}

How remove one element from list type field from ElasticSearch

How can I remove one element from a list-type field in Elasticsearch?
Here is some of my data; I want to remove some elements from a list-type field.
// The data of user list with book field
[
{
"userId": "1",
"books": [
{
"id": "1",
"name": "book1"
},
{
"id": "2",
"name": "book2"
}
]
},
{
"userId": "2",
"books": [
{
"id": "2",
"name": "book2"
},
{
"id": "3",
"name": "book3"
}
]
},
{
"userId": "13",
"books": [
{
"id": "2",
"name": "book2"
},
{
"id": "5",
"name": "book5"
}
]
}
]
What I want to do:
find the users whose userId in [1, 2, 3], and delete the book with id 2 from their books list.
The result expected:
[
{
"userId": "1",
"books": [
{
"id": "1",
"name": "book1"
}
]
},
{
"userId": "2",
"books": [
{
"id": "3",
"name": "book3"
}
]
},
{
"userId": "13",
"books": [
{
"id": "2",
"name": "book2"
},
{
"id": "5",
"name": "book5"
}
]
}
]
I'm new to Elasticsearch and this is a very difficult problem for me. It would be great if you could provide the request as a curl command.
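You can use the update_by_query request below with a Painless script. Note that it assumes all users are stored in a single document under a booklist array, as in the sample document shown after the request.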
POST book/_update_by_query
{
  "query": {
    "match_all": {}
  },
  "script": {
    "source": """
      // Walk every user entry stored in the document's booklist array;
      // only entries whose userId is listed in params.userid are modified.
      for (def entry : ctx._source.booklist) {
        if (params.userid.contains(entry['userId'])) {
          // removeIf deletes every matching book in a single pass.
          // Calling remove(j) inside an ascending index loop skips the
          // element that slides into position j after a removal, so
          // consecutive matches would be left behind.
          entry['books'].removeIf(book -> book['id'] == params.bookid);
        }
      }
    """,
    "lang": "painless",
    "params": {
      "userid": ["1", "2", "3"],
      "bookid": "2"
    }
  }
}
Below is document before executing above query:
{
"_index" : "book",
"_type" : "_doc",
"_id" : "aA1Vd34BATX2P1U_9ugg",
"_score" : 1.0,
"_source" : {
"booklist" : [
{
"userId" : "1",
"books" : [
{
"id" : "1",
"name" : "book1"
},
{
"id" : "2",
"name" : "book2"
}
]
},
{
"userId" : "2",
"books" : [
{
"id" : "2",
"name" : "book2"
},
{
"id" : "3",
"name" : "book3"
}
]
},
{
"userId" : "13",
"books" : [
{
"id" : "2",
"name" : "book2"
},
{
"id" : "5",
"name" : "book5"
}
]
}
]
}
}
Below is document after executing above query:
{
"_index" : "book",
"_type" : "_doc",
"_id" : "aA1Vd34BATX2P1U_9ugg",
"_score" : 1.0,
"_source" : {
"booklist" : [
{
"books" : [
{
"name" : "book1",
"id" : "1"
}
],
"userId" : "1"
},
{
"books" : [
{
"name" : "book3",
"id" : "3"
}
],
"userId" : "2"
},
{
"books" : [
{
"name" : "book2",
"id" : "2"
},
{
"name" : "book5",
"id" : "5"
}
],
"userId" : "13"
}
]
}
}

Filter document on items in an array ElasticSearch using condition AND

I have data:
[
{
"NAME": "John Doe",
"CLASS":[1,10,30]
},
{
"NAME": "Albert",
"CLASS": [1,10,40]
},
{
"NAME": "XINN",
"CLASS": [10,30]
},
{
"NAME": "UJANG",
"CLASS": [1,40]
},
{
"NAME": "BAMBANG",
"CLASS": [30,40]
}
]
I have the following query DSL:
{
query: {
terms: {
class: [1,10]
}
}
}
and I want what will appear is:
[{"NAME": "John Doe","CLASS":[1,10,30]},{"NAME": "Albert","CLASS": [1,10,40]}]
How do I change my search to match the result?
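A terms query matches documents containing any of the listed values (OR). To require all values (AND), combine one term query per value in the must clause of a bool query.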
Query
{
"query": {
"bool": {
"must": [
{
"term": {
"CLASS": {
"value": 1
}
}
},
{
"term": {
"CLASS": {
"value": 10
}
}
}
]
}
}
}
Result
"hits" : [
{
"_index" : "index34",
"_type" : "_doc",
"_id" : "2EdK6XsBP61bDf9bI3R1",
"_score" : 2.0,
"_source" : {
"NAME" : "John Doe",
"CLASS" : [
1,
10,
30
]
}
},
{
"_index" : "index34",
"_type" : "_doc",
"_id" : "2UdK6XsBP61bDf9bMHT5",
"_score" : 2.0,
"_source" : {
"NAME" : "Albert",
"CLASS" : [
1,
10,
40
]
}
}
]

How to build simple terms query for nested object?

I have index like this:
PUT job_offers
{
"mappings": {
"properties": {
"location": {
"properties": {
"slug": {
"type": "keyword"
},
"name": {
"type": "text"
}
},
"type": "nested"
},
"experience": {
"properties": {
"slug": {
"type": "keyword"
},
"name": {
"type": "text"
}
},
"type": "nested"
}
}
}
}
I insert this object:
POST job_offers/_doc
{
"title": "Junior Ruby on Rails Developer",
"location": [
{
"slug": "new-york",
"name": "New York"
},
{
"slug": "atlanta",
"name": "Atlanta"
},
{
"slug": "remote",
"name": "Remote"
}
],
"experience": [
{
"slug": "junior",
"name": "Junior"
}
]
}
This query returns 0 documents.
GET job_offers/_search
{
"query": {
"terms": {
"location.slug": [
"remote",
"new-york"
]
}
}
}
Can you explain why? I thought it should return documents where location.slug is remote or new-york.
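Fields of type nested have to be queried with a nested query, which has a different syntax: a terms query placed directly on location.slug does not search inside the nested objects.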
GET job_offers/_search
{
"query": {
"nested": {
"path": "location",
"query": {
"terms": {
"location.slug": ["remote","new-york"]
}
}
}
}
}
Result:
"hits" : [
{
"_index" : "job_offers",
"_type" : "_doc",
"_id" : "wWjoXnEBs0rCGpYsvUf4",
"_score" : 1.0,
"_source" : {
"title" : "Junior Ruby on Rails Developer",
"location" : [
{
"slug" : "new-york",
"name" : "New York"
},
{
"slug" : "atlanta",
"name" : "Atlanta"
},
{
"slug" : "remote",
"name" : "Remote"
}
],
"experience" : [
{
"slug" : "junior",
"name" : "Junior"
}
]
}
}
]
This returns the entire document whenever any location.slug matches "remote" or "new-york". If you only want the matching nested objects, you need to use inner_hits:
GET job_offers/_search
{
"query": {
"nested": {
"path": "location",
"query": {
"terms": {
"location.slug": ["remote","new-york"]
}
},
"inner_hits": {} --> note
}
}
}
Result:
"hits" : [
{
"_index" : "job_offers",
"_type" : "_doc",
"_id" : "wWjoXnEBs0rCGpYsvUf4",
"_score" : 1.0,
"_source" : {
"title" : "Junior Ruby on Rails Developer",
"location" : [
{
"slug" : "new-york",
"name" : "New York"
},
{
"slug" : "atlanta",
"name" : "Atlanta"
},
{
"slug" : "remote",
"name" : "Remote"
}
],
"experience" : [
{
"slug" : "junior",
"name" : "Junior"
}
]
},
"inner_hits" : { --> will give matched nested object
"location" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "job_offers",
"_type" : "_doc",
"_id" : "wWjoXnEBs0rCGpYsvUf4",
"_nested" : {
"field" : "location",
"offset" : 0
},
"_score" : 1.0,
"_source" : {
"slug" : "new-york",
"name" : "New York"
}
},
{
"_index" : "job_offers",
"_type" : "_doc",
"_id" : "wWjoXnEBs0rCGpYsvUf4",
"_nested" : {
"field" : "location",
"offset" : 2
},
"_score" : 1.0,
"_source" : {
"slug" : "remote",
"name" : "Remote"
}
}
]
}
}
}
}
]
Also, I see that you are using two fields (name and slug) for the same data with different types. If the data really is the same in both fields and only the data type differs, you can use multi-fields (the fields mapping parameter) for that:
It is often useful to index the same field in different ways for
different purposes. This is the purpose of multi-fields. For instance,
a string field could be mapped as a text field for full-text search,
and as a keyword field for sorting or aggregations:
In that case your mapping will become below
PUT job_offers
{
"mappings": {
"properties": {
"location": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
},
"type": "nested"
},
"experience": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
},
"type": "nested"
}
}
}
}

How to search in nested object's array matching all items

I have an index with nested objects houses.
My index contains these documents:
{
"_id": "hello",
"name": "pippos",
"houses": [
{
"address": "garden square",
"id1": 1,
"id2": 5
},
{
"address": "top square",
"id1": 1,
"id2": 5
}
]
},
{
"_id": "hellone",
"name": "pippoone",
"houses": [
{
"address": "central square",
"id1": 1,
"id2": 9
},
{
"address": "minimale square",
"id1": 1,
"id2": 5
}
]
}
Using this query I receive both documents:
GET /pippis/_search
{
"query": {
"nested": {
"path": "houses",
"query": {
"bool": {
"must": [
{ "match": { "houses.id1": 1 }},
{ "match": { "houses.id2": 5 }}
]
}
}
}
}
}
I want only documents having all houses with id1=1 and id2=5
Mapping:
PUT /user
{
"mappings": {
"properties": {
"name": {
"type": "text"
},
"houses": {
"type": "nested",
"properties": {
"address": {
"type": "text"
},
"id1": {
"type": "integer"
},
"id2": {
"type": "integer"
}
}
}
}
}
}
Data:
"hits" : [
{
"_index" : "user",
"_type" : "_doc",
"_id" : "5kQ6-2wBWSK8eKKSSozQ",
"_score" : 1.0,
"_source" : {
"name" : "pippos",
"houses" : [
{
"address" : "garden square",
"id1" : 1,
"id2" : 5
},
{
"address" : "top square",
"id1" : 1,
"id2" : 5
}
]
}
},
{
"_index" : "user",
"_type" : "_doc",
"_id" : "50Q9-2wBWSK8eKKStIzf",
"_score" : 1.0,
"_source" : {
"name" : "pippoone",
"houses" : [
{
"address" : "central square",
"id1" : 1,
"id2" : 9
},
{
"address" : "minimale square",
"id1" : 1,
"id2" : 5
}
]
}
},
{
"_index" : "user",
"_type" : "_doc",
"_id" : "6ERM-2wBWSK8eKKS3IzD",
"_score" : 1.0,
"_source" : {
"name" : "pippoone1",
"houses" : [
{
"address" : "central square",
"id1" : 2,
"id2" : 9
},
{
"address" : "minimale square",
"id1" : 2,
"id2" : 5
}
]
}
}
]
}
Query:
GET /user/_search
{
"query": {
"bool": {
"must_not": [ -----> Not of documents returned in nested query
{
"nested": {
"path": "houses",
"query": {
"bool": {
"should": [ -----> get documents where id1 is not 1 or id2 is not 5
{
"bool": {
"must_not": [
{
"match": {
"houses.id1": 1
}
}
]
}
},
{
"bool": {
"must_not": [
{
"match": {
"houses.id2": 5
}
}
]
}
}
]
}
}
}
}
]
}
}
}
Result:
[
{
"_index" : "user",
"_type" : "_doc",
"_id" : "5kQ6-2wBWSK8eKKSSozQ",
"_score" : 0.0,
"_source" : {
"name" : "pippos",
"houses" : [
{
"address" : "garden square",
"id1" : 1,
"id2" : 5
},
{
"address" : "top square",
"id1" : 1,
"id2" : 5
}
]
}
}
]

Resources