Search all unique terms from a given query in elastic search

Search all unique terms from a given query in elastic search - elasticsearch

I am trying to search for all the unique names in the index test_nested.
GET test_nested/_mappings
{
"test_nested": {
"mappings": {
"my_type": {
"properties": {
"group": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
}
GET test_nested/_search
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 1,
"hits": [
{
"_index": "test_nested",
"_type": "my_type",
"_id": "AWG5iVBz4bQsVnslc9gL",
"_score": 1,
"_source": {
"group": "fans",
"user": [
{
"name": "Linux"
},
{
"name": "Android (operating system)"
},
{
"name": "Widows 10"
}
]
}
},
{
"_index": "test_nested",
"_type": "my_type",
"_id": "AWG5ieKW4bQsVnslc9gM",
"_score": 1,
"_source": {
"group": "fans",
"user": [
{
"name": "Bitcoin"
},
{
"name": "PHP"
},
{
"name": "Microsoft Windows"
}
]
}
},
{
"_index": "test_nested",
"_type": "my_type",
"_id": "AWG5irrV4bQsVnslc9gN",
"_score": 1,
"_source": {
"group": "fans",
"user": [
{
"name": "Windows XP"
}
]
}
},
{
"_index": "test_nested",
"_type": "my_type",
"_id": "1",
"_score": 1,
"_source": {
"group": "fans",
"user": [
{
"name": "iOS"
},
{
"name": "Android (operating system)"
},
{
"name": "Widows 10"
},
{
"name": "Widows XP"
}
]
}
}
]
}
}
I want all the unique names for a term. i.e. if I search for "wi"* then I should get [Microsoft Windows, Widows 10, Windows XP]

I don't know exactly what you mean but I use that query to list all statuses:
GET order/default/_search
{
"size": 0,
"aggs": {
"status_terms": {
"terms": {
"field": "status.keyword",
"missing": "N/A",
"min_doc_count": 0,
"order": {
"_key": "asc"
}
}
}
}
}
My model has status field and that query lists all statuses.
This is bucket aggregations
One of fields in result is:
sum_other_doc_count - Elastic returns the top unique terms. So if you have many different terms then some of them will not appear in the results. This field is a sum of documents which will not be a part of the response.
For nested objects try to read and use Nested Query docs

I found the solution. Hope it helps someone.
GET record_new/_search
{
"size": 0,
"query": {
"term": {
"software_tags": {
"value": "windows"
}
}
},
"aggs": {
"software_tags": {
"terms": {
"field": "software_tags.keyword",
"include" : ".*Windows.*",
"size": 10000,
"order": {
"_count": "desc"
}
}
}
}
}

Related

Elasticsearch - Is it possible to collapse first then aggregate data of a nested field?

I am using Elasticsearch and I want to group our results by a specific field, returning top n documents per group. The document have a nested filed and I want to aggregate all the documents' nested field for each group.
Example
I have 5 documents and each have a groupId and also a nested field peoples. I want group these documents by the groupId. And then for each group, I want to get top 2 people(some documents may contain same people).
PUT test/_mapping
{
"properties": {
"groupId":{
"type":"keyword"
},
"id":{
"type":"keyword"
},
"name":{
"type":"text"
},
"people":{
"type":"nested",
"properties":{
"email":{
"type":"keyword"
}
}
}
}
}
PUT test/_doc/1
{
"name": "docs1",
"groupId": "1",
"people":[{
"email":"people1#test.com"
}]
}
PUT test/_doc/2
{
"name": "docs2",
"groupId": "1",
"people":[{
"email":"people2.1#test.com"
},
{
"email":"people2.2#test.com"
}]
}
PUT test/_doc/3
{
"name": "docs3",
"groupId": "2",
"people":[{
"email":"people3.1#test.com"
},
{
"email":"people2.2#test.com"
}]
}
PUT test/_doc/4
{
"name": "docs4",
"groupId": "1",
"people":[{
"email":"people4.1#test.com"
},
{
"email":"people4.2#test.com"
}]
}
PUT test/_doc/5
{
"name": "docs5",
"groupId": "3",
"people":[{
"email":"people5.1#test.com"
},
{
"email":"people5.2#test.com"
}]
}
Search query
GET test/_search
{
"collapse": {
"field": "groupId",
"inner_hits": {
"name":"inner",
"size": 2
}
},
"sort": [
{
"groupId": {
"order": "asc"
}
}
],
"size": 2,
"from": 0
}
Result
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 5,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "test",
"_id": "1",
"_score": null,
"_source": {
"name": "docs1",
"groupId": "1",
"people": [
{
"email": "people1#test.com"
}
]
},
"fields": {
"groupId": [
"1"
]
},
"sort": [
"1"
],
"inner_hits": {
"inner": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test",
"_id": "1",
"_score": 0,
"_source": {
"name": "docs1",
"groupId": "1",
"people": [
{
"email": "people1#test.com"
}
]
}
},
{
"_index": "test",
"_id": "2",
"_score": 0,
"_source": {
"name": "docs2",
"groupId": "1",
"people": [
{
"email": "people2.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
}
}
]
}
}
}
},
{
"_index": "test",
"_id": "3",
"_score": null,
"_source": {
"name": "docs3",
"groupId": "2",
"people": [
{
"email": "people3.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
},
"fields": {
"groupId": [
"2"
]
},
"sort": [
"2"
],
"inner_hits": {
"inner": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test",
"_id": "3",
"_score": 0,
"_source": {
"name": "docs3",
"groupId": "2",
"people": [
{
"email": "people3.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
}
}
]
}
}
}
}
]
}
}
Expecting is to aggregate a groupPeople field for each group and it contains top n people of that group(should not affected by the inner_hit size, like for groupId=1, it contains 3 documents and 5 people).

The query that you're looking for is this one:
POST test/_search
{
"size": 0,
"aggs": {
"groups": {
"terms": {
"field": "groupId",
"size": 10
},
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"emails": {
"terms": {
"field": "people.email",
"size": 2
}
}
}
}
}
}
}
}
If you need pagination, you can achieve the same using the composite aggregation:
POST test/_search
{
"size": 0,
"aggs": {
"pages": {
"composite": {
"sources": [
{
"groups": {
"terms": {
"field": "groupId"
}
}
}
]
},
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"emails": {
"terms": {
"field": "people.email",
"size": 2
}
}
}
}
}
}
}
}

Querying array with nested objects in Elasticsearch to get multiple objects

I have data in Elasticsearch in the below format -
"segments": [
{"id": "ABC", "value":123},
{"id": "PQR", "value":345},
{"id": "DEF", "value":567},
{"id": "XYZ", "value":789},
]
I want to retrieve all segments where id is "ABC" or "DEF".
I looked up the docs (https://www.elastic.co/guide/en/elasticsearch/reference/7.9/query-dsl-nested-query.html) and few examples on YouTube but the all look to retrieve only a single object while I want to retrieve more than 1.
Is there a way to do this?

You can use nested query with inner hits as shown here.
I hope your index mapping is looks like below and segments field is define as nested
"mappings": {
"properties": {
"segments": {
"type": "nested",
"properties": {
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "long"
}
}
}
}
}
You can use below Query:
{
"_source" : false,
"query": {
"nested": {
"path": "segments",
"query": {
"terms": {
"segments.id.keyword": [
"ABC",
"DEF"
]
}
},
"inner_hits": {}
}
}
}
Response:
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_score": 1,
"inner_hits": {
"segments": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_nested": {
"field": "segments",
"offset": 0
},
"_score": 1,
"_source": {
"id": "ABC",
"value": 123
}
},
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_nested": {
"field": "segments",
"offset": 2
},
"_score": 1,
"_source": {
"id": "DEF",
"value": 567
}
}
]
}
}
}
}
]
}

Elasticsearch template to support case insensitive searches

I've setup a normalizer on an index field to support case insensitive searches, cant seem to get it to work.
GET users/
Returns the following mapping:
{
"users": {
"aliases": {},
"mappings": {
"user": {
"properties": {
"active": {
"type": "boolean"
},
"first_name": {
"type": "keyword",
"fields": {
"normalize": {
"type": "keyword",
"normalizer": "search_normalizer"
}
}
},
},
"settings": {
"index": {
"number_of_shards": "5",
"provided_name": "users",
"creation_date": "1567936315432",
"analysis": {
"normalizer": {
"search_normalizer": {
"filter": [
"lowercase"
],
"type": "custom"
}
}
},
"number_of_replicas": "1",
"uuid": "5SknFdwJTpmF",
"version": {
"created": "6040299"
}
}
}
}
}
Although first_name is normalized to lowercase, queries on the first_name field are case sensitive.
Using the following query for a user with first name Dave
GET users/_search
{
"query": {
"bool": {
"should": [
{
"regexp": {
"first_name": {
"value": ".*dave.*"
}
}
}
]
}
}
}
GET users/_analyze
{
"analyzer" : "standard",
"text": "Dave"
}
returns
{
"tokens": [
{
"token": "dave",
"start_offset": 0,
"end_offset": 4,
"type": "<ALPHANUM>",
"position": 0
}
]
}
Although "Dave" is tokenized to "dave" the following query
GET users/_search
{
"query": {
"match": {
"first_name": "dave"
}
}
}
Returns no hits.
Is there an issue with my current mapping? or the query?

I think you have missed first_name.normalize in query
Indexing Records
{"first_name": "Daveraj"}
{"index": {}}
{"first_name": "RajdaveN"}
{"index": {}}
{"first_name": "Dave"}
Query
"query": {
"bool": {
"should": [
{
"regexp": {
"first_name.normalize": {
"value": ".*dave.*"
}
}
}
]
}
}
}
Result
"took": 10,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1.0,
"hits": [
{
"_index": "test3",
"_type": "test3_type",
"_id": "M8-lEG0BLCpzI1hbBWYC",
"_score": 1.0,
"_source": {
"first_name": "Dave"
}
},
{
"_index": "test3",
"_type": "test3_type",
"_id": "Mc-lEG0BLCpzI1hbBWYC",
"_score": 1.0,
"_source": {
"first_name": "Daveraj"
}
},
{
"_index": "test3",
"_type": "test3_type",
"_id": "Ms-lEG0BLCpzI1hbBWYC",
"_score": 1.0,
"_source": {
"first_name": "RajdaveN"
}
}
]
}
}```

You have created a normalized multi-field: first_name.normalize , but you are searching on the original field first_name which doesn't have any analyzer specified (will default to index-default analyzer or standard).
The examples given here might help:
https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-fields.html
You need to explicitly specify the multi-field you want to search on, note even though a multi-field cant have its own content, it indexes different terms as opposed to its parent (although not always) as a result of possibly being analyzed using diff analyzers/char/token filters.

Elasticsearch returns documents with a query must_not exists

Elasticsearch: 6.5.4
Issue: I'm executing a bool query (sample to follow) where I'm checking for the existence of a specific field. The issue is, I'm getting results back where the field does exist but has an empty array.
My question is, how do I properly execute a query and only get results where nlp is not added to the document at all.
Sample query:
{
"size": 100,
"sort": [{
"publishedAt": {
"order": "asc"
}
}],
"_source": {
"includes": ["nlp"]
},
"query": {
"bool": {
"must_not": {
"exists": {
"field": "nlp.categories.gcp"
}
}
}
}
}
Sample Mapping:
(This was automatically created by Elastic Search, with the exception of the null_value, I tried adding that).
{
"mapping": {
"article": {
"properties": {
"nlp": {
"properties": {
"categories": {
"properties": {
"gcp": {
"properties": {
"confidence": {
"type": "float"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"null_value": "[]",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
}
}
}
}
Sample Result:
{
"took": 68,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1126581,
"max_score": null,
"hits": [
{
"_index": "news",
"_type": "article",
"_id": "UTuVmmsBE1H01hY9Rn6i",
"_score": null,
"_source": {
"nlp": {
"categories": {
"gcp": []
}
}
},
"sort": [
1509940860000
]
},
{
"_index": "news",
"_type": "article",
"_id": "2w6PmmsBIpi-jAhhO13F",
"_score": null,
"_source": {
"nlp": {
"categories": {
"gcp": []
}
}
},
"sort": [
1510027260000
]
}
]
}
}
When the nlp.categories.gcp has values in it, a typical response would look like this.
{
"took": 26,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 475690,
"max_score": null,
"hits": [
{
"_index": "news",
"_type": "article",
"_id": "6Q6JmmsBIpi-jAhhAlcm",
"_score": null,
"_source": {
"nlp": {
"categories": {
"gcp": [
{
"confidence": 0.8999999761581421,
"name": "/Travel/Hotels & Accommodations"
}
]
}
}
},
"sort": [
1510215565000
]
},
{
"_index": "news",
"_type": "article",
"_id": "rzunmmsBE1H01hY9sLyE",
"_score": null,
"_source": {
"nlp": {
"categories": {
"gcp": [
{
"confidence": 0.9399999976158142,
"name": "/Travel/Hotels & Accommodations"
}
]
}
}
},
"sort": [
1510228881000
]
}
]
}
}

How to highlight nested fields in Elasticsearch

Although the Lucene logic structure, I'm trying to make my nested fields to be highlighted when some search result is present in their content.
Here is the explanation from Elasticsearch documentation (mapping nested type`)
Internal Implementation
Internally, nested objects are indexed as additional documents, but, since they can be guaranteed to be indexed within the same "block", it allows for extremely fast joining with parent docs.
Those internal nested documents are automatically masked away when doing operations against the index (like searching with a match_all query), and they bubble out when using the nested query.
Because nested docs are always masked to the parent doc, the nested docs can never be accessed outside the scope of the nested query. For example stored fields can be enabled on fields inside nested objects, but there is no way of retrieving them, since stored fields are fetched outside of the nested query scope.
0. In my case
I have an Elasticsearch index containing a mapping like the following:
{
"my_documents": {
"dynamic_date_formats": [
"dd.MM.yyyy",
"yyyy-MM-dd",
"yyyy-MM-dd HH:mm:ss"
],
"index_analyzer": "Analyzer2_index",
"search_analyzer": "Analyzer2_search_decompound",
"_timestamp": {
"enabled": true
},
"properties": {
"identifier": {
"type": "string"
},
"description": {
"type": "multi_field",
"fields": {
"sort": {
"type": "string",
"index": "not_analyzed"
},
"description": {
"type": "string"
}
}
},
"files": {
"type": "nested",
"include_in_root": true,
"properties": {
"content": {
"type": "string",
"include_in_root": true
}
}
},
"and then some other": "normal string fields"
}
}
}
I'm trying to execute a query like this:
{
"size": 100,
"query": {
"bool": {
"should": [
{
"nested": {
"path": "files",
"query": {
"bool": {
"should": {
"match": {
"content": {
"query": "burpcontrol",
"minimum_should_match": "85%"
}
}
}
}
}
}
},
{
"match": {
"description": {
"query": "burpcontrol",
"minimum_should_match": "85%"
}
}
},
{
"match": {
"identifier": {
"query": "burpcontrol",
"minimum_should_match": "85%"
}
}
} ]
}
},
"highlight": {
"pre_tags": [
"<span style=\"background-color: yellow\">"
],
"post_tags": [
"</span>"
],
"order": "score",
"no_match_size": 100,
"fragment_size": 50,
"number_of_fragments": 3,
"require_field_match": true,
"fields": {
"files.content": {},
"description": {},
"identifier": {}
}
}
}
The problem I have are:
1. require_field_match
If I use "require_field_match": false I obtain that, even if highlighting doesn't work on nested fields, the search term is highlighted anyway in ALL the fields.
This is the solution I'm actually using, but the performances are horrible. For 50 documents my query needs 25secs. 100 documents about 50secs. 10 documents 5secs.
And if I remove the nested field from the highlighting everything works fast as light!
2 .include_in_root
I would like to have a flattened version of my nested fields (so to store them as normal objects/fields.
To do this I should specify
"files": { "type": "nested", "include_in_root": true, ...
but I don't know why, after reindexing, I cannot see any additional flattened field in the document root (while I was expecting something like "files.content":["content1", "content2", "..."]).
If it would work it would be instead possible to access (in the flattened field) the content of the nested field, and perform the highlighting on it.
Do you know if is it possible to achieve a good (and performant) highlighting on nested fields or, at least, suggest me why my query is so slow? (I already optimised the fragments)

There are a number of things you can do here, with a parent/child relationship. I'll go over a few, and hopefully that will lead you in the right direction; it will still take lots of testing to figure out whether this solution is going to be more performant for you. Also, I left out a few of the details of your setup, for clarity. Please forgive the long post.
I set up a parent/child mapping as follows:
DELETE /test_index
PUT /test_index
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"parent_doc": {
"properties": {
"identifier": {
"type": "string"
},
"description": {
"type": "string"
}
}
},
"child_doc": {
"_parent": {
"type": "parent_doc"
},
"properties": {
"content": {
"type": "string"
}
}
}
}
}
Then added some test docs:
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"parent_doc","_id":1}}
{"identifier": "first", "description":"some special text"}
{"index":{"_index":"test_index","_type":"child_doc","_parent":1}}
{"content":"text that is special"}
{"index":{"_index":"test_index","_type":"child_doc","_parent":1}}
{"content":"text that is not"}
{"index":{"_index":"test_index","_type":"parent_doc","_id":2}}
{"identifier": "second", "description":"some different text"}
{"index":{"_index":"test_index","_type":"child_doc","_parent":2}}
{"content":"different child text, but special"}
{"index":{"_index":"test_index","_type":"parent_doc","_id":3}}
{"identifier": "third", "description":"we don't want this parent"}
{"index":{"_index":"test_index","_type":"child_doc","_parent":3}}
{"content":"or this child"}
If I'm understanding your specs correctly, we would want a query for "special" to return every one of these documents except the last two (correct me if I'm wrong). We want docs that match the text, have a child that matches the text, or have a parent that matches the text.
We can get back parents that match the query like this:
POST /test_index/parent_doc/_search
{
"query": {
"match": {
"description": "special"
}
},
"highlight": {
"fields": {
"description": {},
"identifier": {}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1.1263815,
"hits": [
{
"_index": "test_index",
"_type": "parent_doc",
"_id": "1",
"_score": 1.1263815,
"_source": {
"identifier": "first",
"description": "some special text"
},
"highlight": {
"description": [
"some <em>special</em> text"
]
}
}
]
}
}
And we can get back children that match the query like this:
POST /test_index/child_doc/_search
{
"query": {
"match": {
"content": "special"
}
},
"highlight": {
"fields": {
"content": {}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.92364895,
"hits": [
{
"_index": "test_index",
"_type": "child_doc",
"_id": "geUFenxITZSL7epvB568uA",
"_score": 0.92364895,
"_source": {
"content": "text that is special"
},
"highlight": {
"content": [
"text that is <em>special</em>"
]
}
},
{
"_index": "test_index",
"_type": "child_doc",
"_id": "IMHXhM3VRsCLGkshx52uAQ",
"_score": 0.80819285,
"_source": {
"content": "different child text, but special"
},
"highlight": {
"content": [
"different child text, but <em>special</em>"
]
}
}
]
}
}
We can get back parents that match the text and children that match the text like this:
POST /test_index/parent_doc,child_doc/_search
{
"query": {
"multi_match": {
"query": "special",
"fields": ["description", "content"]
}
},
"highlight": {
"fields": {
"description": {},
"identifier": {},
"content": {}
}
}
}
...
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1.1263815,
"hits": [
{
"_index": "test_index",
"_type": "parent_doc",
"_id": "1",
"_score": 1.1263815,
"_source": {
"identifier": "first",
"description": "some special text"
},
"highlight": {
"description": [
"some <em>special</em> text"
]
}
},
{
"_index": "test_index",
"_type": "child_doc",
"_id": "geUFenxITZSL7epvB568uA",
"_score": 0.75740534,
"_source": {
"content": "text that is special"
},
"highlight": {
"content": [
"text that is <em>special</em>"
]
}
},
{
"_index": "test_index",
"_type": "child_doc",
"_id": "IMHXhM3VRsCLGkshx52uAQ",
"_score": 0.6627297,
"_source": {
"content": "different child text, but special"
},
"highlight": {
"content": [
"different child text, but <em>special</em>"
]
}
}
]
}
}
However, to get back all the docs related to this query, we need to use a bool query:
POST /test_index/parent_doc,child_doc/_search
{
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "special",
"fields": [
"description",
"content"
]
}
},
{
"has_child": {
"type": "child_doc",
"query": {
"match": {
"content": "special"
}
}
}
},
{
"has_parent": {
"type": "parent_doc",
"query": {
"match": {
"description": "special"
}
}
}
}
]
}
},
"highlight": {
"fields": {
"description": {},
"identifier": {},
"content": {}
}
},
"fields": ["_parent", "_source"]
}
...
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0.8866254,
"hits": [
{
"_index": "test_index",
"_type": "parent_doc",
"_id": "1",
"_score": 0.8866254,
"_source": {
"identifier": "first",
"description": "some special text"
},
"highlight": {
"description": [
"some <em>special</em> text"
]
}
},
{
"_index": "test_index",
"_type": "child_doc",
"_id": "geUFenxITZSL7epvB568uA",
"_score": 0.67829096,
"_source": {
"content": "text that is special"
},
"fields": {
"_parent": "1"
},
"highlight": {
"content": [
"text that is <em>special</em>"
]
}
},
{
"_index": "test_index",
"_type": "child_doc",
"_id": "IMHXhM3VRsCLGkshx52uAQ",
"_score": 0.18709806,
"_source": {
"content": "different child text, but special"
},
"fields": {
"_parent": "2"
},
"highlight": {
"content": [
"different child text, but <em>special</em>"
]
}
},
{
"_index": "test_index",
"_type": "child_doc",
"_id": "NiwsP2VEQBKjqu1M4AdjCg",
"_score": 0.12531912,
"_source": {
"content": "text that is not"
},
"fields": {
"_parent": "1"
}
},
{
"_index": "test_index",
"_type": "parent_doc",
"_id": "2",
"_score": 0.12531912,
"_source": {
"identifier": "second",
"description": "some different text"
}
}
]
}
}
(I included the "_parent" field to make it easier to see why docs were included in the results, as shown here).
Let me know if this helps.
Here is the code I used:
http://sense.qbox.io/gist/d69a4d6531dc063faa4b4e094cff2a472a73c5a6

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Search all unique terms from a given query in elastic search - elasticsearch

I found the solution. Hope it helps someone. GET record_new/_search { "size": 0, "query": { "term": { "software_tags": { "value": "windows" } } }, "aggs": { "software_tags": { "terms": { "field": "software_tags.keyword", "include" : ".Windows.", "size": 10000, "order": { "_count": "desc" } } } } }

Related

Elasticsearch - Is it possible to collapse first then aggregate data of a nested field?

Querying array with nested objects in Elasticsearch to get multiple objects

Elasticsearch template to support case insensitive searches

Elasticsearch returns documents with a query must_not exists

How to highlight nested fields in Elasticsearch

Categories

Resources