Why can't I retrieve data from Elasticsearch? - elasticsearch

Elastic search version 6.2.4
I made elastic search environment and made mapping like this.
{
"state":"open",
"settings":{
"index":{
"number_of_shards":"5",
"provided_name":"lara_cart",
"creation_date":"1529082175034",
"analysis":{
"filter":{
"engram":{
"type":"edgeNGram",
"min_gram":"1",
"max_gram":"36"
},
"maxlength":{
"type":"length",
"max":"36"
},
"word_delimiter":{
"split_on_numerics":"false",
"generate_word_parts":"true",
"preserve_original":"true",
"generate_number_parts":"true",
"catenate_all":"true",
"split_on_case_change":"true",
"type":"word_delimiter",
"catenate_numbers":"true"
}
},
"char_filter":{
"normalize":{
"mode":"compose",
"name":"nfkc",
"type":"icu_normalizer"
},
"whitespaces":{
"pattern":"\s[2,]",
"type":"pattern_replace",
"replacement":"\u0020"
}
},
"analyzer":{
"keyword_analyzer":{
"filter":[
"lowercase",
"trim",
"maxlength"
],
"char_filter":[
"normalize",
"whitespaces"
],
"type":"custom",
"tokenizer":"keyword"
},
"autocomplete_index_analyzer":{
"filter":[
"lowercase",
"trim",
"maxlength",
"engram"
],
"char_filter":[
"normalize",
"whitespaces"
],
"type":"custom",
"tokenizer":"keyword"
},
"autocomplete_search_analyzer":{
"filter":[
"lowercase",
"trim",
"maxlength"
],
"char_filter":[
"normalize",
"whitespaces"
],
"type":"custom",
"tokenizer":"keyword"
}
},
"tokenizer":{
"engram":{
"type":"edgeNGram",
"min_gram":"1",
"max_gram":"36"
}
}
},
"number_of_replicas":"1",
"uuid":"5xyW07F-RRCuIJlvBufNbA",
"version":{
"created":"6020499"
}
}
},
"mappings":{
"products":{
"properties":{
"sale_end_at":{
"format":"yyyy-MM-dd HH:mm:ss",
"type":"date"
},
"image_5":{
"type":"text"
},
"image_4":{
"type":"text"
},
"created_at":{
"format":"yyyy-MM-dd HH:mm:ss",
"type":"date"
},
"description":{
"analyzer":"keyword_analyzer",
"type":"text",
"fields":{
"autocomplete":{
"search_analyzer":"autocomplete_search_analyzer",
"analyzer":"autocomplete_index_analyzer",
"type":"text"
}
}
},
"sale_start_at":{
"format":"yyyy-MM-dd HH:mm:ss",
"type":"date"
},
"sale_price":{
"type":"integer"
},
"category_id":{
"type":"integer"
},
"updated_at":{
"format":"yyyy-MM-dd HH:mm:ss",
"type":"date"
},
"price":{
"type":"integer"
},
"image_1":{
"type":"text"
},
"name":{
"analyzer":"keyword_analyzer",
"type":"text",
"fields":{
"autocomplete":{
"search_analyzer":"autocomplete_search_analyzer",
"analyzer":"autocomplete_index_analyzer",
"type":"text"
},
"keyword":{
"analyzer":"keyword_analyzer",
"type":"text"
}
}
},
"image_3":{
"type":"text"
},
"categories":{
"type":"nested",
"properties":{
"parent_category_id":{
"type":"integer"
},
"updated_at":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"name":{
"analyzer":"keyword_analyzer",
"type":"text",
"fields":{
"autocomplete":{
"search_analyzer":"autocomplete_search_analyzer",
"analyzer":"autocomplete_index_analyzer",
"type":"text"
}
}
},
"created_at":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"id":{
"type":"long"
}
}
},
"id":{
"type":"long"
},
"image_2":{
"type":"text"
},
"stock":{
"type":"integer"
}
}
}
},
"aliases":[
],
"primary_terms":{
"0":1,
"1":1,
"2":1,
"3":1,
"4":1
},
"in_sync_allocations":{
"0":[
"clYoJWUKTru2Z78h0OINwQ"
],
"1":[
"MGQC73KiQsuigTPg4SQG4g"
],
"2":[
"zW6v82gNRbe3wWKefLOAug"
],
"3":[
"5TKrfz7HRAatQsJudKX9-w"
],
"4":[
"gqiblStYSYy_NA6fYtkghQ"
]
}
}
I want to use suggest search on the autocomplete field.
So I added a document like this.
{
"_index":"lara_cart",
"_type":"products",
"_id":"19",
"_version":1,
"_score":1,
"_source":{
"id":19,
"name":"Conqueror, whose.",
"description":"I should think you'll feel it a bit, if you wouldn't mind,' said Alice: 'besides, that's not a regular rule: you invented it just missed her. Alice caught the flamingo and brought it back, the fight.",
"category_id":81,
"stock":79,
"price":11533,
"sale_price":15946,
"sale_start_at":null,
"sale_end_at":null,
"image_1":"https://lorempixel.com/640/480/?56260",
"image_2":"https://lorempixel.com/640/480/?15012",
"image_3":"https://lorempixel.com/640/480/?14138",
"image_4":"https://lorempixel.com/640/480/?94728",
"image_5":"https://lorempixel.com/640/480/?99832",
"created_at":"2018-06-01 16:12:41",
"updated_at":"2018-06-01 16:12:41",
"deleted_at":null,
"categories":{
"id":81,
"name":"A secret, kept.",
"parent_category_id":"33",
"created_at":"2018-06-01 16:12:41",
"updated_at":"2018-06-01 16:12:41",
"deleted_at":null
}
}
}
After that, I tried to search with the query below.
But this query doesn't return anything.
Do you know how to resolve it?
I think the cause is in the mapping and settings.
{
"query":{
"bool":{
"must":[
{
"term":{
"name.autocomplete":"Conqueror"
}
}
],
"must_not":[
],
"should":[
]
}
},
"from":0,
"size":10,
"sort":[
],
"aggs":{
}
}

It's because the field you are using is analyzed, and a "term" query does not support searching analyzed fields.
you can try "match" on the field which analyzer is autocomplete; may be some basic knowledge of autocomplete and n-grams will help you better understanding this problem.
e.g.
you defined the following analyzer:
PUT /my_index
{
"settings": {
"number_of_shards": 1,
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 20
}
},
"analyzer": {
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
}
}
}
}
After that you can test the autocomplete with following request:
GET /my_index/_analyze?analyzer=autocomplete
quick brown
As configured above, the autocomplete analyzer will generate n-grams for the input query with edges from 1 to 20. The return for the request is:
q
qu
qui
quic
quick
b
br
bro
brow
brown
As we all know, a term query searches the field for an exact match of the query word, just like a WHERE condition in MySQL.

Related

How to implement space ignored search across a document with multiple fields in Elasticsearch

I'm trying to implement a space agnostic product catalog search solution using Elasticsearch for a chemical oriented product database. The use case is as follows:
Consider a chemical with the name: Dimethyl Sulphoxide
Some manufacturers label it as Dimethylsulphoxide and some as Dimethyl Sulphoxide. So I could have two Item entries in my ES document as follows
Item 1: {"item_name":"Dimethyl Sulphoxide", "brand":"Merck"}
Item 2: {"item_name":"Dimethylsulphoxide","brand":"Spectrochem"}
Now ideally, If the user enters either string (i.e Dimethyl sulphoxide or Dimethylsulphoxide), I want both the documents to be displayed in the hits.
To achieve this I'm doing two things:
1)At index time, I'm currently running the item_name field through a custom analyzer that consists of the following flow:
Tokenizing with keyword, then filtering with lowercase, then filtering with word joiner(with catenate_all), then filtering with an edge_ngram filter.
So the string "Dimethyl Sulphoxide" becomes ("Dimethyl","Sulphoxide") then ("dimethyl","sulphoxide"), then ("dimethyl","sulphoxide","dimethylsulphoxide"), then ("d","di","dim",dime"....."dimethyl","s","su","sul"....,"sulphoxide","d","di"......,"dimethylsulphoxide")
I'm also running the other fields in the product document, such as the brand field with the same analyzer.
At query time, I'm running the query search string via a similar analyzer without the edge_ngram. So a query string of "Dimethyl Sul" will become ("Dimethyl","Sul") then ("dimethyl","sul") then ("dimethyl","sul","dimethylsul") by specifying a custom search analyzer for each field at index time.
Now I'm able to surface both the results when the user searches a string with or without space, but this approach is coming in the way of my other use cases.
Consider the second use case where the user should also be able to search for an item by the name + the brand and other fields, all in one search box. For example, A user could search for one of the items above by entering, "dimethyl spectrochem" or "sulphoxide merck".
To allow this, I have tried using a multi_match with type as cross fields query and a combined_fields query, both with the operator as AND. But this combination of word_joiner in query string with cross_fields/combined_fields is giving me undesired results for my second use case.
When a user enters, "dimethyl spectrochem", the query search analyzer generates three tokens ("dimethyl","spectrochem" and "dimethylspectrochem") and when these are passed to the cross_fields/combined_fields it essentially generates a query as :
+("item_name":"dimethyl","brand":"dimethyl")
+("item_name":"spectrochem","brand":"spectrochem")
+("item_name":"dimethylspectrochem","brand":"dimethylspectrochem")
Given the way how cross_field works , it looks for a document where all the three queryStrings are present in either field. Since it's unable to find "dimethylspectrochem" in a single field it returns zero results.
Is there a way I can satisfy both use cases?
The mapping that I have specified during index creation is below
curl -XPUT http://localhost:9200/test-item-summary-5 -H 'Content-Type: application/json' -d'
{
"settings":{
"analysis":{
"tokenizer":{
"whitespace":{
"type":"whitespace"
},
"keyword":{
"type":"keyword"
}
},
"filter":{
"lowercase":{
"type":"lowercase"
},
"shingle_word_joiner":{
"type":"shingle",
"token_separator":""
},
"word_joiner":{
"type":"word_delimiter_graph",
"catenate_all":true,
"split_on_numerics":false,
"stem_english_possessive":false
},
"edge_ngram_filter":{
"type":"edge_ngram",
"min_gram":1,
"max_gram":20,
"token_chars":[
"letter",
"digit"
]
}
},
"analyzer":{
"whitespaceWithEdgeNGram":{
"tokenizer":"keyword",
"filter":[
"lowercase",
"word_joiner",
"edge_ngram_filter"
]
},
"spaceIgnoredWithLowerCase":{
"tokenizer":"keyword",
"char_filter":[
"dash_char_filter"
],
"filter":[
"lowercase",
"word_joiner"
]
},
"shingleSearchAnalyzer":{
"tokenizer":"whitespace",
"char_filter":[
"dash_char_filter"
],
"filter":[
"lowercase",
"shingle_word_joiner"
]
},
"whitespaceWithLowerCase":{
"tokenizer":"whitespace",
"char_filter":[
"dash_char_filter"
],
"filter":[
"lowercase"
]
}
},
"char_filter":{
"dash_char_filter":{
"type":"mapping",
"mappings":[
"- => ",
", => ",
". => ",
"( => ",
") => ",
"? => ",
"! => ",
": => ",
"; => ",
"_ => ",
"% => ",
"& => ",
"+ => ",
"\" => ",
"\/ => ",
"\\[ => ",
"\\] => ",
"* => ",
"\u0027 => "
]
}
}
}
},
"mappings":{
"properties":{
"item_code":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
},
"mfr_item_code":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
},
"brand":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
},
"name":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
},
"short_name":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
},
"alias":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
},
"attrs":{
"type":"nested",
"properties":{
"name":{
"type":"text",
"index":"false"
},
"value":{
"type":"text",
"copy_to":"item:attrs:value",
"index":"false"
},
"primaryAttribute":{
"type":"boolean",
"index":"false"
}
}
},
"variant_summaries":{
"type":"nested",
"properties":{
"item_code":{
"type":"text",
"index":"false"
},
"variant_code":{
"type":"text",
"copy_to":"variant:variant_code",
"index":"false"
},
"mfr_item_code":{
"type":"text",
"index":"false"
},
"mfr_variant_code":{
"type":"text",
"copy_to":"variant:mfr_variant_code",
"index":"false"
},
"brand":{
"type":"text",
"index":"false"
},
"unit":{
"type":"text",
"copy_to":"variant:unit",
"index":"false"
},
"unit_mag":{
"type":"float",
"copy_to":"variant:unit",
"index":"false"
},
"primary_alternate_unit":{
"type":"nested",
"properties":{
"unit":{
"type":"text",
"copy_to":"variant:unit",
"index":"false"
},
"unit_mag":{
"type":"float",
"copy_to":"variant:unit",
"index":"false"
}
}
},
"attrs":{
"type":"nested",
"properties":{
"name":{
"type":"text",
"index":"false"
},
"value":{
"type":"text",
"copy_to":"variant:attrs:value",
"index":"false"
},
"primaryAttribute":{
"type":"boolean",
"index":"false"
}
}
},
"image":{
"type":"text",
"index":"false"
},
"in_stock":{
"type":"boolean",
"index":"false"
}
}
},
"added_by":{
"type":"text",
"index":"false"
},
"modified_by":{
"type":"text",
"index":"false"
},
"created_on":{
"type":"date",
"index":"false"
},
"updated_on":{
"type":"date",
"index":"false"
},
"is_deleted":{
"type":"boolean",
"index":"false"
},
"variant:variant_code":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
},
"variant:mfr_variant_code":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
},
"variant:attrs:value":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
},
"variant:unit":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
},
"item:attrs:value":{
"type":"text",
"analyzer":"whitespaceWithEdgeNGram",
"search_analyzer":"shingleSearchAnalyzer"
}
}
}
}'
Any suggestions on implementing a space ignored search across multiple fields would be highly appreciated.

How to filter aggregation results in elasticsearch (v 6.3)

I have an array for the field commodity_line, e.g. [3,35,1,11,12], [3,12]. I am trying to query the field for autocomplete results, and I need the output to be 3 and 35 when I match with 3. My indexing works fine for all scenarios except when I am working with an array data type.
I need to filter the aggregation results to give 3 and 35, which I am unable to retrieve. I need to use facet_filter, or a filter with a prefix, similar to facet.prefix in Solr.
Let me know if i need to change the query or the mapping?
Query :
GET contracts/doc/_search
{
"size":0,
"query":{
"bool":{
"must":{
"match":{
"commodity_line.autocomplete":"3"
}
}
}
},
"aggs" : {
"names":{
"terms":{
"field":"commodity_line.keyword"
}
}
}
}
Mapping :
PUT contracts
{
"settings":{
"analysis":{
"filter":{
"gramFilter": {
"type": "edge_ngram",
"min_gram" : 1,
"max_gram" : 20,
"token_chars": [
"letter",
"symbol",
"digit"
]
}
},
"analyzer":{
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"trim",
"gramFilter",
"asciifolding"
]
}
}
}
}
,
"mappings":{
"doc":{
"properties":{
"commodity_line" :{
"type":"text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"autocomplete":{
"type":"text",
"analyzer":"autocomplete",
"search_analyzer":"standard"
}
}
}
}
}
}
}
I have found a solution:
I had to match with a prefix rather than filtering the results.
"aggs" : {
"names":{
"terms":{
"field":"commodity_line.keyword",
"include" : "3.*"
}
}

ElasticSearch accented and no accented words management

I created an index :
PUT members
{
"settings":{
"number_of_shards":1,
"analysis":{
"analyzer":{
"accentedNames":{
"tokenizer":"standard",
"filter":[
"lowercase",
"asciifolding"
]
},
"standardNames":{
"tokenizer":"standard",
"filter":[
"lowercase"
]
}
}
}
},
"mappings":{
"member":{
"properties":{
"id":{
"type":"text"
},
"name":{
"type":"text",
"analyzer":"standardNames",
"fields":{
"accented":{
"type":"text",
"analyzer":"accentedNames"
}
}
}
}
}
}
}
Assume that some documents are in this set (EDIT):
{"1", "Maéllys Macron"};
{"2", "Maêllys Alix"};
{"3", "Maëllys Rosa"};
{"4", "Maèllys Alix"};
{"5", "Maellys du Bois"};
I wanted to have this result :
If I search for documents named "Maéllys", I expect to get "Maéllys Macron" as the best match, and the others with the same score.
What I did is to use my analyzers with a such request :
GET members/member/_search
{
"query":{
"multi_match" : {
"query" : "Maéllys",
"fields" : [ "name", "name.accented" ]
}
}
}
"Maéllys Macron" has the best score. The documents named "Ma(ê|ë|é|è)llys" have the same score, which is higher than the "Maellys du Bois" document.
Can someone help me ?
Thanks.

Elasticsearch nGram search query containing blanks

I have created the following index:
{
"settings":{
"number_of_shards":1,
"number_of_replicas":0,
"blocks":{
"read_only_allow_delete":false,
"read_only":false
},
"analysis":{
"filter":{
"autocomplete_filter":{
"type":"ngram",
"min_gram":3,
"max_gram":30
}
},
"analyzer":{
"autocomplete":{
"type":"custom",
"tokenizer":"standard",
"filter":[
"lowercase",
"autocomplete_filter"
]
}
}
}
},
"mappings":{
"movie":{
"properties":{
"title":{
"type":"text"
},
"actors":{
"type":"nested",
"include_in_all":true,
"properties":{
"name":{
"type":"text",
"analyzer":"autocomplete",
"search_analyzer": "standard"
},
"age":{
"type":"long",
"index":"false"
}
}
}
}
}
}
}
And I have inserted the following data via _bulk endpoint:
{"index":{"_index":"movies","_type":"movie","_id":1}}
{"title":"Ocean's 11", "actors":[{"name":"Brad Pitt","age":54}, {"name":"George Clooney","age":56}, {"name":"Julia Roberts","age":50}, {"name":"Andy Garcia","age":61}]}
{"index":{"_index":"movies","_type":"movie","_id":2}}
{"title":"Usual suspects", "actors":[{"name":"Kevin Spacey","age":58}, {"name":"Benicio del Toro","age":50}]}
{"index":{"_index":"movies","_type":"movie","_id":3}}
{"title":"Fight club", "actors":[{"name":"Brad Pitt","age":54}, {"name":"Edward Norton","age":48}, {"name":"Helena Bonham Carter","age":51}, {"name":"Jared Leto","age":46}]}
{"index":{"_index":"movies","_type":"movie","_id":24}}
{"title":"Fight club", "actors":[{"name":"Brad Garrett","age":57}, {"name":"Ben Stiller","age":52}, {"name":"Robin Williams","age":63}]}
Now I want to search the index by actor name. For instance, when I search for brad I get all movies having an actor named brad, which is good.
But when I search for rad p I want only the movies with Brad Pitt, and not Brad Garrett, but I get Brad Garrett.
This is my search query :
{
"query":{
"nested":{
"path":"actors",
"query":{
"match":{
"actors.name":{
"query":"rad p",
"analyzer":"standard"
}
}
},
"inner_hits":{
}
}
}
}
Endpoint I am calling is
/movies/movie/_search?pretty
My question is, how to correctly implement the mentioned feature?
Thanks
BTW elasticsearch version is 6.1.0.
This is because of the standard tokenizer which will split the input into tokens based on whitespaces and punctuation, so Brad Pitt becomes brad and pitt and hence you will not have a token with rad p in it.
What you need to do is to change the tokenizer to (e.g.) keyword so that you consider the full input as one token which you can then apply ngram on.
Or easier, you can simply use the ngram tokenizer and not the token filter
As Val has said, you have to use the nGram tokenizer to do this, and I also had to change my search query to:
{
"query":{
"nested":{
"path":"searchable",
"query":{
"bool":{
"must":{
"match":{
"searchable.searchKeyword":{
"query":"%1$s"
}
}
}
}
},
"inner_hits":{
}
}
}
}
My new index with the nGram tokenizer:
{
"number_of_shards":1,
"number_of_replicas":0,
"blocks":{
"read_only_allow_delete":false,
"read_only":false
},
"analysis":{
"analyzer":{
"autocomplete":{
"tokenizer":"search_tokenizer",
"filter":[
"lowercase",
"asciifolding"
]
}
},
"tokenizer":{
"search_tokenizer":{
"type":"ngram",
"token_chars":[
"letter",
"digit",
"whitespace",
"punctuation",
"symbol"
],
"min_gram":3,
"max_gram":30
}
}
}
}

Elasticsearch - Cardinality over Full Field Value

I have a document that looks like this:
{
"_id":"some_id_value",
"_source":{
"client":{
"name":"x"
},
"project":{
"name":"x November 2016"
}
}
}
I am attempting to perform a query that will fetch me the count of unique project names for each client. For this, I am using a query with cardinality over the project.name. I am sure that there are only 4 unique project names for this particular client. However, when I run my query, I get a count of 5, which I know is wrong.
The project names all contain the name of the client. For instance, if a client is "X", project names will be "X Testing November 2016", or "X Jan 2016", etc. I don't know if that is a consideration.
This is the mapping for the document type
{
"mappings":{
"vma_docs":{
"properties":{
"client":{
"properties":{
"contact":{
"type":"string"
},
"name":{
"type":"string"
}
}
},
"project":{
"properties":{
"end_date":{
"format":"yyyy-MM-dd",
"type":"date"
},
"project_type":{
"type":"string"
},
"name":{
"type":"string"
},
"project_manager":{
"index":"not_analyzed",
"type":"string"
},
"start_date":{
"format":"yyyy-MM-dd",
"type":"date"
}
}
}
}
}
}
}
This is my search query
{
"fields":[
"client.name",
"project.name"
],
"query":{
"bool":{
"must":{
"match":{
"client.name":{
"operator":"and",
"query":"ABC systems"
}
}
}
}
},
"aggs":{
"num_projects":{
"cardinality":{
"field":"project.name"
}
}
},
"size":5
}
These are the results I get (I have only posted 2 results for the sake of brevity). Please find that the num_projects aggregation returns 5, but must only return 4, which are the total number of projects.
{
"hits":{
"hits":[
{
"_score":5.8553367,
"_type":"vma_docs",
"_id":"AVTMIM9IBwwoAW3mzgKz",
"fields":{
"project.name":[
"ABC"
],
"client.name":[
"ABC systems Pvt Ltd"
]
},
"_index":"vma"
},
{
"_score":5.8553367,
"_type":"vma_docs",
"_id":"AVTMIM9YBwwoAW3mzgK2",
"fields":{
"project.name":[
"ABC"
],
"client.name":[
"ABC systems Pvt Ltd"
]
},
"_index":"vma"
}
],
"total":18,
"max_score":5.8553367
},
"_shards":{
"successful":5,
"failed":0,
"total":5
},
"took":4,
"aggregations":{
"num_projects":{
"value":5
}
},
"timed_out":false
}
FYI: The project names are ABC, ABC Nov 2016, ABC retest November, ABC Mobile App
You need the following mapping for your project.name field:
{
"mappings": {
"vma_docs": {
"properties": {
"client": {
"properties": {
"contact": {
"type": "string"
},
"name": {
"type": "string"
}
}
},
"project": {
"properties": {
"end_date": {
"format": "yyyy-MM-dd",
"type": "date"
},
"project_type": {
"type": "string"
},
"name": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"project_manager": {
"index": "not_analyzed",
"type": "string"
},
"start_date": {
"format": "yyyy-MM-dd",
"type": "date"
}
}
}
}
}
}
}
It's basically a subfield called raw where the same value put in project.name is put in project.name.raw but without touching it (tokenizing or analyzing it). And then the query you need to use is:
{
"fields": [
"client.name",
"project.name"
],
"query": {
"bool": {
"must": {
"match": {
"client.name": {
"operator": "and",
"query": "ABC systems"
}
}
}
}
},
"aggs": {
"num_projects": {
"cardinality": {
"field": "project.name.raw"
}
}
},
"size": 5
}

Resources