Elasticsearch always returning score as zero - elasticsearch

I built the index and am now running a few queries to validate the data. Whatever request I send, the Elasticsearch score is zero for all results. I have tried different combinations in Kibana, and every document returned has a _score of zero.
Below are my request and response:
GET _search
{
"version": true,
"size": 500,
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"_source": {
"excludes": []
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [],
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"match_phrase": {
"name": {
"query": "RED"
}
}
}
],
"should": [],
"must_not": []
}
},
"highlight": {
"pre_tags": [
"#kibana-highlighted-field#"
],
"post_tags": [
"#/kibana-highlighted-field#"
],
"fields": {
"*": {}
},
"fragment_size": 2147483647
}
}
Response is :
{
"took" : 126,
"timed_out" : false,
"_shards" : {
"total" : 11,
"successful" : 11,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "products",
"_type" : "product",
"_id" : "11",
"_version" : 3,
"_score" : 0.0,
"_source" : {
"sellercode" : "1",
"avgreviews" : 5.0,
"saleprice" : null,
"sellable" : null,
"freedelivery" : false,
"promotedprice" : null,
"listprice" : 1200.2,
"noofreviews" : 1,
"outdooruse" : false,
"warrantycode" : null,
"australiasellable" : true,
"newrelease" : null,
"sku" : "VSTALDBED386_1",
"height" : 68.0,
"shortdescription" : "Loft Sofa Bed Wood Red Medium",
"commercialuse" : true,
"customisable" : true,
"weight" : "5656.0KG",
"typeCode" : "Bed - King Single",
"colour" : "Red",
"depth" : 51.0,
"name" : "LOFT SOFA BED WOOD RED MEDIUM",
"online" : true,
"materialcode" : "Fabric",
"assemblyrequired" : null,
"category" : [ ],
"stylecode" : "Contemporary"
},
"highlight" : {
"name" : [
"LOFT SOFA BED WOOD #kibana-highlighted-field#RED#/kibana-highlighted-field# MEDIUM"
]
}
},
{
"_index" : "products",
"_type" : "product",
"_id" : "53",
"_version" : 3,
"_score" : 0.0,
"_source" : {
"sellercode" : "1",
"avgreviews" : 5.0,
"saleprice" : null,
"sellable" : null,
"freedelivery" : true,
"promotedprice" : null,
"listprice" : 9635.0,
"noofreviews" : 1,
"outdooruse" : false,
"warrantycode" : null,
"australiasellable" : true,
"newrelease" : null,
"sku" : "VSTALDBED393_1",
"height" : 66.0,
"shortdescription" : "tolix Stool Wood Red",
"commercialuse" : false,
"customisable" : false,
"weight" : "6525.0KG",
"typeCode" : "Bar Stool",
"colour" : "Silver",
"depth" : 25.0,
"name" : "LILLI / TOLIX STOOL WOOD RED",
"online" : true,
"materialcode" : "Metal",
"assemblyrequired" : null,
"category" : [ ],
"stylecode" : "Retro"
},
"highlight" : {
"name" : [
"LILLI / TOLIX STOOL WOOD #kibana-highlighted-field#RED#/kibana-highlighted-field#"
]
}
},
{
"_index" : "products",
"_type" : "product",
"_id" : "125",
"_version" : 3,
"_score" : 0.0,
"_source" : {
"sellercode" : "1",
"avgreviews" : 3.0,
"saleprice" : null,
"sellable" : null,
"freedelivery" : true,
"promotedprice" : null,
"listprice" : 6500.0,
"noofreviews" : 1,
"outdooruse" : false,
"warrantycode" : null,
"australiasellable" : true,
"newrelease" : null,
"sku" : "VSTALDBED405_1",
"height" : 55.0,
"shortdescription" : "Ialian Design New GasLift Chanelle Queen Size Red PU Leather Wodden
Bed frame",
"commercialuse" : false,
"customisable" : false,
"weight" : "5693.0KG",
"typeCode" : "Bed - Queen",
"colour" : "red",
"depth" : 58.0,
"name" : "ITALIAN DESIGN NEW GASLIFT CHANELLE QUEEN SIZE RED PU LEATHER WOODEN BED FRAME",
"online" : true,
"materialcode" : "Timber",
"assemblyrequired" : null,
"category" : [ ],
"stylecode" : "Contemporary"
},
"highlight" : {
"name" : [
"ITALIAN DESIGN NEW GASLIFT CHANELLE QUEEN SIZE #kibana-highlighted-field#RED#/kibana-highlighted-field# PU LEATHER WOODEN BED FRAME"
]
}
},
{
"_index" : "products",
"_type" : "product",
"_id" : "707",
"_version" : 3,
"_score" : 0.0,
"_source" : {
"sellercode" : "2",
"avgreviews" : 2.0,
"saleprice" : null,
"sellable" : null,
"freedelivery" : false,
"promotedprice" : null,
"listprice" : 6326.0,
"noofreviews" : 1,
"outdooruse" : false,
"warrantycode" : null,
"australiasellable" : true,
"newrelease" : null,
"sku" : "VSTALDBED478_2",
"height" : 36.0,
"shortdescription" : "Leaf and Vine Rug Brown Cream red",
"commercialuse" : false,
"customisable" : true,
"weight" : "6548.0KG",
"typeCode" : "Shag Rug",
"colour" : "Brown",
"depth" : 47.0,
"name" : "LEAF AND VINE RUG BROWN CREAM RED",
"online" : true,
"materialcode" : "Plastic",
"assemblyrequired" : null,
"category" : [ ],
"stylecode" : "Contemporary"
},
"highlight" : {
"name" : [
"LEAF AND VINE RUG BROWN CREAM #kibana-highlighted-field#RED#/kibana-highlighted-field#"
]
}
}
]
}
}
Can you point me in the right direction to fix this score value?
Thanks.

Sree, all you need is to move your match_phrase into the must clause instead of the filter clause. Clauses in filter context are not scored (they only include or exclude documents), which is why every hit comes back with a _score of 0; clauses in must context contribute to the relevance score.
Your query part should look like this:
"query": {
"bool": {
"must": [
{
"match_phrase": {
"name": {
"query": "RED"
}
}
}
],
"filter": [
{
"match_all": {}
}
],
"should": [],
"must_not": []
}
}
Output with 1 indexed document :
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.2876821,
"hits" : [
{
"_index" : "someidx",
"_type" : "_doc",
"_id" : "c37m5W4BifZmUly9Ni-X",
"_version" : 1,
"_score" : 0.2876821,
"_source" : {
"sellercode" : "1",
"avgreviews" : 5.0,
"saleprice" : null,
"sellable" : null,
"freedelivery" : false,
"promotedprice" : null,
"listprice" : 1200.2,
"noofreviews" : 1,
"outdooruse" : false,
"warrantycode" : null,
"australiasellable" : true,
"newrelease" : null,
"sku" : "VSTALDBED386_1",
"height" : 68.0,
"shortdescription" : "Loft Sofa Bed Wood Red Medium",
"commercialuse" : true,
"customisable" : true,
"weight" : "5656.0KG",
"typeCode" : "Bed - King Single",
"colour" : "Red",
"depth" : 51.0,
"name" : "LOFT SOFA BED WOOD RED MEDIUM",
"online" : true,
"materialcode" : "Fabric",
"assemblyrequired" : null,
"category" : [ ],
"stylecode" : "Contemporary"
},
"highlight" : {
"name" : [
"LOFT SOFA BED WOOD #kibana-highlighted-field#RED#/kibana-highlighted-field# MEDIUM"
]
}
}
]
}
}
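If you want to see why a document got a particular score, a quick check (just a sketch against the products index from the question, not part of the original answer) is to set explain to true on the search request; each hit then carries an _explanation section with the scoring details behind its _score:
GET products/_search
{
  "explain": true, <----- returns per-hit scoring details
  "query": {
    "bool": {
      "must": [
        { "match_phrase": { "name": "RED" } }
      ]
    }
  }
}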

Related

Elastic Search Query for Relevancy Given a Phrase Rather Than Just One Word

Elastic Search querying/boosting is not working as I would expect it to...
I have an index where documents look like this:
{
"entity_id" : "x",
"entity_name" : "y",
"description": "search engine",
"keywords" : [
"Google"
]
}
I'm trying to get the document to show up with a relevance score when querying with a search phrase that contains one of the keywords,
like this:
{
"query": {
"bool": {
"should": [
{
"query_string": {
"query": "What are some of products for Google?",
"boost": 10,
"fields": ["keywords"]
}
}
],
"filter": {
"term" : { "entity_name" : "y" }
}
}
}
}
The problem is that my results are not as expected, for three reasons:
1. The results contain hits that have no relevance to "Google" or "products" or any other word in the search phrase.
2. The document that I am expecting to get returned has _score = 0.0.
3. The document that I am expecting to get returned has a mysterious "_ignored" : [ "description.keyword" ].
The response looks like this:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_score" : 0.0,
"_source": {
"entity_id" : "a",
"entity_name" : "y",
"description": "some other entity",
"keywords": ["Other"]
}
},
{
"_score" : 0.0,
"_ignored" : [
"description.keyword"
],
"_source": {
"entity_id" : "x",
"entity_name" : "y",
"description": "search engine",
"keywords": ["Google"]
}
}
]
}
}
What am I doing wrong?
TL;DR
You are using the wrong query type; query_string is not suitable for your needs, so you may want to use match instead.
To understand
First and foremost:
_ignored is a field that tracks all the fields that were malformed at index time and are therefore ignored at search time. [doc]
Why is my score 0?
It is because of the query_string query. [doc]
Returns documents based on a provided query string, using a parser with a strict syntax.
e.g.:
"query": "(new york city) OR (big apple)"
The query_string query splits (new york city) OR (big apple) into two parts: new york city and big apple.
To illustrate my point, look at the example below:
POST /so_relevance_score/_doc
{
"entity_id" : "x",
"entity_name" : "y",
"description": "search engine",
"keywords" : [
"Google"
]
}
POST /so_relevance_score/_doc
{
"entity_id" : "x",
"entity_name" : "y",
"description": "consumer electronic",
"keywords" : [
"Apple"
]
}
GET /so_relevance_score/_search
{
"query": {
"bool": {
"should": [
{
"query_string": {
"query": "What are some of products for Google?",
"boost": 10,
"fields": ["keywords"]
}
}
],
"filter": {
"term" : { "entity_name" : "y" }
}
}
}
}
will return the following results:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "so_relevance_score",
"_type" : "_doc",
"_id" : "0uYgP34Bpf2xEaYqLYai",
"_score" : 0.0,
"_source" : {
"entity_id" : "x",
"entity_name" : "y",
"description" : "search engine",
"keywords" : [
"Google"
]
}
},
{
"_index" : "so_relevance_score",
"_type" : "_doc",
"_id" : "1eYmP34Bpf2xEaYquoZC",
"_score" : 0.0,
"_source" : {
"entity_id" : "x",
"entity_name" : "y",
"description" : "consumer electronic",
"keywords" : [
"Apple"
]
}
}
]
}
}
The score is 0 for both documents, which means that, to Elasticsearch, both documents are equally relevant for this query.
But if you change the query type to match:
GET /so_relevance_score/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"keywords": "What are some of products for Google?"
}
}
],
"filter": {
"term" : { "entity_name" : "y" }
}
}
}
}
I get:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.6931471,
"hits" : [
{
"_index" : "so_relevance_score",
"_type" : "_doc",
"_id" : "0uYgP34Bpf2xEaYqLYai",
"_score" : 0.6931471,
"_source" : {
"entity_id" : "x",
"entity_name" : "y",
"description" : "search engine",
"keywords" : [
"Google"
]
}
},
{
"_index" : "so_relevance_score",
"_type" : "_doc",
"_id" : "1eYmP34Bpf2xEaYquoZC",
"_score" : 0.0,
"_source" : {
"entity_id" : "x",
"entity_name" : "y",
"description" : "consumer electronic",
"keywords" : [
"Apple"
]
}
}
]
}
}
With a relevance score!
If you want to fine-tune your results, I suggest diving into the documentation for query types. [doc]

On Elasticsearch, how to aggregate based on the number of items in a field?

On Elasticsearch I have a field named Itinerary that can contain multiple values (from 1 up to 6); for example, in the sample below there are 2 items in the field.
"Itinerary": [
{
"Carrier": "LH",
"Departure": "2021-07-04T06:55:00Z",
"Number": "1493",
"Arrival": "2021-07-04T08:40:00Z",
},
{
"Carrier": "LH",
"Departure": "2021-07-04T13:30:00Z",
"Number": "422",
"Arrival": "2021-07-04T16:05:00Z",
}
}
]
Is there a way I can aggregate based on the number of items in the field? Having something like:
1 item : 2
2 item : 4
...
The Itinerary field needs to be defined as nested type:
"Itinerary":
{
"type": "nested"
}
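A minimal sketch of where that nested definition goes when creating the index (the index name index8 is taken from the result below; adjust it to your own):
PUT index8
{
  "mappings": {
    "properties": {
      "Itinerary": {
        "type": "nested"
      }
    }
  }
}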
Use a terms aggregation to group on a field. You can use a script to get the count of the array or, better, introduce a field that stores the count of the array (see the sketch after the result below).
Use a top_hits aggregation to get the documents under each group:
{
"size": 0,
"aggs": {
"NAME": {
"terms": {
"script": {
"source": "doc['Itinerary.Carrier.keyword'].length"
}
},
"aggs": {
"NAME": {
"top_hits": {
"size": 10
}
}
}
}
}
}
Result:
"aggregations" : {
"NAME" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2,
"doc_count" : 2,
"NAME" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index8",
"_type" : "_doc",
"_id" : "8OW1lnsBRh1xpgSkIOlq",
"_score" : 1.0,
"_source" : {
"Itinerary" : [
{
"Carrier" : "LH",
"Departure" : "2021-07-04T06:55:00Z",
"Number" : "1493",
"Arrival" : "2021-07-04T08:40:00Z"
},
{
"Carrier" : "LH",
"Departure" : "2021-07-04T13:30:00Z",
"Number" : "422",
"Arrival" : "2021-07-04T16:05:00Z"
}
]
}
},
{
"_index" : "index8",
"_type" : "_doc",
"_id" : "8uW6lnsBRh1xpgSkAun1",
"_score" : 1.0,
"_source" : {
"Itinerary" : [
{
"Carrier" : "LH2",
"Departure" : "2021-07-04T06:55:00Z",
"Number" : "14931",
"Arrival" : "2021-07-04T08:40:00Z"
},
{
"Carrier" : "LH2",
"Departure" : "2021-07-04T13:30:00Z",
"Number" : "4221",
"Arrival" : "2021-07-04T16:05:00Z"
}
]
}
}
]
}
}
},
{
"key" : 3,
"doc_count" : 1,
"NAME" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index8",
"_type" : "_doc",
"_id" : "8eW1lnsBRh1xpgSkdukQ",
"_score" : 1.0,
"_source" : {
"Itinerary" : [
{
"Carrier" : "LH1",
"Departure" : "2021-07-04T06:55:00Z",
"Number" : "14931",
"Arrival" : "2021-07-04T08:40:00Z"
},
{
"Carrier" : "LH1",
"Departure" : "2021-07-04T13:30:00Z",
"Number" : "4221",
"Arrival" : "2021-07-04T16:05:00Z"
},
{
"Carrier" : "LH1",
"Departure" : "2021-07-04T13:30:00Z",
"Number" : "3221",
"Arrival" : "2021-07-04T16:05:00Z"
}
]
}
}
]
}
}
}
]
}
}
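As an alternative to the runtime script, the count can be stored on the document at index time and the terms aggregation can then group on it directly. A minimal sketch, assuming a hypothetical itineraryCount field that your indexing code populates:
PUT index8/_doc/1
{
  "itineraryCount": 2, <----- hypothetical field, kept in sync by your indexing code
  "Itinerary": [
    { "Carrier": "LH", "Departure": "2021-07-04T06:55:00Z", "Number": "1493", "Arrival": "2021-07-04T08:40:00Z" },
    { "Carrier": "LH", "Departure": "2021-07-04T13:30:00Z", "Number": "422", "Arrival": "2021-07-04T16:05:00Z" }
  ]
}

GET index8/_search
{
  "size": 0,
  "aggs": {
    "by_item_count": {
      "terms": { "field": "itineraryCount" }
    }
  }
}
This avoids running a script per document at query time, at the cost of keeping the count field in sync when documents change.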

Elasticsearch ngram tokenizer returns all results regardless of query input

I am trying to build a query to search for records in the following format: TR000002_1_2020.
Users should be able to search for results in the following ways:
TR000002 or 2_1_2020 or TR000002_1_2020 or 2020. I figured an ngram tokenization query would be best suited for my needs. I am using Elasticsearch 6.8, so I cannot use the built-in search-as-you-type field introduced in Elasticsearch 7.
Here's my implementation, following the docs here. The only thing I modified was EdgeNGram -> NGram, as the user can search from any point in the text.
My Analysis block looks like this:
.Analysis(a => a
.Analyzers(aa => aa
.Custom("autocomplete", ca => ca
.Tokenizer("autocomplete")
.Filters(new string[] {
"lowercase"
})
)
.Custom("autocomplete_search", ca => ca
.Tokenizer("lowercase")
)
)
.Tokenizers(t => t
.NGram("autocomplete", e => e
.MinGram(2)
.MaxGram(16)
.TokenChars(new TokenChar[] {
TokenChar.Letter,
TokenChar.Digit,
TokenChar.Punctuation,
TokenChar.Symbol
})
)
)
)
Then in my mapping I define:
.Text(t => t
.Name(tr => tr.TestRecordId)
.Analyzer("autocomplete")
.SearchAnalyzer("autocomplete_search")
)
When I search for TR000002, my query returns all results instead of just the records that contain those specific characters. What am I doing wrong? Is there a better tokenizer for this specific use case? Thanks!
EDIT: Here's a sample of what is returned:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 27,
"max_score" : 0.105360515,
"hits" : [
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "3",
"_score" : 0.105360515,
"_source" : {
"id" : 3,
"testRecordId" : "TR000002_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 12,
"testStatus" : {
"testStatusId" : 12,
"name" : "Complete: Postponed Until Further Notice"
},
"discriminator" : "SingleEventEffectsRecord",
"testRecordServiceOrders" : [
{
"testRecordId" : 3,
"serviceOrderId" : 9,
"serviceOrder" : {
"serviceOrderId" : 9,
"serviceOrderNumber" : "105702"
}
}
],
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
},
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false,
"sEETestRates" : [ ]
}
},
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "11",
"_score" : 0.105360515,
"_source" : {
"id" : 11,
"testRecordId" : "TR000011_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 1,
"testStatus" : {
"testStatusId" : 1,
"name" : "Active"
},
"discriminator" : "TotalIonizingDoseRecord",
"creatorUsername" : "[REDACTED]",
"creator" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"testRecordServiceOrders" : [ ],
"partLDC" : "12",
"waferLot" : "1",
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false,
"testStartDate" : "2020-07-30T00:00:00",
"actualCompletionDate" : "2020-07-31T00:00:00"
}
},
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "17",
"_score" : 0.105360515,
"_source" : {
"id" : 17,
"testRecordId" : "TR000017_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "lewallen",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 1,
"testStatus" : {
"testStatusId" : 1,
"name" : "Active"
},
"discriminator" : "TotalIonizingDoseRecord",
"creatorUsername" : "[REDACTED]",
"creator" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"testRecordServiceOrders" : [ ],
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false
}
},
Also, here's what the mapping shows:
"testRecordId" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "autocomplete_search"
},
I guess I should also mention, I've been testing this query in the console like so:
GET test-records-development/_search
{
"query": {
"match": {
"testRecordId": {
"query": "TR000002_1_2020"
}
}
}
}
EDIT 2: Added API response from index _settings endpoint:
{
"test-records-development-09-09-2020-02-00-00" : {
"settings" : {
"index" : {
"number_of_shards" : "5",
"provided_name" : "test-records-development-09-09-2020-02-00-00",
"creation_date" : "1599617013874",
"analysis" : {
"analyzer" : {
"autocomplete" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "autocomplete"
},
"autocomplete_search" : {
"type" : "custom",
"tokenizer" : "lowercase"
}
},
"tokenizer" : {
"autocomplete" : {
"token_chars" : [
"letter",
"digit",
"punctuation",
"symbol"
],
"min_gram" : "2",
"type" : "ngram",
"max_gram" : "16"
}
}
},
"number_of_replicas" : "0",
"uuid" : "FSeCa0YwRCOJVbjfxYGkig",
"version" : {
"created" : "6080199"
}
}
}
}
}
As I don't have access to your analyzer settings in JSON format, I can't confirm it, but most probably the issue is with your search analyzer autocomplete_search, which is creating search-time tokens that match the index-time tokens.
For example: you are searching for TR000002_1_2020, and if the search analyzer produces 2020 as a token, and the document containing TR000011_1_2020 also produced a 2020 token at index time, then your query will match it.
You can use the analyze API to check the tokens generated by an analyzer; as mentioned earlier, most likely some tokens are matching, as shown above.
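For reference, a minimal sketch of that check with the _analyze API (the index name is taken from the _settings output above); running it once with each analyzer shows which search-time tokens overlap with index-time tokens of unrelated records:
GET test-records-development-09-09-2020-02-00-00/_analyze
{
  "analyzer": "autocomplete",
  "text": "TR000002_1_2020"
}

GET test-records-development-09-09-2020-02-00-00/_analyze
{
  "analyzer": "autocomplete_search",
  "text": "TR000002_1_2020"
}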

spring data elastic search not searching properly on fields

I want to search on three fields of my index: title, authorName, and description.
My search looks like this:
SearchQuery searchQuery = new NativeSearchQueryBuilder()
.withQuery(QueryBuilders.multiMatchQuery(criteria)
.field("title")
.field("authorName")
.field("description")
.type(MultiMatchQueryBuilder.Type.BEST_FIELDS))
.withPageable(PageRequest.of(page, size))
.build();
Page<Ebook> ebookList = ebookRepo.search(searchQuery);
I have these documents indexed:
{
"took" : 12,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "ebook",
"_type" : "ebook",
"_id" : "9",
"_score" : 1.0,
"_source" : {
"id" : 9,
"size" : null,
"numberOfPages" : 10,
"companyId" : 656,
"price" : 10.0,
"title" : "Welcome to my life",
"authorName" : "JB2",
"description" : "Welcome to my life 3",
"language" : "FR",
"ebookPath" : null,
"ebookDownloadUrl" : null,
"rating" : {
"ratingId" : 10,
"average" : 5.0,
"starsByUserId" : {
"131" : 5
},
"stars" : {
"5" : 1
}
},
"coverPath" : null,
"coverDownloadUrl" : null,
"iconPath" : null,
"iconDownloadUrl" : null,
"category" : {
"id" : 1,
"name" : "Webtoon",
"subCategoryList" : [ ]
},
"subCategory" : {
"id" : 2,
"name" : "Adventure"
},
"repositoryGeneratedId" : "1588336718863",
"userReview" : [
{
"id" : 11,
"comment" : "I dont like it. I give 5 stars",
"commentDate" : 1588336718989,
"appUserId" : 131,
"stars" : 5
}
],
"status" : "INITIATED"
}
},
{
"_index" : "ebook",
"_type" : "ebook",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"id" : 5,
"size" : null,
"numberOfPages" : 10,
"companyId" : 2,
"price" : 10,
"title" : "Welcome to my life",
"authorName" : "kosted 3",
"description" : "Ceci est une autre description 3",
"language" : "FR",
"ebookPath" : null,
"ebookDownloadUrl" : null,
"rating" : {
"ratingId" : 6,
"average" : 0.0,
"starsByUserId" : { },
"stars" : { }
},
"coverPath" : null,
"coverDownloadUrl" : null,
"iconPath" : null,
"iconDownloadUrl" : null,
"category" : {
"id" : 1,
"name" : "Webtoon"
},
"subCategory" : {
"id" : 2,
"name" : "Adventure"
},
"repositoryGeneratedId" : "1588372761403",
"userReview" : [ ],
"status" : "INITIATED"
}
},
{
"_index" : "ebook",
"_type" : "ebook",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"id" : 3,
"size" : null,
"numberOfPages" : 10,
"companyId" : 2,
"price" : 10,
"title" : "Welcome to my life",
"authorName" : "kosted 2",
"description" : "Ceci est une autre description 2",
"language" : "FR",
"ebookPath" : null,
"ebookDownloadUrl" : null,
"rating" : {
"ratingId" : 4,
"average" : 0.0,
"starsByUserId" : { },
"stars" : { }
},
"coverPath" : null,
"coverDownloadUrl" : null,
"iconPath" : null,
"iconDownloadUrl" : null,
"category" : {
"id" : 1,
"name" : "Webtoon"
},
"subCategory" : {
"id" : 2,
"name" : "Adventure"
},
"repositoryGeneratedId" : "1588372758036",
"userReview" : [ ],
"status" : "INITIATED"
}
},
{
"_index" : "ebook",
"_type" : "ebook",
"_id" : "6",
"_score" : 1.0,
"_source" : {
"id" : 6,
"size" : null,
"numberOfPages" : 10,
"companyId" : 655,
"price" : 10,
"title" : "Welcome to my life",
"authorName" : "JBB",
"description" : "Welcome to my life 2",
"language" : "FR",
"ebookPath" : null,
"ebookDownloadUrl" : null,
"rating" : {
"ratingId" : 7,
"average" : 0.0,
"starsByUserId" : { },
"stars" : { }
},
"coverPath" : null,
"coverDownloadUrl" : null,
"iconPath" : null,
"iconDownloadUrl" : null,
"category" : {
"id" : 1,
"name" : "Webtoon",
"subCategoryList" : [ ]
},
"subCategory" : {
"id" : 2,
"name" : "Adventure"
},
"repositoryGeneratedId" : "1588336718576",
"userReview" : [ ],
"status" : "INITIATED"
}
},
{
"_index" : "ebook",
"_type" : "ebook",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"id" : 1,
"size" : null,
"numberOfPages" : 10,
"companyId" : 2,
"price" : 10,
"title" : "Welcome to my life",
"authorName" : "kosted",
"description" : "Book about my life",
"language" : "FR",
"ebookPath" : null,
"ebookDownloadUrl" : null,
"rating" : {
"ratingId" : 2,
"average" : 0.0,
"starsByUserId" : { },
"stars" : { }
},
"coverPath" : null,
"coverDownloadUrl" : null,
"iconPath" : null,
"iconDownloadUrl" : null,
"category" : {
"id" : 1,
"name" : "Webtoon"
},
"subCategory" : {
"id" : 2,
"name" : "Adventure"
},
"repositoryGeneratedId" : "1588372748126",
"userReview" : [ ],
"status" : "INITIATED"
}
}
]
}
}
When I search with the term "JB" or "jb", or even "JB2", I get 0 results, even though there are ebooks with authorName JB2.
I use Spring 2.2.4.RELEASE and Spring Data Elasticsearch 3.2.4.RELEASE.
What did I do wrong in my code? Any idea how I could correct it?
Thanks in advance.

How can i extend an elastic search date range histogram aggregation query?

Hi, I have an Elasticsearch index named mep-report.
Each document has a status field. The possible values for the status field are "ENROUTE", "SUBMITTED", "DELIVERED", and "FAILED". Below is a sample from the index with 6 documents.
{
"took" : 10,
"timed_out" : false,
"_shards" : {
"total" : 13,
"successful" : 13,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1094313,
"max_score" : 1.0,
"hits" : [
{
"_index" : "mep-reports-2019.09.11",
"_type" : "doc",
"_id" : "68e8e03f-baf8-4bfc-a920-58e26edf835c-353899837500",
"_score" : 1.0,
"_source" : {
"status" : "ENROUTE",
"#timestamp" : "2019-09-11T10:21:26.000Z"
}
},
{
"_index" : "mep-reports-2019.09.11",
"_type" : "doc",
"_id" : "68e8e03f-baf8-4bfc-a920-58e26edf835c-353899837501",
"_score" : 1.0,
"_source" : {
"status" : "ENROUTE",
"#timestamp" : "2019-09-11T10:21:26.000Z"
}
},
{
"_index" : "mep-reports-2019.09.11",
"_type" : "doc",
"_id" : "68e8e03f-baf8-4bfc-a920-58e26edf835c-353899837502",
"_score" : 1.0,
"_source" : {
"status" : "SUBMITTED",
"#timestamp" : "2019-09-11T10:21:26.000Z"
}
},
{
"_index" : "mep-reports-2019.09.11",
"_type" : "doc",
"_id" : "68e8e03f-baf8-4bfc-a920-58e26edf835c-353899837503",
"_score" : 1.0,
"_source" : {
"status" : "DELIVERED",
"#timestamp" : "2019-09-11T10:21:26.000Z"
}
},
{
"_index" : "mep-reports-2019.09.11",
"_type" : "doc",
"_id" : "68e8e03f-baf8-4bfc-a920-58e26edf835c-353899837504",
"_score" : 1.0,
"_source" : {
"status" : "FAILED",
"#timestamp" : "2019-09-11T10:21:26.000Z"
}
},
{
"_index" : "mep-reports-2019.09.11",
"_type" : "doc",
"_id" : "68e8e03f-baf8-4bfc-a920-58e26edf835c-353899837504",
"_score" : 1.0,
"_source" : {
"status" : "FAILED",
"#timestamp" : "2019-09-11T10:21:26.000Z"
}
}
]
}
}
I would like to build an aggregation histogram distribution that gives me something like messages_processed, message_delivered, and messages_failed:
messages_processed: 3 (2 documents with status ENROUTE + 1 document with status SUBMITTED)
message_delivered: 1 (1 document with status DELIVERED)
messages_failed: 2 (2 documents with status FAILED)
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 13,
"successful" : 13,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 21300,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"performance_over_time" : {
"buckets" : [
{
"key_as_string" : "2020-02-21",
"key" : 1582243200000,
"doc_count" : 6,
"message_processed": 3,
"message_delivered": 1,
"message_failed": 2
}
]
}
}
}
The following is my current query; I would like to modify it to get the additional statistics message_processed, message_delivered, and message_failed. Kindly let me know how.
{ "size": 0, "query": { "bool": { "must": [ { "range": { "#timestamp": { "from": "2020-02-21T00:00Z", "to": "2020-02-21T23:59:59.999Z", "include_lower": true, "include_upper": true, "format": "yyyy-MM-dd'T'HH:mm:ss.SSSZ ||yyyy-MM-dd'T'HH:mmZ", "boost": 1.0 } } } ], "adjust_pure_negative": true, "boost": 1.0 } }, "aggregations": { "performance_over_time": { "date_histogram": { "field": "#timestamp", "format": "yyyy-MM-dd", "interval": "1d", "offset": 0, "order": { "_key": "asc" }, "keyed": false, "min_doc_count": 0 } } } }
You are almost there with the query; you just need to add a terms aggregation, and looking at your request, I've come up with a scripted terms aggregation.
I've also changed the date histogram aggregation's interval parameter to calendar_interval so that you get the values per calendar date.
Query Request:
POST <your_index_name>/_search
{
"size": 0,
"query":{
"bool":{
"must":[
{
"range":{
"#timestamp":{
"from":"2019-09-10",
"to":"2019-09-12",
"include_lower":true,
"include_upper":true,
"boost":1.0
}
}
}
],
"adjust_pure_negative":true,
"boost":1.0
}
},
"aggs":{
"message_processed":{
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d" <----- Note this
},
"aggs": {
"my_messages": {
"terms": {
"script": { <----- Core Logic of Terms Agg
"source": """
if(doc['status'].value=="ENROUTE" || doc['status'].value == "SUBMITTED"){
return "message_processed";
}else if(doc['status'].value=="DELIVERED"){
return "message_delivered"
}else {
return "message_failed"
}
""",
"lang": "painless"
},
"size": 10
}
}
}
}
}
}
Note that the core logic you are looking for is inside the scripted terms aggregation. The logic is self-explanatory if you go through it; feel free to modify it to fit your needs.
For the sample data you've shared, you would get the result in the format below:
Response:
{
"took" : 144,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"message_processed" : {
"buckets" : [
{
"key_as_string" : "2019-09-11T00:00:00.000Z",
"key" : 1568160000000,
"doc_count" : 6,
"my_messages" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "message_processed",
"doc_count" : 3
},
{
"key" : "message_failed",
"doc_count" : 2
},
{
"key" : "message_delivered",
"doc_count" : 1
}
]
}
}
]
}
}
}
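If you would rather avoid a per-document script, an alternative (not from the original answer, just a sketch that assumes status is mapped as a keyword field, which the doc-values access in the script above also requires) is a filters sub-aggregation with one named bucket per group:
POST <your_index_name>/_search
{
  "size": 0,
  "aggs": {
    "performance_over_time": {
      "date_histogram": {
        "field": "#timestamp",
        "calendar_interval": "1d",
        "format": "yyyy-MM-dd"
      },
      "aggs": {
        "statuses": {
          "filters": { <----- assumes status is a keyword field
            "filters": {
              "message_processed": { "terms": { "status": [ "ENROUTE", "SUBMITTED" ] } },
              "message_delivered": { "term": { "status": "DELIVERED" } },
              "message_failed": { "term": { "status": "FAILED" } }
            }
          }
        }
      }
    }
  }
}
Each date bucket then contains one sub-bucket per group with its doc_count, without any Painless script.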
