spring data elastic search not searching properly on fields - spring

I want to search on three fields of my index: title, authorName and description.
My search looks like :
SearchQuery searchQuery = new NativeSearchQueryBuilder()
.withQuery(QueryBuilders.multiMatchQuery(criteria)
.field("title")
.field("authorName")
.field("description")
.type(MultiMatchQueryBuilder.Type.BEST_FIELDS))
.withPageable(PageRequest.of(page, size))
.build();
Page<Ebook> ebookList = ebookRepo.search(searchQuery);
I have these documents indexed:
{
"took" : 12,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "ebook",
"_type" : "ebook",
"_id" : "9",
"_score" : 1.0,
"_source" : {
"id" : 9,
"size" : null,
"numberOfPages" : 10,
"companyId" : 656,
"price" : 10.0,
"title" : "Welcome to my life",
"authorName" : "JB2",
"description" : "Welcome to my life 3",
"language" : "FR",
"ebookPath" : null,
"ebookDownloadUrl" : null,
"rating" : {
"ratingId" : 10,
"average" : 5.0,
"starsByUserId" : {
"131" : 5
},
"stars" : {
"5" : 1
}
},
"coverPath" : null,
"coverDownloadUrl" : null,
"iconPath" : null,
"iconDownloadUrl" : null,
"category" : {
"id" : 1,
"name" : "Webtoon",
"subCategoryList" : [ ]
},
"subCategory" : {
"id" : 2,
"name" : "Adventure"
},
"repositoryGeneratedId" : "1588336718863",
"userReview" : [
{
"id" : 11,
"comment" : "I dont like it. I give 5 stars",
"commentDate" : 1588336718989,
"appUserId" : 131,
"stars" : 5
}
],
"status" : "INITIATED"
}
},
{
"_index" : "ebook",
"_type" : "ebook",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"id" : 5,
"size" : null,
"numberOfPages" : 10,
"companyId" : 2,
"price" : 10,
"title" : "Welcome to my life",
"authorName" : "kosted 3",
"description" : "Ceci est une autre description 3",
"language" : "FR",
"ebookPath" : null,
"ebookDownloadUrl" : null,
"rating" : {
"ratingId" : 6,
"average" : 0.0,
"starsByUserId" : { },
"stars" : { }
},
"coverPath" : null,
"coverDownloadUrl" : null,
"iconPath" : null,
"iconDownloadUrl" : null,
"category" : {
"id" : 1,
"name" : "Webtoon"
},
"subCategory" : {
"id" : 2,
"name" : "Adventure"
},
"repositoryGeneratedId" : "1588372761403",
"userReview" : [ ],
"status" : "INITIATED"
}
},
{
"_index" : "ebook",
"_type" : "ebook",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"id" : 3,
"size" : null,
"numberOfPages" : 10,
"companyId" : 2,
"price" : 10,
"title" : "Welcome to my life",
"authorName" : "kosted 2",
"description" : "Ceci est une autre description 2",
"language" : "FR",
"ebookPath" : null,
"ebookDownloadUrl" : null,
"rating" : {
"ratingId" : 4,
"average" : 0.0,
"starsByUserId" : { },
"stars" : { }
},
"coverPath" : null,
"coverDownloadUrl" : null,
"iconPath" : null,
"iconDownloadUrl" : null,
"category" : {
"id" : 1,
"name" : "Webtoon"
},
"subCategory" : {
"id" : 2,
"name" : "Adventure"
},
"repositoryGeneratedId" : "1588372758036",
"userReview" : [ ],
"status" : "INITIATED"
}
},
{
"_index" : "ebook",
"_type" : "ebook",
"_id" : "6",
"_score" : 1.0,
"_source" : {
"id" : 6,
"size" : null,
"numberOfPages" : 10,
"companyId" : 655,
"price" : 10,
"title" : "Welcome to my life",
"authorName" : "JBB",
"description" : "Welcome to my life 2",
"language" : "FR",
"ebookPath" : null,
"ebookDownloadUrl" : null,
"rating" : {
"ratingId" : 7,
"average" : 0.0,
"starsByUserId" : { },
"stars" : { }
},
"coverPath" : null,
"coverDownloadUrl" : null,
"iconPath" : null,
"iconDownloadUrl" : null,
"category" : {
"id" : 1,
"name" : "Webtoon",
"subCategoryList" : [ ]
},
"subCategory" : {
"id" : 2,
"name" : "Adventure"
},
"repositoryGeneratedId" : "1588336718576",
"userReview" : [ ],
"status" : "INITIATED"
}
},
{
"_index" : "ebook",
"_type" : "ebook",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"id" : 1,
"size" : null,
"numberOfPages" : 10,
"companyId" : 2,
"price" : 10,
"title" : "Welcome to my life",
"authorName" : "kosted",
"description" : "Book about my life",
"language" : "FR",
"ebookPath" : null,
"ebookDownloadUrl" : null,
"rating" : {
"ratingId" : 2,
"average" : 0.0,
"starsByUserId" : { },
"stars" : { }
},
"coverPath" : null,
"coverDownloadUrl" : null,
"iconPath" : null,
"iconDownloadUrl" : null,
"category" : {
"id" : 1,
"name" : "Webtoon"
},
"subCategory" : {
"id" : 2,
"name" : "Adventure"
},
"repositoryGeneratedId" : "1588372748126",
"userReview" : [ ],
"status" : "INITIATED"
}
}
]
}
}
When I search with the term "JB", "jb" or even "JB2", I get 0 results even though there are ebooks with authorName JB2.
I use spring 2.2.4.RELEASE and spring data elastic search 3.2.4.RELEASE.
What did I do wrong in my code ? Any idea about how could I correct it ?
Thanks in advance

Related

Elasticsearch ngram tokenizer returns all results regardless of query input

I am trying to build a query to search for records in the following format: TR000002_1_2020.
Users should be able to search for results the following ways:
TR000002 or 2_1_2020 or TR000002_1_2020 or 2020. I figured an ngram tokenization query would be best suited for my needs. I am using Elasticsearch 6.8 so I cannot use the built in Search-As-You-Type introduced in E7.
Here's my implementation I followed from docs here. The only thing I modified was EdgeNGram -> NGram as the user can search from any point of the text.
My Analysis block looks like this:
.Analysis(a => a
.Analyzers(aa => aa
.Custom("autocomplete", ca => ca
.Tokenizer("autocomplete")
.Filters(new string[] {
"lowercase"
})
)
.Custom("autocomplete_search", ca => ca
.Tokenizer("lowercase")
)
)
.Tokenizers(t => t
.NGram("autocomplete", e => e
.MinGram(2)
.MaxGram(16)
.TokenChars(new TokenChar[] {
TokenChar.Letter,
TokenChar.Digit,
TokenChar.Punctuation,
TokenChar.Symbol
})
)
)
)
Then in my mapping I define:
.Text(t => t
.Name(tr => tr.TestRecordId)
.Analyzer("autocomplete")
.SearchAnalyzer("autocomplete_search")
)
When I search for TR000002, my query returns all results instead of just the records that contain those specific characters. What am I doing wrong? Is there a better tokenizer for this specific use case? Thanks!
EDIT: Here's a sample of what is returned:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 27,
"max_score" : 0.105360515,
"hits" : [
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "3",
"_score" : 0.105360515,
"_source" : {
"id" : 3,
"testRecordId" : "TR000002_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 12,
"testStatus" : {
"testStatusId" : 12,
"name" : "Complete: Postponed Until Further Notice"
},
"discriminator" : "SingleEventEffectsRecord",
"testRecordServiceOrders" : [
{
"testRecordId" : 3,
"serviceOrderId" : 9,
"serviceOrder" : {
"serviceOrderId" : 9,
"serviceOrderNumber" : "105702"
}
}
],
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
},
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false,
"sEETestRates" : [ ]
}
},
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "11",
"_score" : 0.105360515,
"_source" : {
"id" : 11,
"testRecordId" : "TR000011_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 1,
"testStatus" : {
"testStatusId" : 1,
"name" : "Active"
},
"discriminator" : "TotalIonizingDoseRecord",
"creatorUsername" : "[REDACTED]",
"creator" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"testRecordServiceOrders" : [ ],
"partLDC" : "12",
"waferLot" : "1",
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false,
"testStartDate" : "2020-07-30T00:00:00",
"actualCompletionDate" : "2020-07-31T00:00:00"
}
},
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "17",
"_score" : 0.105360515,
"_source" : {
"id" : 17,
"testRecordId" : "TR000017_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "lewallen",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 1,
"testStatus" : {
"testStatusId" : 1,
"name" : "Active"
},
"discriminator" : "TotalIonizingDoseRecord",
"creatorUsername" : "[REDACTED]",
"creator" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"testRecordServiceOrders" : [ ],
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false
}
},
Also here's what shows for mapping:
"testRecordId" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "autocomplete_search"
},
I guess I should also mention, I've been testing this query in the console like so:
GET test-records-development/_search
{
"query": {
"match": {
"testRecordId": {
"query": "TR000002_1_2020"
}
}
}
}
EDIT 2: Added API response from index _settings endpoint:
{
"test-records-development-09-09-2020-02-00-00" : {
"settings" : {
"index" : {
"number_of_shards" : "5",
"provided_name" : "test-records-development-09-09-2020-02-00-00",
"creation_date" : "1599617013874",
"analysis" : {
"analyzer" : {
"autocomplete" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "autocomplete"
},
"autocomplete_search" : {
"type" : "custom",
"tokenizer" : "lowercase"
}
},
"tokenizer" : {
"autocomplete" : {
"token_chars" : [
"letter",
"digit",
"punctuation",
"symbol"
],
"min_gram" : "2",
"type" : "ngram",
"max_gram" : "16"
}
}
},
"number_of_replicas" : "0",
"uuid" : "FSeCa0YwRCOJVbjfxYGkig",
"version" : {
"created" : "6080199"
}
}
}
}
}
As I don't have access to the analyzer settings in JSON format, I can't confirm it, but most probably the issue is with your search analyzer autocomplete_search, which is creating search-time tokens that match the index-time tokens of other documents.
For example: you are searching for TR000002_1_2020, and if that creates 2020 as a token, and the document containing TR000011_1_2020 also creates a 2020 token, then your query will match it.
You can use the analyze API to check the tokens generated by an analyzer; as mentioned earlier, most likely there are some tokens that match, as shown above.

Elastic Search always returning score as zero

I ran the index and I am trying to execute a few queries to validate the data. I found that whatever request I send, the Elasticsearch score is zero for all the results. I am trying different combinations in Kibana, and all the documents returned contain a _score of zero.
Below is my request and response:
GET _search
{
"version": true,
"size": 500,
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"_source": {
"excludes": []
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [],
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"match_phrase": {
"name": {
"query": "RED"
}
}
}
],
"should": [],
"must_not": []
}
},
"highlight": {
"pre_tags": [
"#kibana-highlighted-field#"
],
"post_tags": [
"#/kibana-highlighted-field#"
],
"fields": {
"*": {}
},
"fragment_size": 2147483647
}
}
Response is :
{
"took" : 126,
"timed_out" : false,
"_shards" : {
"total" : 11,
"successful" : 11,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "products",
"_type" : "product",
"_id" : "11",
"_version" : 3,
"_score" : 0.0,
"_source" : {
"sellercode" : "1",
"avgreviews" : 5.0,
"saleprice" : null,
"sellable" : null,
"freedelivery" : false,
"promotedprice" : null,
"listprice" : 1200.2,
"noofreviews" : 1,
"outdooruse" : false,
"warrantycode" : null,
"australiasellable" : true,
"newrelease" : null,
"sku" : "VSTALDBED386_1",
"height" : 68.0,
"shortdescription" : "Loft Sofa Bed Wood Red Medium",
"commercialuse" : true,
"customisable" : true,
"weight" : "5656.0KG",
"typeCode" : "Bed - King Single",
"colour" : "Red",
"depth" : 51.0,
"name" : "LOFT SOFA BED WOOD RED MEDIUM",
"online" : true,
"materialcode" : "Fabric",
"assemblyrequired" : null,
"category" : [ ],
"stylecode" : "Contemporary"
},
"highlight" : {
"name" : [
"LOFT SOFA BED WOOD #kibana-highlighted-field#RED#/kibana-highlighted-field# MEDIUM"
]
}
},
{
"_index" : "products",
"_type" : "product",
"_id" : "53",
"_version" : 3,
"_score" : 0.0,
"_source" : {
"sellercode" : "1",
"avgreviews" : 5.0,
"saleprice" : null,
"sellable" : null,
"freedelivery" : true,
"promotedprice" : null,
"listprice" : 9635.0,
"noofreviews" : 1,
"outdooruse" : false,
"warrantycode" : null,
"australiasellable" : true,
"newrelease" : null,
"sku" : "VSTALDBED393_1",
"height" : 66.0,
"shortdescription" : "tolix Stool Wood Red",
"commercialuse" : false,
"customisable" : false,
"weight" : "6525.0KG",
"typeCode" : "Bar Stool",
"colour" : "Silver",
"depth" : 25.0,
"name" : "LILLI / TOLIX STOOL WOOD RED",
"online" : true,
"materialcode" : "Metal",
"assemblyrequired" : null,
"category" : [ ],
"stylecode" : "Retro"
},
"highlight" : {
"name" : [
"LILLI / TOLIX STOOL WOOD #kibana-highlighted-field#RED#/kibana-highlighted-field#"
]
}
},
{
"_index" : "products",
"_type" : "product",
"_id" : "125",
"_version" : 3,
"_score" : 0.0,
"_source" : {
"sellercode" : "1",
"avgreviews" : 3.0,
"saleprice" : null,
"sellable" : null,
"freedelivery" : true,
"promotedprice" : null,
"listprice" : 6500.0,
"noofreviews" : 1,
"outdooruse" : false,
"warrantycode" : null,
"australiasellable" : true,
"newrelease" : null,
"sku" : "VSTALDBED405_1",
"height" : 55.0,
"shortdescription" : "Ialian Design New GasLift Chanelle Queen Size Red PU Leather Wodden
Bed frame",
"commercialuse" : false,
"customisable" : false,
"weight" : "5693.0KG",
"typeCode" : "Bed - Queen",
"colour" : "red",
"depth" : 58.0,
"name" : "ITALIAN DESIGN NEW GASLIFT CHANELLE QUEEN SIZE RED PU LEATHER WOODEN BED FRAME",
"online" : true,
"materialcode" : "Timber",
"assemblyrequired" : null,
"category" : [ ],
"stylecode" : "Contemporary"
},
"highlight" : {
"name" : [
"ITALIAN DESIGN NEW GASLIFT CHANELLE QUEEN SIZE #kibana-highlighted-field#RED#/kibana-highlighted-field# PU LEATHER WOODEN BED FRAME"
]
}
},
{
"_index" : "products",
"_type" : "product",
"_id" : "707",
"_version" : 3,
"_score" : 0.0,
"_source" : {
"sellercode" : "2",
"avgreviews" : 2.0,
"saleprice" : null,
"sellable" : null,
"freedelivery" : false,
"promotedprice" : null,
"listprice" : 6326.0,
"noofreviews" : 1,
"outdooruse" : false,
"warrantycode" : null,
"australiasellable" : true,
"newrelease" : null,
"sku" : "VSTALDBED478_2",
"height" : 36.0,
"shortdescription" : "Leaf and Vine Rug Brown Cream red",
"commercialuse" : false,
"customisable" : true,
"weight" : "6548.0KG",
"typeCode" : "Shag Rug",
"colour" : "Brown",
"depth" : 47.0,
"name" : "LEAF AND VINE RUG BROWN CREAM RED",
"online" : true,
"materialcode" : "Plastic",
"assemblyrequired" : null,
"category" : [ ],
"stylecode" : "Contemporary"
},
"highlight" : {
"name" : [
"LEAF AND VINE RUG BROWN CREAM #kibana-highlighted-field#RED#/kibana-highlighted-field#"
]
}
}
]
}
}
Can you point me in the right direction to fix this score value?
Thanks.
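Sree, all you need to do is put your match_phrase in the must clause instead of the filter clause. Clauses in filter context only include or exclude documents and do not contribute to the score, which is why every _score is 0.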
Your query part should look like this :
"query": {
"bool": {
"must": [
{
"match_phrase": {
"name": {
"query": "RED"
}
}
}
],
"filter": [
{
"match_all": {}
}
],
"should": [],
"must_not": []
}
}
Output with 1 indexed document :
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.2876821,
"hits" : [
{
"_index" : "someidx",
"_type" : "_doc",
"_id" : "c37m5W4BifZmUly9Ni-X",
"_version" : 1,
"_score" : 0.2876821,
"_source" : {
"sellercode" : "1",
"avgreviews" : 5.0,
"saleprice" : null,
"sellable" : null,
"freedelivery" : false,
"promotedprice" : null,
"listprice" : 1200.2,
"noofreviews" : 1,
"outdooruse" : false,
"warrantycode" : null,
"australiasellable" : true,
"newrelease" : null,
"sku" : "VSTALDBED386_1",
"height" : 68.0,
"shortdescription" : "Loft Sofa Bed Wood Red Medium",
"commercialuse" : true,
"customisable" : true,
"weight" : "5656.0KG",
"typeCode" : "Bed - King Single",
"colour" : "Red",
"depth" : 51.0,
"name" : "LOFT SOFA BED WOOD RED MEDIUM",
"online" : true,
"materialcode" : "Fabric",
"assemblyrequired" : null,
"category" : [ ],
"stylecode" : "Contemporary"
},
"highlight" : {
"name" : [
"LOFT SOFA BED WOOD #kibana-highlighted-field#RED#/kibana-highlighted-field# MEDIUM"
]
}
}
]
}
}

Why doesn't kibana display all the search results?

Here is my search query:
GET /bank/_search?q=*&sort=account_number:asc&pretty
which matches all of the 1000 docs in the bank index:
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
yellow open bank LRl6fcZsSR6a0BMxIAQzIA 1 1 1000 0 414.3kb 414.3kb
green open .kibana_task_manager 2hiY91XzQQKAzmnXhpQLTA 1 0 2 0 12.8kb 12.8kb
green open .kibana_1 G4vY0_JASzqERwKlbqMqAg 1 0 4 0 14.7kb 14.7kb
yellow open customer 0B2gsBy3Rp-5vkMFhto-Wg 1 1 2 0 6.7kb 6.7kb
Below are my search results. Under "hits" at the top, you can see that there were 1000 hits, which is what I expected (all the _docs). Yet Kibana only displays 10 of the hits (account numbers 0 through 9). Where are the rest?
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1000,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "bank",
"_type" : "_doc",
"_id" : "0",
"_score" : null,
"_source" : {
"account_number" : 0,
"balance" : 16623,
"firstname" : "Bradshaw",
"lastname" : "Mckenzie",
"age" : 29,
"gender" : "F",
"address" : "244 Columbus Place",
"employer" : "Euron",
"email" : "bradshawmckenzie#euron.com",
"city" : "Hobucken",
"state" : "CO"
},
"sort" : [
0
]
},
{
"_index" : "bank",
"_type" : "_doc",
"_id" : "1",
"_score" : null,
"_source" : {
"account_number" : 1,
"balance" : 39225,
"firstname" : "Amber",
"lastname" : "Duke",
"age" : 32,
"gender" : "M",
"address" : "880 Holmes Lane",
"employer" : "Pyrami",
"email" : "amberduke#pyrami.com",
"city" : "Brogan",
"state" : "IL"
},
"sort" : [
1
]
},
{
"_index" : "bank",
"_type" : "_doc",
"_id" : "2",
"_score" : null,
"_source" : {
"account_number" : 2,
"balance" : 28838,
"firstname" : "Roberta",
"lastname" : "Bender",
"age" : 22,
"gender" : "F",
"address" : "560 Kingsway Place",
"employer" : "Chillium",
"email" : "robertabender#chillium.com",
"city" : "Bennett",
"state" : "LA"
},
"sort" : [
2
]
},
{
"_index" : "bank",
"_type" : "_doc",
"_id" : "3",
"_score" : null,
"_source" : {
"account_number" : 3,
"balance" : 44947,
"firstname" : "Levine",
"lastname" : "Burks",
"age" : 26,
"gender" : "F",
"address" : "328 Wilson Avenue",
"employer" : "Amtap",
"email" : "levineburks#amtap.com",
"city" : "Cochranville",
"state" : "HI"
},
"sort" : [
3
]
},
{
"_index" : "bank",
"_type" : "_doc",
"_id" : "4",
"_score" : null,
"_source" : {
"account_number" : 4,
"balance" : 27658,
"firstname" : "Rodriquez",
"lastname" : "Flores",
"age" : 31,
"gender" : "F",
"address" : "986 Wyckoff Avenue",
"employer" : "Tourmania",
"email" : "rodriquezflores#tourmania.com",
"city" : "Eastvale",
"state" : "HI"
},
"sort" : [
4
]
},
{
"_index" : "bank",
"_type" : "_doc",
"_id" : "5",
"_score" : null,
"_source" : {
"account_number" : 5,
"balance" : 29342,
"firstname" : "Leola",
"lastname" : "Stewart",
"age" : 30,
"gender" : "F",
"address" : "311 Elm Place",
"employer" : "Diginetic",
"email" : "leolastewart#diginetic.com",
"city" : "Fairview",
"state" : "NJ"
},
"sort" : [
5
]
},
{
"_index" : "bank",
"_type" : "_doc",
"_id" : "6",
"_score" : null,
"_source" : {
"account_number" : 6,
"balance" : 5686,
"firstname" : "Hattie",
"lastname" : "Bond",
"age" : 36,
"gender" : "M",
"address" : "671 Bristol Street",
"employer" : "Netagy",
"email" : "hattiebond#netagy.com",
"city" : "Dante",
"state" : "TN"
},
"sort" : [
6
]
},
{
"_index" : "bank",
"_type" : "_doc",
"_id" : "7",
"_score" : null,
"_source" : {
"account_number" : 7,
"balance" : 39121,
"firstname" : "Levy",
"lastname" : "Richard",
"age" : 22,
"gender" : "M",
"address" : "820 Logan Street",
"employer" : "Teraprene",
"email" : "levyrichard#teraprene.com",
"city" : "Shrewsbury",
"state" : "MO"
},
"sort" : [
7
]
},
{
"_index" : "bank",
"_type" : "_doc",
"_id" : "8",
"_score" : null,
"_source" : {
"account_number" : 8,
"balance" : 48868,
"firstname" : "Jan",
"lastname" : "Burns",
"age" : 35,
"gender" : "M",
"address" : "699 Visitation Place",
"employer" : "Glasstep",
"email" : "janburns#glasstep.com",
"city" : "Wakulla",
"state" : "AZ"
},
"sort" : [
8
]
},
{
"_index" : "bank",
"_type" : "_doc",
"_id" : "9",
"_score" : null,
"_source" : {
"account_number" : 9,
"balance" : 24776,
"firstname" : "Opal",
"lastname" : "Meadows",
"age" : 39,
"gender" : "M",
"address" : "963 Neptune Avenue",
"employer" : "Cedward",
"email" : "opalmeadows#cedward.com",
"city" : "Olney",
"state" : "OH"
},
"sort" : [
9
]
}
]
}
}
Okay:
hits.hits – actual array of search results (defaults to first 10 documents)
You can control the size of what kibana outputs like this:
GET /bank/_search
{
"query": { "match_all": {} },
"size": 50
}
If size isn't specified:
GET /bank/_search
{
"query": { "match_all": {} },
}
then size defaults to 10.
By default the size parameter is set to 10, and therefore you only see 10 results. To get more results you can adjust this parameter according to your needs. Sometimes it is better to use the size parameter together with the from parameter to fetch results page by page, when the whole data set is not required in one go.
So you can either use "size": 1000, or set "from": 0, "size": 100 to get the first 100 results and then keep sending the same query, changing only the value of the from parameter on each request. For example, to get the next 100 results set "from": 100.
To get all 1000 results add size param as below:
{
"query":{
// your query here
},
"size": 1000
}
You can read more on from/size here.
As a query parameter you can add size as
GET /bank/_search?q=*&sort=account_number:asc&size=1000&pretty

Query with match to get all values for a given field! ElasticSearch

I'm pretty new to Elasticsearch and would like to write a query that returns all of the values of a specific field. I mean, say I have the fields "Number" and "change_manager_group": is there a query that lists all the numbers for which "change_manager_group = Change Managers - 2"?
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 10,
"successful" : 10,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1700,
"max_score" : 1.0,
"hits" : [
{
"_index" : "test-tem-changes",
"_type" : "_doc",
"_id" : "CHG0393073_1554800400000",
"_score" : 1.0,
"_source" : {
"work_notes" : "",
"priority" : "4 - Low",
"planned_start" : 1554800400000,
"Updated_by" : "system",
"Updated" : 1554819333000,
"phase" : "Requested",
"Number" : "CHG0312373",
"change_manager_group" : "Change Managers - 1",
"approval" : "Approved",
"downtime" : "false",
"close_notes" : "",
"Standard_template_version" : "",
"close_code" : null,
"actual_start" : 1554819333000,
"closed_by" : "",
"Type" : "Normal"
}
},
{
"_index" : "test-tem-changes",
"_type" : "_doc",
"_id" : "CHG0406522_0",
"_score" : 1.0,
"_source" : {
"work_notes" : "",
"priority" : "4 - Low",
"planned_start" : 0,
"Updated_by" : "svcmdeploy_automation",
"Updated" : 1553320559000,
"phase" : "Requested",
"Number" : "CHG041232",
"change_manager_group" : "Change Managers - 2",
"approval" : "Approved",
"downtime" : "false",
"close_notes" : "Change Installed",
"Standard_template_version" : "",
"close_code" : "Successful",
"actual_start" : 1553338188000,
"closed_by" : "",
"Type" : "Automated"
}
},
{
"_index" : "test-tem-changes",
"_type" : "_doc",
"_id" : "CHG0406526_0",
"_score" : 1.0,
"_source" : {
"work_notes" : "",
"priority" : "4 - Low",
"planned_start" : 0,
"Updated_by" : "svcmdeploy_automation",
"Updated" : 1553321854000,
"phase" : "Requested",
"Number" : "CHG0412326",
"change_manager_group" : "Change Managers - 2",
"approval" : "Approved",
"downtime" : "false",
"close_notes" : "Change Installed",
"Standard_template_version" : "",
"close_code" : "Successful",
"actual_start" : 1553339629000,
"closed_by" : "",
"Type" : "Automated"
}
},
I tried this after a bit of googling, but that errors out
curl -XGET "http://localhost:9200/test-tem-changes/_search?pretty=true" -H 'Content-Type: application/json' -d '
> {
> "query" : { "Number" : {"query" : "*"} }
> }
> '
What am I missing here?
To get all the documents where change_manager_group == "Change Managers - 2" you want to use a Term Query. Below I am wrapping it in a filter context so that it is faster (it does not score relevance).
If change_manager_group is not a keyword mapped field, you may have to use change_manager_group.keyword depending on your mapping.
GET test-tem-changes/_search
{
"query": {
"bool": {
"filter": {
"term": {
"change_manager_group": "Change Managers - 2"
}
}
}
}
}

Why is ascending geo distance sorting faster than descending geo distance sorting

I'm using Elasticsearch 6.6 and have an index (1 shard, 1 replica) with the geonames (https://www.geonames.org/) dataset indexed (indexsize =1.3 gb, 11.8 mio geopoints).
I was playing around a bit with the geo distance sorting query, sorting the whole index for some origin points. After some testing I saw that sorting ascending is always faster than sorting descending. Here is an example query (I also tested with a bigger "size" parameter):
POST /geonames/_search?request_cache=false
{
"size":1,
"sort" : [
{
"_geo_distance" : {
"location" : [8, 49],
"order" : "asc",
"unit" : "m",
"mode" : "min",
"distance_type" : "arc",
"ignore_unmapped": true
}
}
]
}
Here is the answer for ascending sorting (with explain and profile True):
{
"took" : 1374,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 11858060,
"max_score" : null,
"hits" : [
{
"_shard" : "[geonames][0]",
"_node" : "qXTymyB9QLmxhPtGEtA_mA",
"_index" : "geonames",
"_type" : "doc",
"_id" : "L781LmkBrQo0YN4qP48D",
"_score" : null,
"_source" : {
"id" : "3034701",
"name" : "Forêt de Wissembourg",
"location" : {
"lat" : "49.00924",
"lon" : "8.01542"
}
},
"sort" : [
1523.4121312414704
],
"_explanation" : {
"value" : 1.0,
"description" : "*:*",
"details" : [ ]
}
}
]
},
"profile" : {
"shards" : [
{
"id" : "[qXTymyB9QLmxhPtGEtA_mA][geonames][0]",
"searches" : [
{
"query" : [
{
"type" : "MatchAllDocsQuery",
"description" : "*:*",
"time_in_nanos" : 265223567,
"breakdown" : {
"score" : 0,
"build_scorer_count" : 54,
"match_count" : 0,
"create_weight" : 10209,
"next_doc" : 253091268,
"match" : 0,
"create_weight_count" : 1,
"next_doc_count" : 11858087,
"score_count" : 0,
"build_scorer" : 263948,
"advance" : 0,
"advance_count" : 0
}
}
],
"rewrite_time" : 1097,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time_in_nanos" : 1044167746,
"children" : [
{
"name" : "SimpleFieldCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 508296683
}
]
}
]
}
],
"aggregations" : [ ]
}
]
}
}
and here for descending, just switched the parameter from asc to desc (also with profile and explain):
{
"took" : 2226,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 11858060,
"max_score" : null,
"hits" : [
{
"_shard" : "[geonames][0]",
"_node" : "qXTymyB9QLmxhPtGEtA_mA",
"_index" : "geonames",
"_type" : "doc",
"_id" : "Mq80LmkBrQo0YN4q11bA",
"_score" : null,
"_source" : {
"id" : "4036351",
"name" : "Bollons Seamount",
"location" : {
"lat" : "-49.66667",
"lon" : "-176.16667"
}
},
"sort" : [
1.970427111052182E7
],
"_explanation" : {
"value" : 1.0,
"description" : "*:*",
"details" : [ ]
}
}
]
},
"profile" : {
"shards" : [
{
"id" : "[qXTymyB9QLmxhPtGEtA_mA][geonames][0]",
"searches" : [
{
"query" : [
{
"type" : "MatchAllDocsQuery",
"description" : "*:*",
"time_in_nanos" : 268521404,
"breakdown" : {
"score" : 0,
"build_scorer_count" : 54,
"match_count" : 0,
"create_weight" : 9333,
"next_doc" : 256458664,
"match" : 0,
"create_weight_count" : 1,
"next_doc_count" : 11858087,
"score_count" : 0,
"build_scorer" : 195265,
"advance" : 0,
"advance_count" : 0
}
}
],
"rewrite_time" : 1142,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time_in_nanos" : 1898324618,
"children" : [
{
"name" : "SimpleFieldCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 1368306442
}
]
}
]
}
],
"aggregations" : [ ]
}
]
}
}
So my question is: why is it like this? As I understood it, Elasticsearch calculates the distance from the origin point to every other point and then sorts them. So why is the descending sorting so much slower?
I asked the same question on the Elasticsearch board and got an answer.
Apparently Elasticsearch uses different search strategies/algorithms for ascending and descending distance sorting.
For the descending sort it calculates the distance from the origin to every point and then sorts.
For the ascending sort it uses bounding boxes to filter points near the origin and only calculates the distances for points inside the bounding boxes.

Resources