The more elements I pass in the array to the knn_vector query, the more hits come back with _score = null.
For example, I sent an array of length 2 and got 3 results with a valid _score. But when I sent an array of length 60, all results had _score = null.
Request
{
"_source":[],
"collapse":{
"field":"id"
},
"query":{
"knn":{
"vector":{
"k":10,
"vector":[
0,
// array size - 46
0
]
}
}
},
"size":100,
"track_scores":false
}
Response (the first and second scores are null, but the third is a float)
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 7,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "sb_index_images_ba7587a1-35ab-482f-93d8-a433dd132556_1667904180",
"_type" : "_doc",
"_id" : "207445df53a7b54c76ff76c0bec352c9",
"_score" : null,
"fields" : {
"id" : [
"377007"
]
}
},
{
"_index" : "sb_index_images_ba7587a1-35ab-482f-93d8-a433dd132556_1667904180",
"_type" : "_doc",
"_id" : "ea374a9b90d83ab93a77fb03226cafd3",
"_score" : null,
"fields" : {
"id" : [
"377009"
]
}
},
{
"_index" : "sb_index_images_ba7587a1-35ab-482f-93d8-a433dd132556_1667904180",
"_type" : "_doc",
"_id" : "1f93035d08e2b7af7d482a89f36e3c7c",
"_score" : 0.134376,
"fields" : {
"id" : [
"377014"
]
}
}
]
}
}
My index mapping
{
"sb_index_images_ba7587a1-35ab-482f-93d8-a433dd132556_1667904180" : {
"mappings" : {
"properties" : {
"colors" : {
"type" : "long"
},
"colors_vector" : {
"type" : "knn_vector",
"dimension" : 9
},
"id" : {
"type" : "keyword"
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"params" : {
"properties" : {
"0d23f34d9f2168ab98e5542149eb2f3d" : {
"properties" : {
"name" : {
"type" : "keyword",
"ignore_above" : 256
},
"value" : {
"type" : "keyword",
"eager_global_ordinals" : true,
"ignore_above" : 256,
"fields" : {
"float" : {
"type" : "float",
"ignore_malformed" : true
}
}
}
}
}
}
},
"vector" : {
"type" : "knn_vector",
"dimension" : 2048
}
}
}
}
}
I am trying to build a query to search for records in the following format: TR000002_1_2020.
Users should be able to search for results in the following ways:
TR000002 or 2_1_2020 or TR000002_1_2020 or 2020. I figured an ngram tokenization query would be best suited for my needs. I am using Elasticsearch 6.8, so I cannot use the built-in Search-As-You-Type field introduced in Elasticsearch 7.
Here's my implementation, which I followed from the docs here. The only thing I modified was EdgeNGram -> NGram, since the user can search from any point in the text.
My Analysis block looks like this:
.Analysis(a => a
.Analyzers(aa => aa
.Custom("autocomplete", ca => ca
.Tokenizer("autocomplete")
.Filters(new string[] {
"lowercase"
})
)
.Custom("autocomplete_search", ca => ca
.Tokenizer("lowercase")
)
)
.Tokenizers(t => t
.NGram("autocomplete", e => e
.MinGram(2)
.MaxGram(16)
.TokenChars(new TokenChar[] {
TokenChar.Letter,
TokenChar.Digit,
TokenChar.Punctuation,
TokenChar.Symbol
})
)
)
)
Then in my mapping I define:
.Text(t => t
.Name(tr => tr.TestRecordId)
.Analyzer("autocomplete")
.SearchAnalyzer("autocomplete_search")
)
When I search for TR000002, my query returns all results instead of just the records that contain those specific characters. What am I doing wrong? Is there a better tokenizer for this specific use case? Thanks!
EDIT: Here's a sample of what is returned:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 27,
"max_score" : 0.105360515,
"hits" : [
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "3",
"_score" : 0.105360515,
"_source" : {
"id" : 3,
"testRecordId" : "TR000002_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 12,
"testStatus" : {
"testStatusId" : 12,
"name" : "Complete: Postponed Until Further Notice"
},
"discriminator" : "SingleEventEffectsRecord",
"testRecordServiceOrders" : [
{
"testRecordId" : 3,
"serviceOrderId" : 9,
"serviceOrder" : {
"serviceOrderId" : 9,
"serviceOrderNumber" : "105702"
}
}
],
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
},
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false,
"sEETestRates" : [ ]
}
},
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "11",
"_score" : 0.105360515,
"_source" : {
"id" : 11,
"testRecordId" : "TR000011_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 1,
"testStatus" : {
"testStatusId" : 1,
"name" : "Active"
},
"discriminator" : "TotalIonizingDoseRecord",
"creatorUsername" : "[REDACTED]",
"creator" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"testRecordServiceOrders" : [ ],
"partLDC" : "12",
"waferLot" : "1",
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false,
"testStartDate" : "2020-07-30T00:00:00",
"actualCompletionDate" : "2020-07-31T00:00:00"
}
},
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "17",
"_score" : 0.105360515,
"_source" : {
"id" : 17,
"testRecordId" : "TR000017_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "lewallen",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 1,
"testStatus" : {
"testStatusId" : 1,
"name" : "Active"
},
"discriminator" : "TotalIonizingDoseRecord",
"creatorUsername" : "[REDACTED]",
"creator" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"testRecordServiceOrders" : [ ],
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false
}
},
Also, here's what the mapping shows:
"testRecordId" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "autocomplete_search"
},
I guess I should also mention, I've been testing this query in the console like so:
GET test-records-development/_search
{
"query": {
"match": {
"testRecordId": {
"query": "TR000002_1_2020"
}
}
}
}
EDIT 2: Added the API response from the index _settings endpoint:
{
"test-records-development-09-09-2020-02-00-00" : {
"settings" : {
"index" : {
"number_of_shards" : "5",
"provided_name" : "test-records-development-09-09-2020-02-00-00",
"creation_date" : "1599617013874",
"analysis" : {
"analyzer" : {
"autocomplete" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "autocomplete"
},
"autocomplete_search" : {
"type" : "custom",
"tokenizer" : "lowercase"
}
},
"tokenizer" : {
"autocomplete" : {
"token_chars" : [
"letter",
"digit",
"punctuation",
"symbol"
],
"min_gram" : "2",
"type" : "ngram",
"max_gram" : "16"
}
}
},
"number_of_replicas" : "0",
"uuid" : "FSeCa0YwRCOJVbjfxYGkig",
"version" : {
"created" : "6080199"
}
}
}
}
}
As I don't have access to the analyzer settings in JSON format I can't confirm it, but most probably the issue is with your search analyzer autocomplete_search, which is creating search-time tokens that match the index-time tokens.
For example: you are searching for TR000002_1_2020, and if the search analyzer creates 2020 as a token, and the document containing TR000011_1_2020 is also indexed with a 2020 token, then your query will match it.
You can use the _analyze API to check the tokens generated by an analyzer; as mentioned earlier, most likely some tokens are matching, as described above.
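For example, a quick sanity check with the _analyze API, using the index and analyzer names from your settings above (these calls are a sketch; the expected tokens below are my reading of the analyzers, not output from your cluster):
GET test-records-development-09-09-2020-02-00-00/_analyze
{
  "analyzer": "autocomplete_search",
  "text": "TR000002_1_2020"
}

GET test-records-development-09-09-2020-02-00-00/_analyze
{
  "analyzer": "autocomplete",
  "text": "TR000002_1_2020"
}
Because autocomplete_search uses the lowercase tokenizer, which splits on anything that is not a letter and discards digits, the first call should emit only the token tr. That single token is also among the 2-16 character ngrams indexed for every record ID beginning with TR, which would explain why the match query returns all documents.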
I'm currently trying to set up my suggestion implementation.
My index settings / mappings:
{
"settings" : {
"analysis" : {
"analyzer" : {
"trigrams" : {
"tokenizer" : "mesh_default_ngram_tokenizer",
"filter" : [ "lowercase" ]
},
"suggestor" : {
"type" : "custom",
"tokenizer" : "standard",
"char_filter" : [ "html_strip" ],
"filter" : [ "lowercase" ]
}
},
"tokenizer" : {
"mesh_default_ngram_tokenizer" : {
"type" : "nGram",
"min_gram" : "3",
"max_gram" : "3"
}
}
}
},
"mappings" : {
"default" : {
"properties" : {
"uuid" : {
"type" : "string",
"index" : "not_analyzed"
},
"language" : {
"type" : "string",
"index" : "not_analyzed"
},
"fields" : {
"properties" : {
"content" : {
"type" : "string",
"index" : "analyzed",
"analyzer" : "trigrams",
"fields" : {
"suggest" : {
"type" : "completion",
"analyzer" : "suggestor"
}
}
}
}
}
}
}
}
My query:
{
"suggest": {
"query-suggest" : {
"text" : "som",
"completion" : {
"field" : "fields.content.suggest"
}
}
},
"_source": ["fields.content", "uuid", "language"]
}
The query result:
{
"took" : 44,
"timed_out" : false,
"_shards" : {
"total" : 20,
"successful" : 20,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 0.0,
"hits" : [ {
"_index" : "node-08c5d084d4e842b385d084d4e8a2b301-fe6212a62ad94590a212a62ad9759026-44874a2a8d2e4483874a2a8d2e44830c-draft",
"_type" : "default",
"_id" : "c6b7391075cc437ab7391075cc637a05-en",
"_score" : 0.0,
"_source" : {
"language" : "en",
"fields" : {
"content" : "This is<pre>another set of <strong>important</strong>content s<b>om</b>e text with more content you can poke a stick at"
},
"uuid" : "c6b7391075cc437ab7391075cc637a05"
}
}, {
"_index" : "node-08c5d084d4e842b385d084d4e8a2b301-fe6212a62ad94590a212a62ad9759026-44874a2a8d2e4483874a2a8d2e44830c-draft",
"_type" : "default",
"_id" : "96e2c6765b6841fea2c6765b6871fe36-en",
"_score" : 0.0,
"_source" : {
"language" : "en",
"fields" : {
"content" : "This is<pre>another set of <strong>important</strong>content no text with more content you can poke a stick at"
},
"uuid" : "96e2c6765b6841fea2c6765b6871fe36"
}
}, {
"_index" : "node-08c5d084d4e842b385d084d4e8a2b301-fe6212a62ad94590a212a62ad9759026-44874a2a8d2e4483874a2a8d2e44830c-draft",
"_type" : "default",
"_id" : "fd1472555e9d4d039472555e9d5d0386-en",
"_score" : 0.0,
"_source" : {
"language" : "en",
"fields" : {
"content" : "This is<pre>another set of <strong>important</strong>content someth<strong>ing</strong> completely different"
},
"uuid" : "fd1472555e9d4d039472555e9d5d0386"
}
}, {
"_index" : "node-08c5d084d4e842b385d084d4e8a2b301-fe6212a62ad94590a212a62ad9759026-44874a2a8d2e4483874a2a8d2e44830c-draft",
"_type" : "default",
"_id" : "5a3727b134064de4b727b134063de4c4-en",
"_score" : 0.0,
"_source" : {
"language" : "en",
"fields" : {
"content" : "This is<pre>another set of <strong>important</strong>content some<strong>what</strong> strange content"
},
"uuid" : "5a3727b134064de4b727b134063de4c4"
}
}, {
"_index" : "node-08c5d084d4e842b385d084d4e8a2b301-fe6212a62ad94590a212a62ad9759026-44874a2a8d2e4483874a2a8d2e44830c-draft",
"_type" : "default",
"_id" : "865257b6be4340c69257b6be4340c603-en",
"_score" : 0.0,
"_source" : {
"language" : "en",
"fields" : {
"content" : "This is<pre>another set of <strong>important</strong>content some <strong>more</strong> content you can poke a stick at too"
},
"uuid" : "865257b6be4340c69257b6be4340c603"
}
} ]
},
"suggest" : {
"query-suggest" : [ {
"text" : "som",
"offset" : 0,
"length" : 3,
"options" : [ ]
} ]
}
}
I'm currently using Elasticsearch 2.4.6 and I can't upgrade.
There are 5 documents in my index and only 4 contain the word "some".
Why do I see 5 hits but no options?
The options are not empty if I start my suggest text with the first word of the field string (e.g. this).
Is my usage of the suggest feature valid when dealing with fields that contain full HTML pages? I'm not sure whether the feature was meant to handle many tokens per document.
I already tried using the ngram tokenizer for my suggestor analyzer, but that did not change the situation. Any hint or feedback would be appreciated.
It seems that the issue I'm seeing is a restriction of completion suggesters:
Matching always starts at the beginning of the text. So, for example, “Smi” will match “Smith, Fed” but not “Fed Smith”. However, you could list both “Smith, Fed” and “Fed Smith” as two different inputs for the one output.
http://rea.tech/implementing-autosuggest-in-elasticsearch/
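For illustration, a minimal sketch of that workaround in Elasticsearch 2.x, where a completion field accepts several inputs mapped to one output. The index, type, and field names are made up for the example, and name_suggest is assumed to be mapped as a completion field; note this cannot be done through a multi-field like fields.content.suggest, because a multi-field only ever receives the parent field's value:
PUT my_index/default/1
{
  "name_suggest": {
    "input": [ "Smith, Fed", "Fed Smith" ],
    "output": "Fed Smith"
  }
}
With both inputs indexed, a suggest text of smi or fed would each find the same suggestion.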
I have the following data:
ID: 1, fldname: pawan
ID: 1, fldname: pawan1
ID: 1, fldname: pawan2
ID: 2, fldname: pawan3
ID: 3, fldname: pawan4
ID: 4, fldname: pawan5
I am trying to get unique data based on the ID field, similar to what we get in MySQL when firing GROUP BY queries like:
select * from table_name where fldname like 'pawan%' group by ID
This will return unique values. The same works in Sphinx search when we use its group by function.
Is there any way to get unique values in Elasticsearch?
Below is my sample mapping:
"mappings": {
"my_type": {
"properties": {
"docid": {
"type": "keyword"
},
"flgname": {
"type": "text"
}
}
}
}
I suggest that you slightly modify your mapping:
{
"record" : {
"dynamic" : "false",
"_all" : {
"enabled" : false
},
"properties" : {
"docid" : {
"type" : "long"
},
"flgname" : {
"type" : "text"
}
}
}
}
so that docid is a long.
Then you could try fuzzy queries for filtering, together with aggregations, like the one below, which retrieves the minimum, maximum, average, and distinct count (cardinality) of docid values:
{
"from" : 0,
"size" : 10,
"_source" : true,
"query" : {
"bool" : {
"must" : [ {
"match" : {
"flgname" : {
"query" : "pawan",
"operator" : "OR",
"fuzziness" : "1",
"prefix_length" : 1,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"boost" : 1.0
}
}
} ]
}
},
"aggs" : {
"my_cardinality" : {
"cardinality" : {
"field" : "docid"
}
},
"my_avg" : {
"avg" : {
"field" : "docid"
}
},
"my_min" : {
"min" : {
"field" : "docid"
}
},
"my_max" : {
"max" : {
"field" : "docid"
}
}
}
}
By the way, this is the result of the above query on the data you proposed:
{
"took" : 47,
"timed_out" : false,
"_shards" : {
"total" : 3,
"successful" : 3,
"failed" : 0
},
"hits" : {
"total" : 6,
"max_score" : 0.9808292,
"hits" : [ {
"_index" : "stack_overflow1",
"_type" : "record",
"_id" : "40b5eac0-743b-4a6a-a06d-3ae4d56f4aca",
"_score" : 0.9808292,
"_source" : {
"docid" : "1",
"flgname" : "pawan"
}
}, {
"_index" : "stack_overflow1",
"_type" : "record",
"_id" : "27821c39-e722-4361-bc07-0dcd5181a1ad",
"_score" : 0.7846634,
"_source" : {
"docid" : "2",
"flgname" : "pawan3"
}
}, {
"_index" : "stack_overflow1",
"_type" : "record",
"_id" : "86fcd9c1-a688-4a6a-9c45-e91791a8b902",
"_score" : 0.7846634,
"_source" : {
"docid" : "4",
"flgname" : "pawan5"
}
}, {
"_index" : "stack_overflow1",
"_type" : "record",
"_id" : "fb00a3cc-f1b8-4073-8808-f2ddbc4979e2",
"_score" : 0.55451775,
"_source" : {
"docid" : "1",
"flgname" : "pawan1"
}
}, {
"_index" : "stack_overflow1",
"_type" : "record",
"_id" : "18e5e20d-17a7-4d59-b2f1-7bf325a4c4df",
"_score" : 0.55451775,
"_source" : {
"docid" : "3",
"flgname" : "pawan4"
}
}, {
"_index" : "stack_overflow1",
"_type" : "record",
"_id" : "fbf49af6-f574-4ad2-8686-cbbedc5e70c4",
"_score" : 0.23014566,
"_source" : {
"docid" : "1",
"flgname" : "pawan2"
}
} ]
},
"aggregations" : {
"my_cardinality" : {
"value" : 4
},
"my_max" : {
"value" : 4.0
},
"my_avg" : {
"value" : 2.0
},
"my_min" : {
"value" : 1.0
}
}
}
If you also make flgname a keyword, then you can use a sub-aggregation to aggregate over docid and sub-aggregate over flgname. The result will be similar to the SQL query you mentioned; see the sketch below.
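A minimal sketch of that mapping change, reusing the field names from your sample mapping (docid stays a keyword; flgname becomes a keyword too so it can be used in a terms aggregation):
{
  "mappings": {
    "my_type": {
      "properties": {
        "docid": {
          "type": "keyword"
        },
        "flgname": {
          "type": "keyword"
        }
      }
    }
  }
}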
The query against that mapping would look like:
{ "size": 0,
"query": {
"regexp":{
"flgname": "pawa.*"
}
},
"aggs" : {
"docids": {
"terms": {"field": "docid"},
"aggs": { "flgnam": { "terms": {"field": "flgname"}}}}
}
}
I'm trying to do a wildcard query with spaces. It easily matches words on a term basis, but not on a whole-field basis.
I've read the documentation, which says that I need to set the field to not_analyzed, but with this type set, the query returns nothing.
This is the mapping with which it works on a term basis:
{
"denshop" : {
"mappings" : {
"products" : {
"properties" : {
"code" : {
"type" : "string"
},
"id" : {
"type" : "long"
},
"name" : {
"type" : "string"
},
"price" : {
"type" : "long"
},
"url" : {
"type" : "string"
}
}
}
}
}
}
This is the mapping with which the exact same query returns nothing:
{
"denshop" : {
"mappings" : {
"products" : {
"properties" : {
"code" : {
"type" : "string"
},
"id" : {
"type" : "long"
},
"name" : {
"type" : "string",
"index" : "not_analyzed"
},
"price" : {
"type" : "long"
},
"url" : {
"type" : "string"
}
}
}
}
}
}
The query is here:
curl -XPOST http://127.0.0.1:9200/denshop/products/_search?pretty -d '{"query":{"wildcard":{"name":"*test*"}}}'
Response with the not_analyzed property:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
Response without not_analyzed:
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 1.0,
"hits" : [ {
...
EDIT: Adding requested info
Here is the list of documents:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 1.0,
"hits" : [ {
"_index" : "denshop",
"_type" : "products",
"_id" : "3L1",
"_score" : 1.0,
"_source" : {
"id" : 3,
"name" : "Testovací produkt 2",
"code" : "",
"price" : 500,
"url" : "http://www.denshop.lh/damske-obleceni/testovaci-produkt-2/"
}
}, {
"_index" : "denshop",
"_type" : "products",
"_id" : "4L1",
"_score" : 1.0,
"_source" : {
"id" : 4,
"name" : "Testovací produkt 3",
"code" : "",
"price" : 666,
"url" : "http://www.denshop.lh/damske-obleceni/testovaci-produkt-3/"
}
}, {
"_index" : "denshop",
"_type" : "products",
"_id" : "2L1",
"_score" : 1.0,
"_source" : {
"id" : 2,
"name" : "Testovací produkt",
"code" : "",
"price" : 500,
"url" : "http://www.denshop.lh/damske-obleceni/testovaci-produkt/"
}
}, {
"_index" : "denshop",
"_type" : "products",
"_id" : "5L1",
"_score" : 1.0,
"_source" : {
"id" : 5,
"name" : "Testovací produkt 4",
"code" : "",
"price" : 666,
"url" : "http://www.denshop.lh/damske-obleceni/testovaci-produkt-4/"
}
}, {
"_index" : "denshop",
"_type" : "products",
"_id" : "6L1",
"_score" : 1.0,
"_source" : {
"id" : 6,
"name" : "Testovací produkt 5",
"code" : "",
"price" : 666,
"url" : "http://www.denshop.lh/tricka-tilka-tuniky/testovaci-produkt-5/"
}
} ]
}
}
Without not_analyzed, it returns hits with this:
curl -XPOST http://127.0.0.1:9200/denshop/products/_search?pretty -d '{"query":{"wildcard":{"name":"*testovací*"}}}'
But not with this (note the space before the asterisk):
curl -XPOST http://127.0.0.1:9200/denshop/products/_search?pretty -d '{"query":{"wildcard":{"name":"*testovací *"}}}'
When I add not_analyzed to the mapping, it returns no hits no matter what I put in the wildcard query.
Add a custom analyzer that lowercases the text. Then, before passing the search text to the query, lowercase it in your client application, since wildcard queries are not analyzed.
To also keep the original analysis chain, I've added a sub-field to your name field that will use the custom analyzer.
PUT /denshop
{
"settings": {
"analysis": {
"analyzer": {
"keyword_lowercase": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase"
]
}
}
}
},
"mappings": {
"products": {
"properties": {
"name": {
"type": "string",
"fields": {
"lowercase": {
"type": "string",
"analyzer": "keyword_lowercase"
}
}
}
}
}
}
}
And the query will work on the sub-field:
GET /denshop/products/_search
{
"query": {
"wildcard": {
"name.lowercase": "*testovací *"
}
}
}
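As a sanity check, the _analyze API can show what the sub-field actually indexes. Assuming the settings above were applied to the denshop index, this call (using the older query-parameter form that matches the pre-5.x mapping syntax used here) should return a single lowercased token that keeps the space, which is exactly what the wildcard pattern needs to match:
curl -XGET 'http://127.0.0.1:9200/denshop/_analyze?analyzer=keyword_lowercase&pretty' -d 'Testovací produkt'
The expected output is one token, testovací produkt, so the pattern *testovací * can now match inside it.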