Elasticsearch not returning results for the same token?

The data indexed in Elasticsearch is in Korean, so I can't show the exact case, but let's say
I have a word ABBCC that has been tokenized as ["A","BBCC"] and another word AZZXXX tokenized as ["A","ZZXXX"].
If I search for ABBCC, shouldn't AZZXXX also come up, since they share the token "A"? Or is this not how Elasticsearch works?
This is how I checked the analyzed words:
GET recpost_test/_analyze
{
"analyzer": "my_analyzer",
"text":"my query String!"
}
This is how I created my index:
PUT recpost
{
"settings": {
"index": {
"analysis": {
"tokenizer": {
"nori_user_dict": {
"type": "nori_tokenizer",
"decompound_mode": "mixed",
"user_dictionary": "userdict_ko.txt"
}
},
"analyzer": {
"my_analyzer": {
"type": "custom",
"tokenizer": "nori_user_dict"
}
},
"filter": {
"substring": {
"type": "edgeNGram",
"min_gram": 1,
"max_gram": 10
}
}
}
}
}
}
This is how I searched:
GET recpost/_search
{
"_source": [""],
"from": 0,
"size": 2,
"query":{
"multi_match": {
"query" : "my query String!",
"type": "best_fields",
"fields" : [
"brandkor",
"content",
"itemname",
"name",
"review",
"shortreview^2",
"title^3"]
}
}
}
EDIT:
I tried adding the "analyzer" field to the search and it still doesn't work:
GET recpost/_search
{
"_source": [""],
"from": 0,
"size": 2,
"query":{
"multi_match": {
"query" : "깡스",
"analyzer": "my_analyzer",
"type": "best_fields",
"fields" : [
"brandkor",
"content",
"itemname",
"name",
"review",
"shortreview^2",
"title^3"]
}
}
}
EDIT2: This is my mapping:
{
"recpost_test" : {
"mappings" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"brandkor" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"content" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"field_statistics" : {
"type" : "boolean"
},
"fields" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"itemname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"offsets" : {
"type" : "boolean"
},
"payloads" : {
"type" : "boolean"
},
"positions" : {
"type" : "boolean"
},
"review" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"shortreview" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"term_statistics" : {
"type" : "boolean"
},
"title" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}

I don't see that you wired your analyzer into your fields in the index mapping.
So as far as I can tell, you're indexing all of the fields (brandkor, content, etc.) as plain text with the default analyzer, and you're essentially matching exact values.
That will stay the case unless you associate each field with its analyzer; see the sketch below.
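A minimal sketch of what that could look like (I'm only guessing at which fields need Korean analysis; the field names below come from your search request, and the analysis settings are copied from your index creation):
PUT recpost
{
  "settings": {
    "index": {
      "analysis": {
        "tokenizer": {
          "nori_user_dict": {
            "type": "nori_tokenizer",
            "decompound_mode": "mixed",
            "user_dictionary": "userdict_ko.txt"
          }
        },
        "analyzer": {
          "my_analyzer": {
            "type": "custom",
            "tokenizer": "nori_user_dict"
          }
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": { "type": "text", "analyzer": "my_analyzer" },
      "itemname": { "type": "text", "analyzer": "my_analyzer" },
      "brandkor": { "type": "text", "analyzer": "my_analyzer" }
    }
  }
}
The analyzer of an existing text field can't be changed in place, so you would create a new index like this and reindex your data into it. Once the fields are analyzed with nori at both index and search time, a multi_match for ABBCC (tokens ["A","BBCC"]) should also match a document containing AZZXXX (tokens ["A","ZZXXX"]), because they share the token "A".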

Related

Elasticsearch - I am struggling to write a query on a field where 2 criteria need to be met

I am struggling to write a query on a field where two criteria need to be met, for a dashboard in Kibana. My field is test.keyword and I need the results where both test A and test B have result.keyword (another field) equal to PASS.
{ "query": {
"match_phrase": {
"test.keyword": "EOL_Overall_test_result" }
}
}
So I need another criterion, test.keyword:"EOL_flash_app_fw",
and both of these need to have the result as:
result.keyword:"PASS"
My index mapping:
{
"mte" : {
"mappings" : {
"properties" : {
"EESWVer" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"acdID" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"board" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"create" : {
"properties" : {
"board" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"device" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"reason" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"result" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"test" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timeStamp" : {
"type" : "date"
}
}
},
"device" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"hostname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"reason" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"result" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"test" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timeStamp" : {
"type" : "date"
}
}
}
}
}
DOCUMENT SAMPLE
{
"_index": "mte",
"_type": "result",
"_id": "fY1Amn4BTPepfjg1c5x5",
"_version": 1,
"_score": 1,
"_source": {
"timeStamp": "2022-01-27T14:37:01+08:00",
"test": "EOL_Overall_test_result",
"hostname": "eol-test-0",
"acdID": "0x00A2F16A",
"EESWVer": "0.3.0",
"device": "",
"result": "PASS",
"reason": "0b001111111110011011111111",
"board": "JENMUL90"
},
"fields": {
"acdID.keyword": [
"0x00A2F16A"
],
"reason": [
"0b001111111110011011111111"
],
"device.keyword": [
""
],
"test": [
"EOL_Overall_test_result"
],
"test.keyword": [
"EOL_Overall_test_result"
],
"result.keyword": [
"PASS"
],
"EESWVer.keyword": [
"0.3.0"
],
"board.keyword": [
"JENMU90"
],
"result": [
"PASS"
],
"timeStamp": [
"2022-01-27T06:37:01.000Z"
],
"hostname": [
"eol-test-0"
],
"reason.keyword": [
"0b001111111110011011111111"
],
"acdID": [
"0x00A2F16A"
],
"EESWVer": [
"0.3.0"
],
"hostname.keyword": [
"eol-test-0"
],
"device": [
""
],
"board": [
"JENMUL90"
]
}
}
Can you try this query? As far as I understand, it should work the way you expect (not sure though, as the test field seems to contain only a single value per document):
{
"query": {
"bool": {
"filter": [
{
"terms": {
"test.keyword": [
"EOL_Overall_test_result",
"EOL_flash_app_fw"
]
}
},
{
"term": {
"result.keyword": "PASS"
}
}
]
}
}
}

Search match multiple values in single field in Elasticsearch

I'm using Elasticsearch to search for all documents that have the string REQUEST, partnerId=2960 and customerId= in the message field.
I'm using this query but nothing is returned:
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"message": "REQUEST"
}
},
{
"match_phrase": {
"message": "partnerId=2960"
}
},
{
"match_phrase": {
"message": "customerId="
}
}
]
}
}
}
response:
{
"took" : 213,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
My data:
{
"_index": "demoapp-log_2021.05.31",
"_type": "_doc",
"_id": "epU5wXkBQRhN_CfKNayH",
"_version": 1,
"_score": null,
"_source": {
"tags": [
"beats_input_codec_plain_applied",
"demoapp-log"
],
"message": [
"2021-05-31 14:00:21,433 INFO c.m.v.w.r.CollectionService_v_2_0_0 Line 434 - Inquire to business [b57add0e-4819-4130-9051-0628979b468f] CollectionInquiryRequestMsg_V_1_0_6{action=INQUIRE, transactionId=b57add0e-4819-4130-9051-0628979b468f, partnerId=2960, merchantId=VNP, customerId=091**59, billMonth=, partnerRequestId=6762e6fc-544b-4821-95f0-f4b81b547f3f, additionalInfo={\"ACTION\":\"INQUIRE\",\"VERSION\":\"2.0.0\",\"PARTNER_ID\":2960,\"PROVINCE_ID\":\"VNP\",\"CUSTOMER_ID\":\"091**59\",\"BILL_MONTH\":\"\",\"TRANS_REQUEST_ID\":\"6762e6fc-544b-4821-95f0-f4b81b547f3f\",\"CHANNEL_ID\":\"1\",\"TRANS_DATE_TIME\":\"20210531140021\",\"SECURE_CODE\":\"3354327f5af3ec9e0285c81bf5a4f72e3d2dd3afce703fffd9e2fbec1666e93d\"}, owner=, provinceCode=, paymentCode=, channelId=1, clientIp=, createDate=Mon May 31 14:00:21 ICT 2021, lastUpdate=Mon May 31 14:00:21 ICT 2021, collectionPartnerDate=Mon May 31 14:00:21 ICT 2021, merchantDate=null}",
"Inquire to business [b57add0e-4819-4130-9051-0628979b468f] CollectionInquiryRequestMsg_V_1_0_6{action=INQUIRE, transactionId=b57add0e-4819-4130-9051-0628979b468f, partnerId=2960, merchantId=VNP, customerId=091**59, billMonth=, partnerRequestId=6762e6fc-544b-4821-95f0-f4b81b547f3f, additionalInfo={\"ACTION\":\"INQUIRE\",\"VERSION\":\"2.0.0\",\"PARTNER_ID\":2960,\"PROVINCE_ID\":\"VNP\",\"CUSTOMER_ID\":\"091**59\",\"BILL_MONTH\":\"\",\"TRANS_REQUEST_ID\":\"6762e6fc-544b-4821-95f0-f4b81b547f3f\",\"CHANNEL_ID\":\"1\",\"TRANS_DATE_TIME\":\"20210531140021\",\"SECURE_CODE\":\"3354327f5af3ec9e0285c81bf5a4f72e3d2dd3afce703fffd9e2fbec1666e93d\"}, owner=, provinceCode=, paymentCode=, channelId=1, clientIp=, createDate=Mon May 31 14:00:21 ICT 2021, lastUpdate=Mon May 31 14:00:21 ICT 2021, collectionPartnerDate=Mon May 31 14:00:21 ICT 2021, merchantDate=null}"
],
"type": "beats",
"agent": {
"hostname": "web01"
},
"host": {},
"loglevel": "INFO",
"logger": "c.m.v.w.r.CollectionService_v_2_0_0",
"line": "434",
"ecs": {},
"input": {
"type": "log"
},
"#version": "1",
"fields": {
"service": "payapp",
"app_id": "demoapp-log"
},
"log": {
"file": {
"path": "/home/collection/live/logs/c_api.log"
}
},
"#timestamp": "2021-05-31T07:00:21.433Z",
"index_day": "2021.05.31"
},
"fields": {
"#timestamp": [
"2021-05-31T07:00:21.433Z"
]
},
"sort": [
1622444421433
]
}
(https://pastebin.com/UZWmXLpq )
My mapping for the index:
{
"demoapp-log_2021.05.31" : {
"mappings" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"#version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"agent" : {
"properties" : {
"ephemeral_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"hostname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"ecs" : {
"properties" : {
"version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"fields" : {
"properties" : {
"app_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"max_bytes" : {
"type" : "long"
},
"service" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"host" : {
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"index_day" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"input" : {
"properties" : {
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"line" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"log" : {
"properties" : {
"file" : {
"properties" : {
"path" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"flags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"offset" : {
"type" : "long"
}
}
},
"logger" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"loglevel" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"message" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
(https://pastebin.com/8CMm94MY )
(I updated my answer below based on your comment.)
Your search doesn't work because "REQUEST" doesn't appear in your message as a standalone word; it is attached to other words such as "partnerRequestId" or "_REQUEST_".
So if you want "REQUEST" to match "_REQUEST_" in a case-sensitive manner, you must change the analyzer of the message field.
By default the analyzer doesn't split on underscores, which you can verify as shown below.
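You can check this with the _analyze API (a quick sketch; "TRANS_REQUEST_ID" is one of the values from your log line):
POST _analyze
{
  "analyzer": "standard",
  "text": "TRANS_REQUEST_ID"
}
This should come back as a single token, "trans_request_id", rather than splitting out "request", which is why your match_phrase on "REQUEST" finds nothing.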
First, you need to create a new index with a custom mapping (you can later reindex your existing index into this new one).
Here is an example of an index with only the "message" field and an analyzer that splits on underscores and non-word characters (see the docs for more on tokenizers and analyzers):
PUT my-index
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "underscore"
}
},
"tokenizer": {
"underscore": {
"type": "pattern",
"pattern": "[_\\W]+"
}
}
}
},
"mappings" : {
"properties" : {
"message" : {
"type" : "text",
"analyzer":"my_analyzer"
}
}
}
}
Then you can add your document to this new index:
POST my-index/_doc
{
"message": [
"2021-05-31 14:00:21,433 INFO c.m.v.w.r.CollectionService_v_2_0_0 Line 434 - Inquire to business [b57add0e-4819-4130-9051-0628979b468f] CollectionInquiryRequestMsg_V_1_0_6{action=INQUIRE, transactionId=b57add0e-4819-4130-9051-0628979b468f, partnerId=2960, merchantId=VNP, customerId=091**59, billMonth=, partnerRequestId=6762e6fc-544b-4821-95f0-f4b81b547f3f, additionalInfo={\"ACTION\":\"INQUIRE\",\"VERSION\":\"2.0.0\",\"PARTNER_ID\":2960,\"PROVINCE_ID\":\"VNP\",\"CUSTOMER_ID\":\"091**59\",\"BILL_MONTH\":\"\",\"TRANS_REQUEST_ID\":\"6762e6fc-544b-4821-95f0-f4b81b547f3f\",\"CHANNEL_ID\":\"1\",\"TRANS_DATE_TIME\":\"20210531140021\",\"SECURE_CODE\":\"3354327f5af3ec9e0285c81bf5a4f72e3d2dd3afce703fffd9e2fbec1666e93d\"}, owner=, provinceCode=, paymentCode=, channelId=1, clientIp=, createDate=Mon May 31 14:00:21 ICT 2021, lastUpdate=Mon May 31 14:00:21 ICT 2021, collectionPartnerDate=Mon May 31 14:00:21 ICT 2021, merchantDate=null}",
"Inquire to business [b57add0e-4819-4130-9051-0628979b468f] CollectionInquiryRequestMsg_V_1_0_6{action=INQUIRE, transactionId=b57add0e-4819-4130-9051-0628979b468f, partnerId=2960, merchantId=VNP, customerId=091**59, billMonth=, partnerRequestId=6762e6fc-544b-4821-95f0-f4b81b547f3f, additionalInfo={\"ACTION\":\"INQUIRE\",\"VERSION\":\"2.0.0\",\"PARTNER_ID\":2960,\"PROVINCE_ID\":\"VNP\",\"CUSTOMER_ID\":\"091**59\",\"BILL_MONTH\":\"\",\"TRANS_REQUEST_ID\":\"6762e6fc-544b-4821-95f0-f4b81b547f3f\",\"CHANNEL_ID\":\"1\",\"TRANS_DATE_TIME\":\"20210531140021\",\"SECURE_CODE\":\"3354327f5af3ec9e0285c81bf5a4f72e3d2dd3afce703fffd9e2fbec1666e93d\"}, owner=, provinceCode=, paymentCode=, channelId=1, clientIp=, createDate=Mon May 31 14:00:21 ICT 2021, lastUpdate=Mon May 31 14:00:21 ICT 2021, collectionPartnerDate=Mon May 31 14:00:21 ICT 2021, merchantDate=null}"
]
}
And finally run this search that does the job:
GET my-index/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"message": "partnerId=2960"
}
},
{
"match": {
"message": "customerId"
}
},
{
"match":{
"message": "REQUEST"
}
}
]
}
}
}
Also, I don't know whether it is intentional, but the "message" field is indexed as both text and keyword. It looks to me like you don't need the keyword sub-field on this field.

Apply query in elasticsearch to get true/false value without filtering

I have got this kind of mapping on my ES index
{
"vabaco_dhp_development_persons" : {
"mappings" : {
"person" : {
"properties" : {
"active" : {
"type" : "boolean"
},
"booking_resources" : {
"type" : "nested",
"properties" : {
"available_days" : {
"type" : "nested",
"properties" : {
"available_date" : {
"type" : "text"
},
"last_slot_time" : {
"type" : "date"
}
}
},
"booking_resource_detail" : {
"properties" : {
"from_age" : {
"type" : "long"
},
"to_age" : {
"type" : "long"
}
}
},
"booking_resource_price" : {
"type" : "float"
},
"booking_resource_restriction" : {
"properties" : {
"insurer_restrictions" : {
"properties" : {
"insurer_ic" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"booking_resource_service_restrictions" : {
"type" : "nested",
"properties" : {
"insurance_service_code" : {
"type" : "text"
},
"insurer_provider_ic" : {
"type" : "text"
},
"location_id" : {
"type" : "integer"
}
}
},
"city_id" : {
"type" : "integer"
},
"doctor_languages" : {
"properties" : {
"language_id" : {
"type" : "integer"
}
}
},
"doctor_speciality" : {
"properties" : {
"id" : {
"type" : "integer"
},
"name" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
},
"location" : {
"properties" : {
"address" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"brand_name" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"district" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"id" : {
"type" : "integer"
},
"name" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"user_profile_city" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
},
"searchable_text" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"service_provision_type" : {
"properties" : {
"id" : {
"type" : "integer"
},
"id_name" : {
"type" : "text"
},
"name" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
},
"covid_19" : {
"type" : "boolean"
},
"first_name" : {
"properties" : {
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"id" : {
"type" : "integer"
},
"last_name" : {
"properties" : {
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"person_star" : {
"properties" : {
"avg" : {
"type" : "float"
},
"disable_rating" : {
"type" : "boolean"
},
"star_count" : {
"type" : "integer"
}
}
},
"priority_city" : {
"properties" : {
"id" : {
"type" : "integer"
},
"name" : {
"type" : "object"
},
"priority" : {
"type" : "integer"
}
}
},
"uuid" : {
"type" : "text"
},
"views" : {
"type" : "integer"
}
}
}
}
}
}
I want to apply a query that filters on the "booking_resource_service_restrictions" field, but in some cases I just need to know whether this query, if applied, would filter out a particular record. In other words, I need a true/false value for every record, based on this filter, while the data itself is not filtered. Is there any way to extract this kind of information for every record?
A sample query looks like this; this is how I filter data based on the "booking_resource_service_restrictions" field. But, as already mentioned, in some cases I want to know whether this filter would match and get a true/false value for every record, without actually filtering them out:
GET vabaco_dhp_development_persons/_search
{
"query": {
"bool":{
"must":[
{
"term":{
"active": true
}
},
{
"nested":{
"path":"booking_resources",
"query":{
"bool":{
"must":[
{
"nested":{
"path":"booking_resources.booking_resource_service_restrictions",
"query":{
"bool":{
"should":[
{
"bool":{
"must":[
{
"term":{
"booking_resources.booking_resource_service_restrictions.insurer_provider_ic":"204919008"
}
},
{
"term":{
"booking_resources.booking_resource_service_restrictions.insurance_service_code":"11111"
}
},
{
"term":{
"booking_resources.booking_resource_service_restrictions.location_id": 1
}
}
]
}
},
{
"bool":{
"must":[
{
"term":{
"booking_resources.booking_resource_service_restrictions.insurer_provider_ic":"204919008"
}
},
{
"term":{
"booking_resources.booking_resource_service_restrictions.insurance_service_code":"33333"
}
},
{
"term":{
"booking_resources.booking_resource_service_restrictions.location_id": 1
}
}
]
}
}
],
"minimum_should_match": 1
}
}
}
}
]
}
}
}
}
]
}
}
}
In general, what I need is a way to know whether a record satisfies some filtering requirements and, based on this, to get true/false values (or data equivalent to true/false).
You can use named queries to give names to your queries and filters via the "_name" field.
Put your query in a should clause so that it does not affect which documents are returned.
When you search, look for the "matched_queries" array in each hit to find the names of all queries that matched that particular document; see the sketch and the sample hit below.
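A minimal sketch of the pattern, reusing one branch of your nested filter as a named should clause (the name "restriction_match" is just illustrative):
GET vabaco_dhp_development_persons/_search
{
  "query": {
    "bool": {
      "must": [
        { "term": { "active": true } }
      ],
      "should": [
        {
          "bool": {
            "_name": "restriction_match",
            "must": [
              {
                "nested": {
                  "path": "booking_resources",
                  "query": {
                    "nested": {
                      "path": "booking_resources.booking_resource_service_restrictions",
                      "query": {
                        "bool": {
                          "must": [
                            { "term": { "booking_resources.booking_resource_service_restrictions.insurer_provider_ic": "204919008" } },
                            { "term": { "booking_resources.booking_resource_service_restrictions.insurance_service_code": "11111" } },
                            { "term": { "booking_resources.booking_resource_service_restrictions.location_id": 1 } }
                          ]
                        }
                      }
                    }
                  }
                }
              }
            ]
          }
        }
      ]
    }
  }
}
Because the named clause sits in should (and there is already a must clause), it only influences scoring and the matched_queries output, not which documents come back. A hit that satisfies it then reports the name, as in this sample result: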
{
"_index": "testindex",
"_type": "employee",
"_id": "2",
"_score": 0.19178301,
"_source": {
"name": "Barkha Jain"
},
"matched_queries": [
"query on name field"
]
}
Read about named queries here https://www.elastic.co/guide/en/elasticsearch/reference/7.11/query-dsl-bool-query.html#named-queries

Remove stopwords while querying using GET Request in Elasticsearch

I'm trying to implement a stop token filter in an Elasticsearch index. I have the following code, taken from here:
PUT /test1
{
"settings": {
"analysis": {
"filter": {
"my_stop": {
"type": "stop",
"stopwords": "_english_"
}
}
}
}
}
My data is stored in JSON format and has a field named Ingredients which contains stop words. I want to search through the whole index (containing almost 80k records) for the top 100 most frequent values in the Ingredients field. The query I'm using to retrieve the results is:
GET test1/_search?size=0&pretty
{
"aggs": {
"genres": {
"terms": {
"field": "Ingredients",
"size": 100,
"exclude": "[0-9].*"
}
}
}
}
I need to exclude numbers from it, which is what the exclude parameter is for.
But when I run the above query in Kibana, the stop words are not removed; they still show up in the response.
As per the documentation, it should remove the stop words, but it isn't doing that. I'm unable to find the cause as I'm new to Elasticsearch. Please help me figure it out.
I'm using elasticsearch-7.3.1 and Kibana-7.3.1.
I've been working on it for about two days but none of the methods is working. Thanks! Any help would be really appreciated.
If I try it this way, it works, but the GET request defined above doesn't work at all:
POST test1/_analyze
{
"analyzer": "my_stop",
"text": "House of Dickson<br> corp"
}
My Mapping
{
"recipe_test" : {
"aliases" : { },
"mappings" : {
"properties" : {
"Author" : {
"properties" : {
"additionalInfo" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"eval" : {
"type" : "boolean"
},
"url" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"value" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Channel" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Cousine" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Ingredients" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"fielddata" : true
},
"Keywords" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"MakingMethod" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Publication" : {
"properties" : {
"additionalInfo" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"eval" : {
"type" : "boolean"
},
"published" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"url" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"value" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Rating" : {
"properties" : {
"bestRating" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ratingCount" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ratingValue" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"worstRating" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Servings" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Timings" : {
"properties" : {
"cookTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"prepTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"totalTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Title" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"recipe_url" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"number_of_shards" : "1",
"provided_name" : "recipe_test",
"creation_date" : "1567443878756",
"analysis" : {
"filter" : {
"english_stop" : {
"type" : "stop",
"stopwords" : "_english_"
}
},
"analyzer" : {
"rebuilt_stop" : {
"filter" : [
"asciifolding",
"lowercase",
"english_stop"
],
"tokenizer" : "standard"
}
}
},
"number_of_replicas" : "1",
"uuid" : "K-FrOyc6QlWokGQoN6HxCg",
"version" : {
"created" : "7030199"
}
}
}
}
}
My Example Data
{
"recipe_url": "http1742637/bean-and-pesto-mash",
"Channel": "waqas",
"recipe_id":"31",
"Title": "Bean & pesto mash",
"Rating": {
"ratingValue": "4.625",
"bestRating": "5",
"worstRating": "1",
"ratingCount": "8"
},
"Timings": {
"cookTime": "PT5M",
"prepTime": "PT5M",
"totalTime": "PT10M"
},
"Author": {
"eval": false,
"value": "dfgkkdfgdfgfmes",
"url": "https://www.example.com/",
"additionalInfo": "Recipe from Good Food magazine, ",
"description": "Substitute potatoes with pulses for a healthy alternative mash with a chunky texture",
"published": "November 2011"
},
"Publication": {
"eval": false,
"value": "",
"url": "",
"additionalInfo": "",
"published": ""
},
"Nutrition": "per serving",
"NutritionContents": {
"kcal": "183",
"fat": "5g",
"saturates": "1g",
"carbs": "25g",
"sugars": "3g",
"fibre": "7g",
"protein": "11g",
"salt": "0.84g"
},
"SkillLevel": "Easy",
"Ingredients": [
"drizzle", "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus"
],
"MakingMethod": [
"Heat the oil in a large saucepan. Add the beans and cook for 3-4 mins until hot through. Lightly mash with a potato masher for a chunky texture. Stir through the pesto and season. To serve, drizzle with a little olive oil, if you like."
],
"Keywords": [
"Cannellini bean",
"Cannellini beans",
"Mash",
"Beans",
"Super healthy",
"Pulses",
"5-a-day",
"Low fat",
"Diet",
"Dieting",
"Side dish",
"Bangers and mash",
"Sausage and mash",
"Texture",
"Fireworks",
"Pesto",
"Easy",
"Vegetarian",
"Healthy",
"Bonfire Night"
],
"Category": [
"Side dish",
"Dinner"
],
"Cousine": "British",
"Servings": "Serves 4"
}
There is no easy way to do this.
Option 1
Enable fielddata on the text field on which you applied the correct analyzer. Something like this:
{
"settings": {
"analysis": {
"filter": {
"english_stop": {
"type": "stop",
"stopwords": "_english_"
}
},
"analyzer": {
"rebuilt_stop": {
"filter": [
"asciifolding",
"lowercase",
"english_stop"
],
"tokenizer": "standard"
}
}
}
},
"mappings": {
"properties": {
"Ingredients": {
"type": "text",
"analyzer": "rebuilt_stop",
"fielddata": true
}
}
}
}
And then you run your terms aggregation against this index, for example:
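(This reuses the index name, field, and exclude pattern from the question; adjust them to your own setup.)
GET test1/_search?size=0
{
  "aggs": {
    "genres": {
      "terms": {
        "field": "Ingredients",
        "size": 100,
        "exclude": "[0-9].*"
      }
    }
  }
}
Because the field is now analyzed with rebuilt_stop and aggregated via fielddata, the buckets are built from the analyzed tokens, so the English stop words should no longer show up.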
Drawbacks: it could use a lot of memory because of fielddata usage.
Option 2
Use the term vectors API. Since you are interested in the most used terms in the Ingredients field, you could call this API on one document in the index and get the total term frequency for each term in that specific document. The drawback: you need to specify a particular document ID, and only the terms present in that document will be reported back.
Something like this:
GET /test/_termvectors/1
{
"fields" : ["Ingredients"],
"offsets" : false,
"payloads" : false,
"positions" : false,
"term_statistics" : true,
"field_statistics" : false
}
Option 3
Probably the ugliest one. Something along these lines: Elasticsearch: index a field with keyword tokenizer but without stopwords
Advantage: it does not use fielddata (heap memory). Drawback: you'd have to define the stop words manually in the char_filter definition, roughly as sketched below.
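A rough, unverified sketch of one way to do that: a keyword field with a normalizer whose pattern_replace char_filter strips a hand-maintained stop word list (this may differ in detail from the linked answer; the stop word list and index name are purely illustrative, and leftover whitespace may need extra cleanup):
PUT recipe_no_stop
{
  "settings": {
    "analysis": {
      "char_filter": {
        "remove_stopwords": {
          "type": "pattern_replace",
          "pattern": "(?i)\\b(a|an|and|of|or|the|to|with)\\b",
          "replacement": ""
        }
      },
      "normalizer": {
        "no_stop_normalizer": {
          "type": "custom",
          "char_filter": ["remove_stopwords"],
          "filter": ["lowercase", "trim"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "Ingredients": {
        "type": "keyword",
        "normalizer": "no_stop_normalizer"
      }
    }
  }
}
Since Ingredients is a keyword field here, the terms aggregation runs on doc_values instead of fielddata.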

Has_parent query returns no hits in elasticsearch 6.5.4

I'm currently trying to find all children of parents that match a certain query, using the following has_parent query:
GET my_index*/_search
{
"query": {
"has_parent": {
"parent_type": "threat",
"query": {
"term": {
"type.keyword": {
"value": "ip"
}
}
}
}
}
}
But it returns no hits, even with a match_all query.
The mapping of the index is as follows:
"my_index" : {
"mappings" : {
"doc" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"#version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"asn_info" : {
"properties" : {
"as_org" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"asn" : {
"type" : "long"
}
}
},
"campaign" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"category_description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"confidence" : {
"type" : "float"
},
"criticity" : {
"type" : "float"
},
"detection_date" : {
"type" : "float"
},
"feed" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"feeds" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"geo" : {
"properties" : {
"city_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"country_code2" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"country_code3" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"country_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"latitude" : {
"type" : "float"
},
"longitude" : {
"type" : "float"
}
}
},
"hierarchy" : {
"type" : "join",
"eager_global_ordinals" : true,
"relations" : {
"threat" : "date"
}
},
"host" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ip" : {
"type" : "long"
},
"ip_address" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"port" : {
"type" : "long"
},
"subcategory" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timestamp" : {
"type" : "date"
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
As you can see, the hierarchy field is a join field, with "threat" defined as the parent of "date". I don't see any problem with this. Does anyone know what could be happening?
