Remove stopwords while querying using GET Request in Elasticsearch - elasticsearch

I'm trying to implement Stop Token Filter in an index of Elasticsearch. I've following code taken from here.
PUT /test1
{
"settings": {
"analysis": {
"filter": {
"my_stop": {
"type": "stop",
"stopwords": "_english_"
}
}
}
}
}
I have my data stored in JSON format and have a field named as Ingredients which contains stopped words. I want to search through the whole index (containing almost 80k records) about the top 100 most appeared values in Ingredients tag. The query I'm using to retrieve the results is
GET test1/_search?size=0&pretty
{
"aggs": {
"genres": {
"terms": {
"field": "Ingredients",
"size": 100,
"exclude": "[0-9].*"
}
}
}
}
I need to exclude Numbers from it for which I'm using exclude.
But applying the above query using Kibana it doesn't remove the Stop Words and keeps them displayed while querying response.
As per the documentation, it should remove the stopped words but it isn't doing that. I'm unable to find the cause as I'm a newbie in Elasticsearch. Please help me figure it out.
I'm using elasticsearch-7.3.1 and Kibana-7.3.1.
I'm working over it for about two days but none of the methods is working. Thanks! Any help would be really appreciated.
If I try it using this way, it works but while putting the GET request as per the method defined above, it doesn't work at all.
POST test1/_analyze
{
"analyzer": "my_stop",
"text": "House of Dickson<br> corp"
}
My Mapping
{
"recipe_test" : {
"aliases" : { },
"mappings" : {
"properties" : {
"Author" : {
"properties" : {
"additionalInfo" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"eval" : {
"type" : "boolean"
},
"url" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"value" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Channel" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Cousine" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Ingredients" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"fielddata" : true
},
"Keywords" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"MakingMethod" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Publication" : {
"properties" : {
"additionalInfo" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"eval" : {
"type" : "boolean"
},
"published" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"url" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"value" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Rating" : {
"properties" : {
"bestRating" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ratingCount" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ratingValue" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"worstRating" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Servings" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Timings" : {
"properties" : {
"cookTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"prepTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"totalTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"Title" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"recipe_url" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"number_of_shards" : "1",
"provided_name" : "recipe_test",
"creation_date" : "1567443878756",
"analysis" : {
"filter" : {
"english_stop" : {
"type" : "stop",
"stopwords" : "_english_"
}
},
"analyzer" : {
"rebuilt_stop" : {
"filter" : [
"asciifolding",
"lowercase",
"english_stop"
],
"tokenizer" : "standard"
}
}
},
"number_of_replicas" : "1",
"uuid" : "K-FrOyc6QlWokGQoN6HxCg",
"version" : {
"created" : "7030199"
}
}
}
}
}
My Example Data
{
"recipe_url": "http1742637/bean-and-pesto-mash",
"Channel": "waqas",
"recipe_id":"31",
"Title": "Bean & pesto mash",
"Rating": {
"ratingValue": "4.625",
"bestRating": "5",
"worstRating": "1",
"ratingCount": "8"
},
"Timings": {
"cookTime": "PT5M",
"prepTime": "PT5M",
"totalTime": "PT10M"
},
"Author": {
"eval": false,
"value": "dfgkkdfgdfgfmes",
"url": "https://www.example.com/",
"additionalInfo": "Recipe from Good Food magazine, ",
"description": "Substitute potatoes with pulses for a healthy alternative mash with a chunky texture",
"published": "November 2011"
},
"Publication": {
"eval": false,
"value": "",
"url": "",
"additionalInfo": "",
"published": ""
},
"Nutrition": "per serving",
"NutritionContents": {
"kcal": "183",
"fat": "5g",
"saturates": "1g",
"carbs": "25g",
"sugars": "3g",
"fibre": "7g",
"protein": "11g",
"salt": "0.84g"
},
"SkillLevel": "Easy",
"Ingredients": [
"drizzle", "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus" , "Asparagus"
],
"MakingMethod": [
"Heat the oil in a large saucepan. Add the beans and cook for 3-4 mins until hot through. Lightly mash with a potato masher for a chunky texture. Stir through the pesto and season. To serve, drizzle with a little olive oil, if you like."
],
"Keywords": [
"Cannellini bean",
"Cannellini beans",
"Mash",
"Beans",
"Super healthy",
"Pulses",
"5-a-day",
"Low fat",
"Diet",
"Dieting",
"Side dish",
"Bangers and mash",
"Sausage and mash",
"Texture",
"Fireworks",
"Pesto",
"Easy",
"Vegetarian",
"Healthy",
"Bonfire Night"
],
"Category": [
"Side dish",
"Dinner"
],
"Cousine": "British",
"Servings": "Serves 4"
}

There is no easy way on how to do this.
Option 1
Enable fielddata on the text field on which you applied the correct analyzer. Something like this:
{
"settings": {
"analysis": {
"filter": {
"english_stop": {
"type": "stop",
"stopwords": "_english_"
}
},
"analyzer": {
"rebuilt_stop": {
"filter": [
"asciifolding",
"lowercase",
"english_stop"
],
"tokenizer": "standard"
}
}
}
},
"mappings": {
"properties": {
"Ingredients": {
"type": "text",
"analyzer": "rebuilt_stop",
"fielddata": true
}
}
}
}
And then you run your terms aggregation.
Drawbacks: it could use a lot of memory because of fielddata usage.
Option 2
Use term vectors API. Since you are interested in the most used "values"/"terms" in Ingredients field you could call this API on one document in the index and have the total term frequency for each term in that specific document. The drawback: you need to specify a certain document ID and only the terms in that document will be reported back.
Something like this:
GET /test/_termvectors/1
{
"fields" : ["Ingredients"],
"offsets" : false,
"payloads" : false,
"positions" : false,
"term_statistics" : true,
"field_statistics" : false
}
Option 3
Probably the ugliest one. Something around these lines: Elasticsearch: index a field with keyword tokenizer but without stopwords
Advantage: is not using fielddata (heap memory). Drawback: you'd have to define the stopwords manually in the char_filter definition.

Related

combining a multi_match and a geo_bounding_box query in elasticSearch not returning results

I'm writing an elasticSearch query against an index that contains lat/long. It is indexed as correct type to work with geoSpatial queries.
I am trying to aggregate places based on the current bounding box in a mapbox map and getting the bounds. The map also has a search box where user search with some string. Combining both the search and geoBounding, I am forming the following query.
{
"from":0,
"size":100,
"track_total_hits":true,
"sort":[
{
"place_name.keyword":"asc"
}
],
"query":{
"bool":{
"must":[
{
"multi_match":{
"query":"w",
"fields":[
"place_name^3",
"properties.top_category",
"properties.brands"
],
"operator":"and"
}
},
{
"geo_bounding_box":{
"location.point":{
"top_right":{
"lat":38.89450183333278,
"lon":-90.38570942514077
},
"bottom_left":{
"lat":38.88102629071099,
"lon":-90.40970118570218
}
}
}
}
]
}
}
}
But the query returns 0 hits.
When I run just the multi_match separately and the geo_bounding_box separately both returns the results as expected. So I'm not sure what I'm missing here.
I tried this as well using one as a filter
{
"from":0,
"size":100,
"track_total_hits":true,
"sort":[
{
"place_name.keyword":"asc"
}
],
"query":{
"bool":{
"must":
{
"multi_match":{
"query":"W",
"fields":[
"place_name^3",
"properties.top_category",
"properties.brands"
],
"operator":"and"
}
},
"filter":{
"geo_bounding_box":{
"location.point":{
"top_right":{
"lat":38.89450183333278,
"lon":-90.38570942514077
},
"bottom_left":{
"lat":38.88102629071099,
"lon":-90.40970118570218
}
}
}
}
}
}
}
Getting same issue. I know the data exists because calling with only the geo_bounding_box returns this data. But not when I combine it with a multi_match, the data should match because the place name matches.
Update: Added the index mapping
{
"places_here_integration" : {
"mappings" : {
"properties" : {
"location" : {
"properties" : {
"DMA_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"DMA_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"county_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"county_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"geo_point" : {
"type" : "geo_point",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"point" : {
"type" : "geo_shape"
},
"polygon" : {
"type" : "geo_shape"
},
"state_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"state_usps" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"place_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"properties" : {
"properties" : {
"address" : {
"properties" : {
"city" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"region" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"state" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"street_address" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"zip_code" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"brands" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"created_datetime" : {
"type" : "date"
},
"created_user" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ids" : {
"properties" : {
"building_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"parkingarea_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"place_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"related_place_ids" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tenantspace_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"naics_code" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"operating_information" : {
"properties" : {
"operating_hours" : {
"properties" : {
"fri" : {
"properties" : {
"close" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"open" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"mon" : {
"properties" : {
"close" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"open" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"sat" : {
"properties" : {
"close" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"open" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"sun" : {
"properties" : {
"close" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"open" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"thu" : {
"properties" : {
"close" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"open" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"tue" : {
"properties" : {
"close" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"open" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"wed" : {
"properties" : {
"close" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"open" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
},
"operating_hours_note" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"phone_number" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"place_category_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"sub_category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"top_category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
}

Elasticsearch - I am struggling to write a query on a field where 2 criteria need to be met

I am struggling to write a query on a field where 2 criteria need to be met for a dashboard in Kibana. My field name is test:keyword and I need the results to be where Test A and Test B have the result:keyword (another field) as PASS
{ "query": {
"match_phrase": {
"test.keyword": "EOL_Overall_test_result" }
}
}
so I need another criteria and test.keyword:"EOL_flash_app_fw"
and these both need to have the result as:
result.keyword:"PASS"
{
"mte" : {
"mappings" : {
"properties" : {
"EESWVer" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"acdID" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"board" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"create" : {
"properties" : {
"board" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"device" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"reason" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"result" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"test" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timeStamp" : {
"type" : "date"
}
}
},
"device" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"hostname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"reason" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"result" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"test" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timeStamp" : {
"type" : "date"
}
}
}
}
}
DOCUMENT SAMPLE
{
"_index": "mte",
"_type": "result",
"_id": "fY1Amn4BTPepfjg1c5x5",
"_version": 1,
"_score": 1,
"_source": {
"timeStamp": "2022-01-27T14:37:01+08:00",
"test": "EOL_Overall_test_result",
"hostname": "eol-test-0",
"acdID": "0x00A2F16A",
"EESWVer": "0.3.0",
"device": "",
"result": "PASS",
"reason": "0b001111111110011011111111",
"board": "JENMUL90"
},
"fields": {
"acdID.keyword": [
"0x00A2F16A"
],
"reason": [
"0b001111111110011011111111"
],
"device.keyword": [
""
],
"test": [
"EOL_Overall_test_result"
],
"test.keyword": [
"EOL_Overall_test_result"
],
"result.keyword": [
"PASS"
],
"EESWVer.keyword": [
"0.3.0"
],
"board.keyword": [
"JENMU90"
],
"result": [
"PASS"
],
"timeStamp": [
"2022-01-27T06:37:01.000Z"
],
"hostname": [
"eol-test-0"
],
"reason.keyword": [
"0b001111111110011011111111"
],
"acdID": [
"0x00A2F16A"
],
"EESWVer": [
"0.3.0"
],
"hostname.keyword": [
"eol-test-0"
],
"device": [
""
],
"board": [
"JENMUL90"
]
}
}
Can you try this query, as far as I understand, it should work the way you expect (not sure though as the test field seems to only contain one single value):
{
"query": {
"bool": {
"filter": [
{
"terms": {
"test.keyword": [
"EOL_Overall_test_result",
"EOL_flash_app_fw"
]
}
},
{
"term": {
"result.keyword": "PASS"
}
}
]
}
}
}

Elasticsearch not returning result with same token?

Data inserted in ElasticSearch is in korean so I cannot present exact case but let's say
i have a word ABBCC that has been tokenized as ["A","BBCC"] and another word AZZXXXtokenized as ["A","ZZXXX"].
if i search ABBCC, then shouldn't AZZXXX come up since they have same token? or is this not how elasticsearch work?
this is how I checked analyzed words:
GET recpost_test/_analyze
{
"analyzer": "my_analyzer",
"text":"my query String!"
}
this is how i created my index:
PUT recpost
{
"settings": {
"index": {
"analysis": {
"tokenizer": {
"nori_user_dict": {
"type": "nori_tokenizer",
"decompound_mode": "mixed",
"user_dictionary": "userdict_ko.txt"
}
},
"analyzer": {
"my_analyzer": {
"type": "custom",
"tokenizer": "nori_user_dict"
}
},
"filter": {
"substring": {
"type": "edgeNGram",
"min_gram": 1,
"max_gram": 10
}
}
}
}
}
}
this is how i searched:
GET recpost/_search
{
"_source": [""],
"from": 0,
"size": 2,
"query":{
"multi_match": {
"query" : "my query String!",
"type": "best_fields",
"fields" : [
"brandkor",
"content",
"itemname",
"name",
"review",
"shortreview^2",
"title^3"]
}
}
}
EDIT:
I tried adding "analyzer" field to search and still doesn't work
GET recpost/_search
{
"_source": [""],
"from": 0,
"size": 2,
"query":{
"multi_match": {
"query" : "깡스",
"analyzer": "my_analyzer",
"type": "best_fields",
"fields" : [
"brandkor",
"content",
"itemname",
"name",
"review",
"shortreview^2",
"title^3"]
}
}
}
EDIT2: This is my mapping:
{
"recpost_test" : {
"mappings" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"brandkor" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"content" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"field_statistics" : {
"type" : "boolean"
},
"fields" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"itemname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"offsets" : {
"type" : "boolean"
},
"payloads" : {
"type" : "boolean"
},
"positions" : {
"type" : "boolean"
},
"review" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"shortreview" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"term_statistics" : {
"type" : "boolean"
},
"title" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
I dont see that you mounted your fields to your index(mapping).
so for all I know, is that you're indexing all of the fields (brandkor, content, ...etc) as text .. and basically you're matching exact values.
unless you correlated each field with its analyzer.

Has_parent query returns no hits in elasticsearch 6.5.4

I'm currently trying to find all children of parents that match certain query using the following has_parent query:
GET my_index*/_search
{
"query": {
"has_parent": {
"parent_type": "threat",
"query": {
"term": {
"type.keyword": {
"value": "ip"
}
}
}
}
}
}
But it returns no hits, even with a match_all query.
The mapping of the index is as follows:
"my_index" : {
"mappings" : {
"doc" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"#version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"asn_info" : {
"properties" : {
"as_org" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"asn" : {
"type" : "long"
}
}
},
"campaign" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"category_description" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"confidence" : {
"type" : "float"
},
"criticity" : {
"type" : "float"
},
"detection_date" : {
"type" : "float"
},
"feed" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"feeds" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"geo" : {
"properties" : {
"city_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"country_code2" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"country_code3" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"country_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"latitude" : {
"type" : "float"
},
"longitude" : {
"type" : "float"
}
}
},
"hierarchy" : {
"type" : "join",
"eager_global_ordinals" : true,
"relations" : {
"threat" : "date"
}
},
"host" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ip" : {
"type" : "long"
},
"ip_address" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"port" : {
"type" : "long"
},
"subcategory" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timestamp" : {
"type" : "date"
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
As you can see, the hierarchy field is a join field, with "threat" defined as parent of "date". I don't see any problem with this. Does anyone know what could be happening?

How to create an elasticsearch k/v json field

I have the following json object:
{
"AVAILED_NOT_LIVE": {
"ExceptionType": "Avail'd - Not Live",
"ExceptionReason": "Clear For Rent",
"ExpectedValue": "Cleared for VOD (Yes)",
"FoundValue": "Cleared for VOD (No)"
},
"LIVE_NOT_AVAILED": {
"ExceptionType": "Live - Not Avail'd",
"ExceptionReason": "--",
"ExpectedValue": "No",
"FoundValue": "Yes"
}
}
How would I create an index for this field? So far I have:
"properties": {
"ExceptionType": { "type": "text" },
"ExceptionReason": { "type": "text" },
"ExpectedValue": { "type": "text" },
"FoundValue": { "type": "text" },
},
But this is missing the key (AVAILED_NOT_LIVE, LIVE_NOT_AVAILED), that may have various values. How would I dot hat mapping?
Generally I'd just insert a document and then customize the generated dynamic mapping:
Your 2 initial requests:
PUT /foo/_doc/1
{
"AVAILED_NOT_LIVE": {
"ExceptionType": "Avail'd - Not Live",
"ExceptionReason": "Clear For Rent",
"ExpectedValue": "Cleared for VOD (Yes)",
"FoundValue": "Cleared for VOD (No)"
},
"LIVE_NOT_AVAILED": {
"ExceptionType": "Live - Not Avail'd",
"ExceptionReason": "--",
"ExpectedValue": "No",
"FoundValue": "Yes"
}
}
GET /foo/_mapping
Response for the mapping, which you could then customize:
{
"foo" : {
"mappings" : {
"_doc" : {
"properties" : {
"AVAILED_NOT_LIVE" : {
"properties" : {
"ExceptionReason" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ExceptionType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ExpectedValue" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"FoundValue" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"LIVE_NOT_AVAILED" : {
"properties" : {
"ExceptionReason" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ExceptionType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ExpectedValue" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"FoundValue" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
}
}
PS: From the mapping it's also getting clear how nested objects are being mapped — by having properties in a property.

Resources