ElasticSearch inconsistent relevance - elasticsearch

I'm using Elasticsearch to search for movies by the actors who played in them. When I search for e.g. "leonardo dicaprio" I get back 10 or so movies, but they all have different scores. Since they all have the same actor, I would expect them to have the same score. Is anyone able to shed some light on why this is happening and, hopefully, how to stop it?
Elasticsearch version 1.7.2
Mapping:
{
"programs": {
"mappings": {
"program_doc_type": {
"properties": {
"cast": {
"type": "string",
"analyzer": "keyword_analyzer",
"fields": {
"name": {
"type": "string",
"analyzer": "name_analyzer"
}
}
},
"django_id": {
"type": "integer"
},
"has_poster": {
"type": "boolean"
},
"imdb_id": {
"type": "string",
"index": "not_analyzed"
},
"kind": {
"type": "string",
"index": "not_analyzed"
},
"record_url_count": {
"type": "integer"
},
"release_date": {
"type": "date",
"format": "dateOptionalTime"
},
"release_year": {
"type": "integer"
},
"title": {
"type": "string",
"analyzer": "pattern"
},
"tms_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
Analyzers:
"analysis": {
"analyzer": {
"keyword_analyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "keyword"
},
"name_analyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "whitespace"
}
}
}
Query:
{
"query": {
"match": {"cast.name": "leonardo dicaprio"}
}
}
First Page Result:
{
"took": 12,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 62,
"max_score": 12.046804,
"hits": [
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "1077511",
"_score": 12.046804,
"_source": {
"imdb_id": "tt4007278",
"tms_id": "",
"record_url_count": 0,
"release_date": "2014-08-20",
"title": "Carbon",
"has_poster": false,
"release_year": 2014,
"django_id": 1077511,
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "812919",
"_score": 11.906615,
"_source": {
"imdb_id": "tt2076929",
"tms_id": "",
"record_url_count": 0,
"title": "Satori",
"has_poster": false,
"release_year": 2014,
"django_id": 812919,
"kind": "N/A",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "376792",
"_score": 11.886408,
"_source": {
"imdb_id": "tt0402538",
"tms_id": "",
"record_url_count": 0,
"title": "Titanic: The Premiere",
"has_poster": true,
"release_year": 2000,
"django_id": 376792,
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "306106",
"_score": 11.69776,
"_source": {
"imdb_id": "tt0325727",
"tms_id": "",
"record_url_count": 0,
"release_date": "1998-08-16",
"title": "Leo Mania",
"has_poster": true,
"release_year": 1998,
"django_id": 306106,
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "269743",
"_score": 9.637444,
"_source": {
"imdb_id": "tt0286234",
"tms_id": "",
"record_url_count": 0,
"title": "Total Eclipse",
"has_poster": false,
"release_year": 1995,
"django_id": 269743,
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Agnieszka Holland"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "840945",
"_score": 9.358208,
"_source": {
"imdb_id": "tt2195237",
"tms_id": "",
"record_url_count": 0,
"release_date": "2004-12-01",
"title": "MovieReal: The Aviator",
"has_poster": false,
"release_year": 2004,
"django_id": 840945,
"kind": "series",
"cast": [
"Leonardo DiCaprio",
"Martin Scorsese"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "382168",
"_score": 9.358208,
"_source": {
"imdb_id": "tt0408269",
"tms_id": "",
"record_url_count": 0,
"release_date": "1998-09-29",
"title": "To Leo with Love",
"has_poster": true,
"release_year": 1998,
"django_id": 382168,
"kind": "movie",
"cast": [
"Jo Wyatt",
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "846212",
"_score": 7.2280827,
"_source": {
"imdb_id": "tt2218442",
"tms_id": "",
"record_url_count": 0,
"title": "Legacy of Secrecy",
"has_poster": false,
"release_year": 1947,
"django_id": 846212,
"kind": "N/A",
"cast": [
"Leonardo DiCaprio",
"Robert De Niro",
"D'Anthony Palms"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "595027",
"_score": 7.1439695,
"_source": {
"imdb_id": "tt1294988",
"tms_id": "",
"record_url_count": 0,
"release_date": "2006-09-27",
"title": "Emporio Armani 'Red' One Night Only",
"has_poster": false,
"release_year": 2006,
"django_id": 595027,
"kind": "movie",
"cast": [
"Kim Cattrall",
"Leonardo DiCaprio",
"Beyoncé Knowles"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "752646",
"_score": 7.1439695,
"_source": {
"imdb_id": "tt1826731",
"tms_id": "",
"record_url_count": 0,
"release_date": "2009-06-02",
"title": "Lives of Quiet Desperation: The Making of Revolutionary Road",
"has_poster": false,
"release_year": 2009,
"django_id": 752646,
"kind": "movie",
"cast": [
"Kathy Bates",
"Leonardo DiCaprio",
"Kate Winslet"
]
}
}
]
}
}
UPDATE:
I disabled the field-length norm and that seems to have improved things a lot, but the scores still aren't all the same. I'm still confused. According to what I've read, there are three factors that determine relevancy:
Term frequency
Inverse document frequency
Field length norm (disabled)
Since each program contains Leonardo DiCaprio only once, it seems to me that they should all have identical scores, but they don't. Maybe I'm misunderstanding. Here are the updated settings after disabling the field-length norm:
Mapping:
{
"programs": {
"mappings": {
"program_doc_type": {
"properties": {
"cast": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "keyword_analyzer",
"fields": {
"name": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "name_analyzer"
}
}
},
"django_id": {
"type": "integer"
},
"has_poster": {
"type": "boolean"
},
"imdb_id": {
"type": "string",
"index": "not_analyzed"
},
"kind": {
"type": "string",
"index": "not_analyzed"
},
"record_url_count": {
"type": "integer"
},
"release_date": {
"type": "date",
"format": "dateOptionalTime"
},
"release_year": {
"type": "integer"
},
"title": {
"type": "string",
"analyzer": "pattern"
},
"tms_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
First Page Result:
{
"took": 20,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 836,
"max_score": 13.778852,
"hits": [
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "421026",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 421026,
"imdb_id": "tt0449557",
"has_poster": false,
"release_date": "2005-05-24",
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Jeffrey M. Schwartz",
"Donald L. Barlett",
"James B. Steele"
],
"release_year": 2005,
"record_url_count": 0,
"title": "The Affliction of Howard Hughes: Obsessive-Compulsive Disorder"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "555015",
"_score": 13.778852,
"_source": {
"tms_id": "MV002510340000",
"django_id": 555015,
"imdb_id": "tt1130884",
"has_poster": true,
"release_date": "2010-02-19",
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Mark Ruffalo",
"Ben Kingsley",
"Max von Sydow"
],
"release_year": 2010,
"record_url_count": 2,
"title": "Shutter Island"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "104669",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 104669,
"imdb_id": "tt0108330",
"has_poster": true,
"release_date": "1993-04-23",
"kind": "movie",
"cast": [
"Robert De Niro",
"Ellen Barkin",
"Leonardo DiCaprio",
"Jonah Blechman"
],
"release_year": 1993,
"record_url_count": 1,
"title": "This Boy's Life"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "846212",
"_score": 13.778852,
"_source": {
"django_id": 846212,
"title": "Legacy of Secrecy",
"imdb_id": "tt2218442",
"has_poster": false,
"kind": "N/A",
"cast": [
"Leonardo DiCaprio",
"Robert De Niro",
"D'Anthony Palms"
],
"release_year": 1947,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "256632",
"_score": 13.778852,
"_source": {
"django_id": 256632,
"title": "The Movie Show",
"imdb_id": "tt0271918",
"has_poster": false,
"kind": "series",
"cast": [
"Ray Brady",
"Russell Crowe",
"Larry Day",
"Leonardo DiCaprio"
],
"release_year": 1986,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "269743",
"_score": 13.778852,
"_source": {
"django_id": 269743,
"title": "Total Eclipse",
"imdb_id": "tt0286234",
"has_poster": false,
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Agnieszka Holland"
],
"release_year": 1995,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "1007190",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 1007190,
"imdb_id": "tt3391950",
"has_poster": false,
"release_date": "2013-12-29",
"kind": "series",
"cast": [
"Leonardo DiCaprio",
"Jonah Hill",
"Martin Scorsese",
"Terence Winter"
],
"release_year": 2013,
"record_url_count": 0,
"title": "The Hollywood Reporter in Focus"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "1077511",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 1077511,
"imdb_id": "tt4007278",
"has_poster": false,
"release_date": "2014-08-20",
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
],
"release_year": 2014,
"record_url_count": 0,
"title": "Carbon"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "302615",
"_score": 13.57246,
"_source": {
"django_id": 302615,
"title": "Directors: James Cameron",
"imdb_id": "tt0322031",
"has_poster": true,
"kind": "movie",
"cast": [
"Michael Biehn",
"James Cameron",
"Jamie Lee Curtis",
"Leonardo DiCaprio"
],
"release_year": 1997,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "509785",
"_score": 13.57246,
"_source": {
"tms_id": "",
"django_id": 509785,
"imdb_id": "tt0923573",
"has_poster": false,
"release_date": "2003-05-06",
"kind": "movie",
"cast": [
"Frank Abagnale Jr.",
"Amy Adams",
"Nathalie Baye",
"Leonardo DiCaprio"
],
"release_year": 2003,
"record_url_count": 0,
"title": "'Catch Me If You Can': The Casting of the Film"
}
}
]
}
}
The results are MUCH improved but still the last 2 have different scores than the rest of the results.

Elasticsearch relevancy default model is called TF/IDF. You can read more about it here.
The _score you see in your search hits is calculated by this model.
Basically, the score is a result of a calculation on three factors (more info here):
Term frequency - How often does a term appear in a specific document? TF
Inverse document frequency - How often does the term appear in all documents in the collection? IDF
Field-length norm - How long is the field?
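As you can infer from the above, the documents that contain leonardo dicaprio differ in their term frequency and field length, and the calculation also depends on how often the matching terms occur across the index, so their relevancy scores differ. Note also that, by default, inverse document frequency is computed separately on each shard rather than across the whole index; with 5 shards, otherwise identical documents that live on different shards can still get slightly different scores even after norms are disabled. Searching with search_type=dfs_query_then_fetch uses index-wide term statistics and removes that difference.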
Nevertheless, documents that contain leonardo dicaprio get higher scores than those that don't.
Hope it helps.

Related

Edge n-gram suggestions and 'starts with' keyword in Elasticsearch

I'm trying to build a food search engine on Elasticsearch that should meet following use cases -
If the user searches for 'coff' then it should return all the documents with 'coffee' in their name, and priority should be given to food items that have 'coffee' at the start of their name.
If the user searches for 'green tea' then it should give priority to documents that contain the whole phrase 'green tea' instead of splitting it into 'green' and 'tea'.
If the phrase does not exist in the 'name' field then it should also search the 'alias' field.
To manage the first case, I've used the edge n-grams analyzer.
Mapping -
{
"settings": {
"index": {
"analysis": {
"filter": {},
"analyzer": {
"analyzer_keyword": {
"tokenizer": "standard",
"filter": "lowercase"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 5,
"token_chars": [
"letter"
]
}
}
}
}
},
"mappings": {
"doc": {
"properties": {
"alias": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"search_analyzer": "analyzer_keyword",
"analyzer": "edge_ngram_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
This is the search query that I'm using but it's not exactly returning the relevant search results
{
"query": {
"multi_match": {
"query": "coffee",
"fields": ["name^2", "alias"]
}
}
}
There are over 1500 food items with 'coffee' in their name but the above query is only returning 2
{
"took": 745,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 8.657346,
"hits": [
{
"_index": "food-master",
"_type": "doc",
"_id": "a9uzinABb4g7LgmgoI1I",
"_score": 8.657346,
"_source": {
"id": 17463,
"name": "Rotiboy, coffee bun",
"alias": [
"Mexican Coffee Bun (Rotiboy)",
"Mexican coffee bun"
],
}
},
{
"_index": "food-master",
"_type": "doc",
"_id": "TNuzinABb4g7LgmgoFVI",
"_score": 7.0164866,
"_source": {
"id": 1344,
"name": "Coffee with sugar",
"alias": [
"Heart Friendly",
"Coffee With Sugar",
"Coffee With Milk and Sugar",
"Gluten Free",
"Hypertension Friendly"
],
}
}
]
}
}
In the mapping, if I remove the analyzer_keyword then it returns relevant results but the documents that start with 'coffee' are not prioritized
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1323,
"max_score": 57.561867,
"hits": [
{
"_index": "food-master-new",
"_type": "doc",
"_id": "nduzinABb4g7LgmgoINI",
"_score": 57.561867,
"_source": {
"name": "Egg Coffee",
"alias": [],
"id": 12609
}
},
{
"_index": "food-master-new",
"_type": "doc",
"_id": "dNuzinABb4g7LgmgoFVI",
"_score": 55.811295,
"_source": {
"name": "Coffee (Black)",
"alias": [
"Weight Loss",
"Diabetes Friendly",
"Gluten Free",
"Lactose Free",
"Heart Friendly",
"Hypertension Friendly"
],
"id": 1341
}
},
{
"_index": "food-master-new",
"_type": "doc",
"_id": "NduzinABb4g7LgmgoHxI",
"_score": 54.303185,
"_source": {
"name": "Brewed Coffee",
"alias": [
"StarBucks"
],
"id": 15679
}
},
{
"_index": "food-master-new",
"_type": "doc",
"_id": "ltuzinABb4g7LgmgoJJI",
"_score": 54.303185,
"_source": {
"name": "Coffee - Masala",
"alias": [],
"id": 11329
}
},
{
"_index": "food-master-new",
"_type": "doc",
"_id": "oduzinABb4g7LgmgoGpI",
"_score": 53.171227,
"_source": {
"name": "Coffee, German",
"alias": [],
"id": 12257
}
},
{
"_index": "food-master-new",
"_type": "doc",
"_id": "YNuzinABb4g7LgmgoFRI",
"_score": 52.929176,
"_source": {
"name": "Soy Milk Coffee",
"alias": [
"Gluten Free",
"Lactose Free",
"Weight Loss",
"Diabetes Friendly",
"Heart Friendly",
"Hypertension Friendly"
],
"id": 978
}
},
{
"_index": "food-master-new",
"_type": "doc",
"_id": "8duzinABb4g7LgmgoFRI",
"_score": 52.068523,
"_source": {
"name": "Cold Coffee (Soy Milk)",
"alias": [
"Soy Milk"
],
"id": 1097
}
},
{
"_index": "food-master-new",
"_type": "doc",
"_id": "tNuzinABb4g7LgmgoF9I",
"_score": 50.956154,
"_source": {
"name": "Coffee Frappe",
"alias": [],
"id": 3142
}
},
{
"_index": "food-master-new",
"_type": "doc",
"_id": "ZduzinABb4g7LgmgoF5I",
"_score": 49.810112,
"_source": {
"name": "Big Apple Coffee",
"alias": [],
"id": 3130
}
},
{
"_index": "food-master-new",
"_type": "doc",
"_id": "eduzinABb4g7LgmgoHtI",
"_score": 49.62197,
"_source": {
"name": "Mexican Coffee",
"alias": [],
"id": 13604
}
}
]
}
}
If I change the tokenizer to 'keyword' from 'standard' then I face the same problem and it also splits phrases into individual words - 'green tea' to 'green' and 'tea'
Any suggestions on what I might be getting wrong with respect to analyzers? I've tried all possible combinations but meeting all 3 scenarios with high accuracy is getting a little difficult.

Accurate query confusion on Elasticsearch

Here is a batch of mock data that I simulated:
{
"took": 35,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 9000009,
"max_score": 1,
"hits": [
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7B",
"_score": 1,
"_source": {
"operation_name": "ADD_IFC",
"hlrsn": "51",
"user_name": "boss2",
"business_type": "VoLTE",
"task_id": "a-ec0fe200-6219-46fa-8f9b-d23d3fc367a0#1509946767871",
"response_time": "2017-11-06T05:39:27.871Z",
"imsi": "460082279570892",
"msisdn": "8618882291205",
"content": """2017-11-06 05:39:27,871|User:boss2| id:a-ec0fe200-6219-46fa-8f9b-d23d3fc367a0#1509946767871 |{"HLRSN":"51","operationName":"ADD_IFC","ISDN":"8618882291205"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7C",
"_score": 1,
"_source": {
"operation_name": "BAT_RMV_EPSDATA",
"hlrsn": "50",
"user_name": "boss3",
"business_type": "OVERHEAD",
"task_id": "a-6dbf64ee-81e9-4ef4-8b05-664a7fc3f47b#1509946767871",
"response_time": "2017-11-06T05:39:27.871Z",
"imsi": "460050840482507",
"msisdn": "8618178395664",
"content": """2017-11-06 05:39:27,871|User:boss3| id:a-6dbf64ee-81e9-4ef4-8b05-664a7fc3f47b#1509946767871 |{"HLRSN":"50","operationName":"BAT_RMV_EPSDATA","ISDN":"8618178395664"}"""
}
},
...
I want to query data according to a specific task_id :
GET /boss-mock/soap-mock/_search
{
"query": {
"match": {
"task_id": "a-ec0fe200-6219-46fa-8f9b-d23d3fc367a0#1509946767871"
}
}
}
response:
{
"took": 66,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 9000009,
"max_score": 68.65554,
"hits": [
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7B",
"_score": 68.65554,
"_source": {
"operation_name": "ADD_IFC",
"hlrsn": "51",
"user_name": "boss2",
"business_type": "VoLTE",
"task_id": "a-ec0fe200-6219-46fa-8f9b-d23d3fc367a0#1509946767871",
"response_time": "2017-11-06T05:39:27.871Z",
"imsi": "460082279570892",
"msisdn": "8618882291205",
"content": """2017-11-06 05:39:27,871|User:boss2| id:a-ec0fe200-6219-46fa-8f9b-d23d3fc367a0#1509946767871 |{"HLRSN":"51","operationName":"ADD_IFC","ISDN":"8618882291205"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7K",
"_score": 20.13632,
"_source": {
"operation_name": "ADD_TPLSUB",
"hlrsn": "53",
"user_name": "boss1",
"business_type": "OVERHEAD",
"task_id": "a-931b0935-a0d4-46fa-b403-7c1075a1d7a7#1509946767871",
"response_time": "2017-11-06T05:39:27.871Z",
"imsi": "",
"msisdn": "8618509192307",
"content": """2017-11-06 05:39:27,871|User:boss1| id:a-931b0935-a0d4-46fa-b403-7c1075a1d7a7#1509946767871 |{"HLRSN":"53","operationName":"ADD_TPLSUB","ISDN":"8618509192307"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P06bepN_SAQa2S9uQ",
"_score": 17.619738,
"_source": {
"operation_name": "DEA_BOICEXHC",
"hlrsn": "52",
"user_name": "boss3",
"business_type": "VOICE",
"task_id": "a-cc771389-8712-46fa-8f9b-0e64e4fc38e6#1509946485051",
"response_time": "2017-11-06T05:34:45.051Z",
"imsi": "",
"msisdn": "8618914540349",
"content": """2017-11-06 05:34:45,051|User:boss3| id:a-cc771389-8712-46fa-8f9b-0e64e4fc38e6#1509946485051 |{"HLRSN":"52","operationName":"DEA_BOICEXHC","ISDN":"8618914540349"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15kQpN_SAQa2UP6w",
"_score": 12.451507,
"_source": {
"operation_name": "LST_STNSR",
"hlrsn": "51",
"user_name": "boss1",
"business_type": "",
"task_id": "a-30e82392-8817-48ed-8c3d-f4aee6e6c61d#1509946767871",
"response_time": "2017-11-06T05:39:27.871Z",
"imsi": "",
"msisdn": "8618871203019",
"content": """2017-11-06 05:39:27,871|User:boss1| id:a-30e82392-8817-48ed-8c3d-f4aee6e6c61d#1509946767871 |{"HLRSN":"51","operationName":"LST_STNSR","ISDN":"8618871203019"}"""
}
...
It seems that ES returned all the data, although the first hit is the one I queried for.
Then I tried to use a `term` query:
GET /boss-mock/soap-mock/_search
{
"query": {
"term": {
"task_id": "a-ec0fe200-6219-46fa-8f9b-d23d3fc367a0#1509946767871"
}
}
}
But I get nothing:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
However, it works for other fields that hold 'shorter' values, such as msisdn:
GET /boss-mock/soap-mock/_search
{
"query": {
"term": {
"msisdn": "8618882291205"
}
}
}
response:
{
"took": 35,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 9000009,
"max_score": 1,
"hits": [
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7B",
"_score": 1,
"_source": {
"operation_name": "ADD_IFC",
"hlrsn": "51",
"user_name": "boss2",
"business_type": "VoLTE",
"task_id": "a-ec0fe200-6219-46fa-8f9b-d23d3fc367a0#1509946767871",
"response_time": "2017-11-06T05:39:27.871Z",
"imsi": "460082279570892",
"msisdn": "8618882291205",
"content": """2017-11-06 05:39:27,871|User:boss2| id:a-ec0fe200-6219-46fa-8f9b-d23d3fc367a0#1509946767871 |{"HLRSN":"51","operationName":"ADD_IFC","ISDN":"8618882291205"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7C",
"_score": 1,
"_source": {
"operation_name": "BAT_RMV_EPSDATA",
"hlrsn": "50",
"user_name": "boss3",
"business_type": "OVERHEAD",
"task_id": "a-6dbf64ee-81e9-4ef4-8b05-664a7fc3f47b#1509946767871",
"response_time": "2017-11-06T05:39:27.871Z",
"imsi": "460050840482507",
"msisdn": "8618178395664",
"content": """2017-11-06 05:39:27,871|User:boss3| id:a-6dbf64ee-81e9-4ef4-8b05-664a7fc3f47b#1509946767871 |{"HLRSN":"50","operationName":"BAT_RMV_EPSDATA","ISDN":"8618178395664"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7J",
"_score": 1,
"_source": {
"operation_name": "MOD_EPS_CONTEXT",
"hlrsn": "52",
"user_name": "boss2",
"business_type": "LTE",
"task_id": "a-b0bed660-3fca-4201-a90c-e4103f6289c5#1509946767871",
"response_time": "2017-11-06T05:39:27.871Z",
"imsi": "460039208697055",
"msisdn": "8618275883802",
"content": """2017-11-06 05:39:27,871|User:boss2| id:a-b0bed660-3fca-4201-a90c-e4103f6289c5#1509946767871 |{"HLRSN":"52","operationName":"MOD_EPS_CONTEXT","ISDN":"8618275883802"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7L",
"_score": 1,
"_source": {
"operation_name": "DEA_BAIC",
"hlrsn": "53",
"user_name": "boss3",
"business_type": "VOICE",
"task_id": "a-c5cc2332-9d81-476c-ad0a-0809c23cfe49#1509946767871",
"response_time": "2017-11-06T05:39:27.871Z",
"imsi": "",
"msisdn": "",
"content": """2017-11-06 05:39:27,871|User:boss3| id:a-c5cc2332-9d81-476c-ad0a-0809c23cfe49#1509946767871 |{"HLRSN":"53","operationName":"DEA_BAIC","ISDN":"8618886204829"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7O",
"_score": 1,
"_source": {
"operation_name": "LST_SIFC",
"hlrsn": "51",
"user_name": "boss3",
"business_type": "",
"task_id": "a-b0f2a526-8757-4b2c-9011-674cc714fedc#1509946767871",
"response_time": "2017-11-06T05:39:27.871Z",
"imsi": "",
"msisdn": "",
"content": """2017-11-06 05:39:27,871|User:boss3| id:a-b0f2a526-8757-4b2c-9011-674cc714fedc#1509946767871 |{"HLRSN":"51","operationName":"LST_SIFC","ISDN":"8618258093284"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7R",
"_score": 1,
"_source": {
"operation_name": "LST_COLR",
"hlrsn": "52",
"user_name": "boss2",
"business_type": "",
"task_id": "a-348463b7-eb49-45e2-bffb-1068e706802b#1509946767872",
"response_time": "2017-11-06T05:39:27.872Z",
"imsi": "",
"msisdn": "8618557891401",
"content": """2017-11-06 05:39:27,872|User:boss2| id:a-348463b7-eb49-45e2-bffb-1068e706802b#1509946767872 |{"HLRSN":"52","operationName":"LST_COLR","ISDN":"8618557891401"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7W",
"_score": 1,
"_source": {
"operation_name": "BAT_ADD_TPLSUB",
"hlrsn": "52",
"user_name": "boss2",
"business_type": "OVERHEAD",
"task_id": "a-db3748af-0359-40d3-b5fd-eb09cc53ba56#1509946767872",
"response_time": "2017-11-06T05:39:27.872Z",
"imsi": "460017353100210",
"msisdn": "8618219821848",
"content": """2017-11-06 05:39:27,872|User:boss2| id:a-db3748af-0359-40d3-b5fd-eb09cc53ba56#1509946767872 |{"HLRSN":"52","operationName":"BAT_ADD_TPLSUB","ISDN":"8618219821848"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7d",
"_score": 1,
"_source": {
"operation_name": "ACT_BAOC",
"hlrsn": "51",
"user_name": "boss2",
"business_type": "VOICE",
"task_id": "a-80d105e7-138f-4c48-99df-e1b6ea404f43#1509946767872",
"response_time": "2017-11-06T05:39:27.872Z",
"imsi": "",
"msisdn": "",
"content": """2017-11-06 05:39:27,872|User:boss2| id:a-80d105e7-138f-4c48-99df-e1b6ea404f43#1509946767872 |{"HLRSN":"51","operationName":"ACT_BAOC","ISDN":"8618881023802"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7f",
"_score": 1,
"_source": {
"operation_name": "SND_CANCELC",
"hlrsn": "53",
"user_name": "boss1",
"business_type": "LOCATION",
"task_id": "a-1a26292d-0f6d-416b-ab3b-47b0c888843f#1509946767872",
"response_time": "2017-11-06T05:39:27.872Z",
"imsi": "",
"msisdn": "8618571785343",
"content": """2017-11-06 05:39:27,872|User:boss1| id:a-1a26292d-0f6d-416b-ab3b-47b0c888843f#1509946767872 |{"HLRSN":"53","operationName":"SND_CANCELC","ISDN":"8618571785343"}"""
}
},
{
"_index": "boss-mock",
"_type": "soap-mock",
"_id": "AV-P15lDpN_SAQa2UP7g",
"_score": 1,
"_source": {
"operation_name": "MOD_MEDIAID",
"hlrsn": "53",
"user_name": "boss2",
"business_type": "VoLTE",
"task_id": "a-8d2b037b-d346-4b89-9ab7-8f828b1bb783#1509946767872",
"response_time": "2017-11-06T05:39:27.872Z",
"imsi": "",
"msisdn": "",
"content": """2017-11-06 05:39:27,872|User:boss2| id:a-8d2b037b-d346-4b89-9ab7-8f828b1bb783#1509946767872 |{"HLRSN":"53","operationName":"MOD_MEDIAID","ISDN":"8618458567583"}"""
}
}
]
}
}
So, what's going on here? Can't I just query on task_id?
By the way, I have a background in SQL.
I need to query data like:
select * from table where task_id = ?
mapping:
```
{
"boss-mock": {
"mappings": {
"soap-mock": {
"properties": {
"business_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"content": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"hlrsn": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"imsi": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"msisdn": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"operation_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"response_time": {
"type": "date"
},
"task_id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
```
It's happening because you're executing an analyzed query against an analyzed field. Let me briefly explain what I mean. Each field of type text is analyzed and stored as a set of tokens to support full-text search.
You can check how analyzer will process your data by sending
POST http://localhost:9200/_analyze HTTP/1.1
Content-type: application/json
{
"tokenizer": "standard",
"text": "a-ec0fe200-6219-46fa-8f9b-d23d3fc367a0#1509946767871"
}
Response will inform you that ES would index a list of following tokens
["a", "ec0fe200", "6219", "46fa", "8f9b", "d23d3fc367a0", "1509946767871"]
The same happens to your match query, which is an analyzed query, so in fact you're querying
["a","ec0fe200","6219","46fa","8f9b","d23d3fc367a0","1509946767871"]
against
["a","ec0fe200","6219","46fa","8f9b","d23d3fc367a0","1509946767871"]
["a","931b0935","a0d4","46fa","b403","7c1075a1d7a7","1509946767871"]
["a","cc771389","8712","46fa","8f9b","0e64e4fc38e6","1509946485051"]
["a","30e82392","8817","48ed","8c3d","f4aee6e6c61d","1509946767871"]
What is more, match query's default operator is OR so you'll get a result if at least one token from your query matches with indexed ones ("46fa", "1509946767871" ...).
Then you tried a term query, which is not analyzed, so the problem is the opposite. You were trying to match
"a-ec0fe200-6219-46fa-8f9b-d23d3fc367a0#1509946767871"
as one string against the same lists of tokens. That's why you get empty results.
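So the short answer to your problem is that if you want something similar to a SQL WHERE clause, you shouldn't use analyzed fields for these properties, or you should maintain both analyzed and non-analyzed fields. Your current mapping already does the latter through its keyword sub-fields, so a term query on task_id.keyword will match the exact string.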
To solve this issue you should drop your index, define the mapping statically as shown below, index your data again and then use a term query to match entire strings. The keyword data type is the most relevant one here. You can read more here
PUT http://localhost:9200/boss_mock
Content-type: application/json
{
"mappings": {
"soap-mock": {
"properties": {
"task_id": {
"type": "keyword"
},
"msisdn": {
"type": "keyword"
}
//whatever else you need
}
}
}
}
Please note that you don't have to define mapping statically for all your properties (others will be added dynamically as texts).

Elasticsearch + Kibana + Alerting (X-Pack) For Energy Monitoring System

Can somebody help me with Alerting Via X-Pack for Energy monitoring system project? The main problem here is I can't collect the 'Value' data from the database, as I want to compare it later with the upper and the lower threshold.
So here is the index:
PUT /test-1
{
"mappings": {
"Test1": {
"properties": {
"Value": {
"type": "integer"
},
"date": {
"type": "date",
"format": "yyyy-MM-dd'T'HH:mm:ss.SSSZ"
},
"UpperThreshold": {
"type": "integer"
},
"LowerThreshold": {
"type": "integer"
}
}
}
}
}
Here is the example of the input:
POST /test-1/Test1
{
"Value": "500",
"date": "2017-06-13T16:20:00.000Z",
"UpperThreshold":"450",
"LowerThreshold": "380"
}
This is my alerting code
{
"trigger": {
"schedule": {
"interval": "10s"
}
},
"input": {
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"logs"
],
"types": [],
"body": {
"query": {
"match": {
"message": "error"
}
}
}
}
}
},
"condition": {
"compare": {
"ctx.payload.hits.total": {
"gt": 0
}
}
},
"actions": {
"send_email": {
"email": {
"profile": "standard",
"to": [
"<account#gmail.com>"
],
"subject": "Watcher Notification",
"body": {
"text": "{{ctx.payload.hits.total}} error logs found"
}
}
}
}
}
Here is the response I got from the alerting plugin
{
"watch_id": "Alerting-Test",
"state": "execution_not_needed",
"_status": {
"state": {
"active": true,
"timestamp": "2017-07-26T15:27:35.497Z"
},
"last_checked": "2017-07-26T15:27:38.625Z",
"actions": {
"logging": {
"ack": {
"timestamp": "2017-07-26T15:27:35.497Z",
"state": "awaits_successful_execution"
}
}
}
},
"trigger_event": {
"type": "schedule",
"triggered_time": "2017-07-26T15:27:38.625Z",
"schedule": {
"scheduled_time": "2017-07-26T15:27:38.175Z"
}
},
"input": {
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"test-1"
],
"types": [
"Test1"
],
"body": {
"query": {
"match_all": {}
}
}
}
}
},
"condition": {
"compare": {
"ctx.payload.hits.hits.0.Value": {
"gt": 450
}
}
},
"metadata": {
"name": "Alerting-Test"
},
"result": {
"execution_time": "2017-07-26T15:27:38.625Z",
"execution_duration": 0,
"input": {
"type": "search",
"status": "success",
"payload": {
"_shards": {
"total": 5,
"failed": 0,
"successful": 5
},
"hits": {
"hits": [
{
"_index": "test-1",
"_type": "Test1",
"_source": {
"date": "2017-07-22T12:00:00.000Z",
"LowerThreshold": "380",
"Value": "350",
"UpperThreshold": "450"
},
"_id": "AV1-1P3lArbJ1tbnct4e",
"_score": 1
},
{
"_index": "test-1",
"_type": "Test1",
"_source": {
"date": "2017-07-22T18:00:00.000Z",
"LowerThreshold": "380",
"Value": "4100",
"UpperThreshold": "450"
},
"_id": "AV1-1Sq0ArbJ1tbnct4v",
"_score": 1
},
{
"_index": "test-1",
"_type": "Test1",
"_source": {
"date": "2017-07-24T18:00:00.000Z",
"LowerThreshold": "380",
"Value": "450",
"UpperThreshold": "450"
},
"_id": "AV1-1eLJArbJ1tbnct6G",
"_score": 1
},
{
"_index": "test-1",
"_type": "Test1",
"_source": {
"date": "2017-07-23T00:00:00.000Z",
"LowerThreshold": "380",
"Value": "400",
"UpperThreshold": "450"
},
"_id": "AV1-1VUzArbJ1tbnct5A",
"_score": 1
},
{
"_index": "test-1",
"_type": "Test1",
"_source": {
"date": "2017-07-23T12:00:00.000Z",
"LowerThreshold": "380",
"Value": "390",
"UpperThreshold": "450"
},
"_id": "AV1-1X4FArbJ1tbnct5R",
"_score": 1
},
{
"_index": "test-1",
"_type": "Test1",
"_source": {
"date": "2017-07-23T18:00:00.000Z",
"LowerThreshold": "380",
"Value": "390",
"UpperThreshold": "450"
},
"_id": "AV1-1YySArbJ1tbnct5T",
"_score": 1
},
{
"_index": "test-1",
"_type": "Test1",
"_source": {
"date": "2017-07-26T00:00:00.000Z",
"LowerThreshold": "380",
"Value": "4700",
"UpperThreshold": "450"
},
"_id": "AV1-1mflArbJ1tbnct67",
"_score": 1
},
{
"_index": "test-1",
"_type": "Test1",
"_source": {
"date": "2017-07-26T06:00:00.000Z",
"LowerThreshold": "380",
"Value": "390",
"UpperThreshold": "450"
},
"_id": "AV1-1oluArbJ1tbnct7M",
"_score": 1
},
{
"_index": "test-1",
"_type": "Test1",
"_source": {
"date": "2017-07-21T12:00:00.000Z",
"LowerThreshold": "380",
"Value": "400",
"UpperThreshold": "450"
},
"_id": "AV1-1IrZArbJ1tbnct3r",
"_score": 1
},
{
"_index": "test-1",
"_type": "Test1",
"_source": {
"date": "2017-07-21T18:00:00.000Z",
"LowerThreshold": "380",
"Value": "440",
"UpperThreshold": "450"
},
"_id": "AV1-1LwzArbJ1tbnct38",
"_score": 1
}
],
"total": 20,
"max_score": 1
},
"took": 1,
"timed_out": false
},
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"test-1"
],
"types": [
"Test1"
],
"body": {
"query": {
"match_all": {}
}
}
}
}
},
"condition": {
"type": "compare",
"status": "success",
"met": false,
"compare": {
"resolved_values": {
**"ctx.payload.hits.hits.0.Value": null**
}
}
},
"actions": []
},
"messages": []
}
I really appreciate your help!

ElasticSearch apply should and range

My situation:
I'm working with an ElasticSearch database and I can't apply a couple of "ORs" plus a couple of "ANDs". I'm writing the SQL query to show what I want. In my SQL query I've used confirmedPlayers and pendingPlayers as if they were arrays; of course I know we can't do that in SQL, but I just wanted to give an example.
If you want me to add my mappings, I will; I just don't want to make the post too long.
This is my query in SQL:
SELECT *
FROM match
WHERE (
"AVnJOMvXOX1s7Ny2Wu9O" in confirmedPlayers OR
"AVnJOMvXOX1s7Ny2Wu9O" in pendingPlayers OR
"AVnJOMvXOX1s7Ny2Wu9O" = creator
)
AND date >= "20/01/2016"
/* AND other filter will be added */
This is my match type info:
{
"took": 79,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 8,
"max_score": 1,
"hits": [
{
"_index": "yojuego",
"_type": "match",
"_id": "AVmak0bWIjogo0aNpbGs",
"_score": 1,
"_source": {
"title": "Mi primer match",
"date": "2016-01-13T20:31:20.000Z",
"fromTime": "19:00",
"toTime": "20:00",
"location": "casa de pablo",
"creator": "AVmabq-5Ijogo0aNpbGn",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVm0ETbT0Y26YggShbFa",
"_score": 1,
"_source": {
"title": "Mi primer match",
"date": "2016-01-13T20:31:20.000Z",
"fromTime": "19:00",
"toTime": "20:00",
"location": "casa de pablo",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [
"AVmBKi21XRKVuACJGZZZ",
"AVmabq-5Ijogo0aNpbGn"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVmab1G5Ijogo0aNpbGo",
"_score": 1,
"_source": {
"title": "Mi primer match",
"date": "2016-01-13T20:31:20.000Z",
"fromTime": "19:00",
"toTime": "20:00",
"location": "casa de pablo",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [
"AVmabVjUIjogo0aNpbGm",
"AVmBKi21XRKVuACJGZZZ"
],
"pendingPlayers": [
"AVmBKi21XRKVuACJGZZZ"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVm0EPX20Y26YggShbFZ",
"_score": 1,
"_source": {
"title": "Mi primer match",
"date": "2016-01-13T20:31:20.000Z",
"fromTime": "19:00",
"toTime": "20:00",
"location": "casa de pablo",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [
"AVmabVjUIjogo0aNpbGm",
"AVmabq-5Ijogo0aNpbGn"
],
"pendingPlayers": [
"AVmBKi21XRKVuACJGZZZ"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "match",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-9fOJxj9yxI50RS3",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-ykMJxj9yxI50RS1",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-73OJxj9yxI50RS2",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"pendingPlayers": [],
"comments": []
}
}
]
}
}
This query returns 4 matches, and it is OK.
http://localhost:9200/my_index/match
POST _search
{
"query": {
"bool": {
"should": [
{ "term": { "confirmedPlayers": { "value": "AVnJOMvXOX1s7Ny2Wu9O" } } },
{ "term": { "pendingPlayers": { "value": "AVnJOMvXOX1s7Ny2Wu9O" } } },
{ "term": { "creator": { "value": "AVnJOMvXOX1s7Ny2Wu9O" } } }
],
"must": [
{ "range": { "date": { "gte": "20/01/2016", "format": "dd/MM/yyyy" } } }
]
}
}
}
//RESULT
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 1.6931472,
"hits": [
{
"_index": "yojuego",
"_type": "match",
"_id": "match",
"_score": 1.6931472,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-73OJxj9yxI50RS2",
"_score": 1.6931472,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-9fOJxj9yxI50RS3",
"_score": 1.287682,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-ykMJxj9yxI50RS1",
"_score": 1.287682,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
}
]
}
}
But this query is returning 4 matches too, and this is the case where it should not return anything.
POST _search
{
"query": {
"bool": {
"should": [
{ "term": { "confirmedPlayers": { "value": "inexistant" } } },
{ "term": { "pendingPlayers": { "value": "inexistant" } } },
{ "term": { "creator": { "value": "inexistant" } } }
],
"must": [
{ "range": { "date": { "gte": "20/01/2016", "format": "dd/MM/yyyy" } } }
]
}
}
}
//RESULT
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 1,
"hits": [
{
"_index": "yojuego",
"_type": "match",
"_id": "match",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-9fOJxj9yxI50RS3",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-ykMJxj9yxI50RS1",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-73OJxj9yxI50RS2",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"pendingPlayers": [],
"comments": []
}
}
]
}
}
Mappings:
{
"match": {
"properties": {
"title": { "type": "string" },
"date": { "type": "date" },
"fromTime": { "type": "string" },
"toTime": { "type": "string" },
"location": { "type": "string" },
"matchType": { "type": "integer" },
"creator": {
"type": "string",
"index": "not_analyzed"
},
"confirmedPlayers" : {
"type": "string",
"index": "not_analyzed"
},
"pendingPlayers" : {
"type": "string",
"index": "not_analyzed"
},
"comments" : {
"properties" : {
"id" : { "type" : "integer" },
"owner" : { "type" : "string" },
"text" : { "type" : "string" },
"writtenOn": { "type": "date" }
}
}
}
}
}
The problem comes up when I use should and must together. If I use should and must separately they work fine.
Based on the result of your second example query (where you claim that 0 results should be returned), it seems you have some confusion about the way that should works in elasticsearch.
I'll quote from the documentation
should
The clause (query) should appear in the matching document. In a
boolean query with no must or filter clauses, one or more should
clauses must match a document. The minimum number of should clauses to
match can be set using the minimum_should_match parameter.
If you use a query with a should and a must, it isn't actually necessary that the should clause hits, only the must clause. If the should clauses do happen to hit, they will be ranked higher in the results.
You have options though. One option: you can write a simple should query, and set the minimum_should_match parameter, then wrap that query in a filtered clause to filter based on the date. Second option: create a nested query, with the must clause inside the should clause.

Extract top visited websites from logs

We are storing log data containing information about sites that have been visited from our network. I'd like to query the top 10 visited websites. How can I achieve this with ElasticSearch? The index mapping is as follows:
{
"data" : {
"properties" : {
"date": {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss"
},
"status": {"type" : "string"},
"group": {"type" : "string"},
"ip": {"type" : "ip"},
"username":{"type" : "string"},
"category":{"type" : "string"},
"url":{"type" : "string"}
}
}
}
Sample Data:
"hits": {
"total": 7,
"max_score": 1,
"hits": [
{
"_index": "squid",
"_type": "data",
"_id": "AU_DT4_ibdcNyAnt753J",
"_score": 1,
"_source": {
"date": "2015-08-16T00:02:00.195Z",
"status": "PASS",
"group": "level3",
"ip": "10.249.10.49",
"username": "Hyder",
"category": "ads",
"url": "https://gmail.com/mail/u/0/#inbox"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_DMjDpbdcNyAnt75iB",
"_score": 1,
"_source": {
"date": "2015-08-15T00:01:00.195Z",
"status": "BLOCK",
"group": "level3",
"ip": "10.249.10.51",
"username": "Fary",
"category": "ads",
"url": "https://gmail.com/details/blabla"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_DT94kbdcNyAnt753Y",
"_score": 1,
"_source": {
"date": "2015-08-17T00:02:00.195Z",
"status": "PASS",
"group": "level3",
"ip": "10.249.10.49",
"username": "Hyder",
"category": "news",
"url": "http://aol.com"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_CwTEqbdcNyAnt74RJ",
"_score": 1,
"_source": {
"date": "2015-08-15T00:00:00.195Z",
"status": "PASS",
"group": "level3",
"ip": "10.249.10.49",
"username": "Hyder",
"category": "Blog",
"url": "http://gmail.com"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_DMmUQbdcNyAnt75iQ",
"_score": 1,
"_source": {
"date": "2015-08-15T00:02:00.195Z",
"status": "PASS",
"group": "level3",
"ip": "10.249.10.51",
"username": "Fary",
"category": "ads",
"url": "http://yahoo.com/vbfhghfgjfdgfd"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_DT1yjbdcNyAnt753B",
"_score": 1,
"_source": {
"date": "2015-08-16T00:02:00.195Z",
"status": "REDIR",
"group": "level3",
"ip": "10.249.10.49",
"username": "Hyder",
"category": "ads",
"url": "http://news.yahoo.com/"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_DMV1ObdcNyAnt75hd",
"_score": 1,
"_source": {
"date": "2015-08-15T00:01:00.195Z",
"status": "BLOCK",
"group": "level3",
"ip": "10.249.10.50",
"username": "Kamal",
"category": "Blog",
"url": "http://hotmail.com/dfdgfgfdg"
}
}
]
What I'd like to have:
Top visited sites:
- **Sites - Hits**
- gmail.com - 3
- yahoo.com - 2
- hotmail.com - 1
- aol.com - 1
First you need to extract the base site (like gmail.com) from the URL field before indexing and add it to a new field. Let's assume this new field is baseSite.
Then you need to follow exactly what is described in this blog.
First make the field baseSite as not_analyzed and then do a terms aggregation on that field.

Resources