Elasticsearch query for two filters - elasticsearch

I am new to Elasticsearch and I have a use case where I need to fetch only the data that satisfies the 2 conditions below:
zoneType : [test,oms]
{"geo_bounding_box":{"location":{"top_left":{"lat":"1.3545001078734353","lon":"103.87945999358624"},"bottom_right":{"lat":"1.3435168247600437","lon":"103.89390100692282"}}}}
My query always returns the whole data set, shown below, but I want only the data that matches the above conditions to be returned:
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 11,
"max_score": 1,
"hits": [
{
"_index": "dataintelindex_man",
"_type": "station_info",
"_id": "chak_01",
"_score": 1,
"_source": {
"tag_datatype": "sensor",
"loc": [
{
"lng": 78.460938,
"lat": 25.665325
}
],
"level": 1,
"station_id": "01",
"tag_owner": "xylem",
"tag_network_name": "chak_network",
"supply_zone": "mantena",
"display_name": "Chak 01",
"tag_sector": "sensorstation",
"meta_info": {
"site": {
"site_name": "site Name",
"site_id": "04"
},
"district": {
"district_name": "district Name",
"district_id": "03"
},
"state": {
"state_name": "state Name",
"state_id": "05"
},
"village": {
"village_id": "01",
"village_name": "village Name"
},
"tehsil": {
"tehsil_id": "02",
"tehsil_name": "tehsil Name"
}
},
"tag_category": "sensorstation",
"node_reference": "chak-01",
"_id": "chak_01"
}
},
{
"_index": "dataintelindex_man",
"_type": "station_info",
"_id": "chak-01",
"_score": 1,
"_source": {
"tag_datatype": "sensor",
"loc": [
{
"lng": 78.460938,
"lat": 25.665325
}
],
"level": 1,
"station_id": "01",
"tag_owner": "xylem",
"tag_network_name": "chak_network",
"supply_zone": "mantena",
"display_name": "Chak 01",
"tag_sector": "sensorstation",
"meta_info": {
"site": {
"site_name": "site Name",
"site_id": "04"
},
"district": {
"district_name": "district Name",
"district_id": "03"
},
"state": {
"state_name": "state Name",
"state_id": "05"
},
"village": {
"village_id": "01",
"village_name": "village Name"
},
"tehsil": {
"tehsil_id": "02",
"tehsil_name": "tehsil Name"
}
},
"tag_category": "sensorstation",
"node_reference": "980066547976678",
"_id": "chak-01"
}
},
{
"_index": "dataintelindex_man",
"_type": "station_info",
"_id": "dummy_elastic_update_station",
"_score": 1,
"_source": {
"dummystnupddate": "Thu Dec 03 2015 07:00:01 GMT+0000",
"level": "1",
"icon": "newicons/dataxicons/blue.png",
"_id": "dummy_elastic_update_station",
"tag_location": "unknown"
}
},
{
"_index": "dataintelindex_man",
"_type": "station_info",
"_id": "5f3121d6b4c93c1d20bbbb38",
"_score": 1,
"_source": {
"tag_datatype": "sensor",
"loc": [
{
"lat": "0",
"lon": "0"
}
],
"level": 1,
"kml_path": "",
"created": "Mon Aug 10 16:00:47 IST 2020",
"latitude": "0",
"station_id": "5f3121d6b4c93c1d20bbbb38",
"longtitude": "0",
"tag_owner": "",
"description": "",
"zoneType": "test",
"tag_network_name": "chak_network",
"display_name": "testname",
"supply_zone": "testname",
"outflow": null,
"tag_sector": "dmameter",
"name": "testname",
"tag_category": "sensorstation",
"inflow": null,
"_id": "5f3121d6b4c93c1d20bbbb38",
"tag_location": "NA",
"lastmod": "Mon Aug 10 16:00:47 IST 2020",
"status": "ACTIVE"
}
},
{
"_index": "dataintelindex_man",
"_type": "station_info",
"_id": "5f312253b4c93c1d20bbbb39",
"_score": 1,
"_source": {
"tag_datatype": "sensor",
"loc": [
{
"lat": "0",
"lon": "0"
}
],
"level": 1,
"kml_path": "",
"created": "Mon Aug 10 16:02:51 IST 2020",
"latitude": "0",
"station_id": "5f312253b4c93c1d20bbbb39",
"longtitude": "0",
"tag_owner": "",
"description": "",
"zoneType": "oms",
"tag_network_name": "chak_network",
"display_name": "506020200236117-O1",
"supply_zone": "506020200236117-O1",
"outflow": null,
"tag_sector": "dmameter",
"name": "506020200236117-O1",
"tag_category": "sensorstation",
"inflow": null,
"_id": "5f312253b4c93c1d20bbbb39",
"tag_location": "NA",
"lastmod": "Mon Aug 10 16:02:51 IST 2020",
"status": "ACTIVE"
}
},
{
"_index": "dataintelindex_man",
"_type": "station_info",
"_id": "5f32357b3ccb8f51e003587e",
"_score": 1,
"_source": {
"tag_datatype": "sensor",
"loc": [
{
"lat": "0",
"lon": "0"
}
],
"level": 1,
"kml_path": "",
"created": "Tue Aug 11 11:36:51 IST 2020",
"latitude": "0",
"station_id": "5f32357b3ccb8f51e003587e",
"longtitude": "0",
"tag_owner": "",
"description": "",
"zoneType": "village",
"display_name": "testvillage1",
"supply_zone": "testvillage1",
"outflow": null,
"tag_sector": "dmameter",
"name": "testvillage1",
"tag_category": "sensorstation",
"inflow": null,
"_id": "5f32357b3ccb8f51e003587e",
"tag_location": "NA",
"lastmod": "Tue Aug 11 11:36:51 IST 2020",
"status": "ACTIVE"
}
},
{
"_index": "dataintelindex_man",
"_type": "station_info",
"_id": "5ee0861c5f15030001b2dfd9",
"_score": 1,
"_source": {
"tag_datatype": "sensor",
"loc": [
{
"lat": "0",
"lon": "0"
}
],
"level": 1,
"kml_path": "",
"created": "Wed Jun 10 07:05:00 UTC 2020",
"latitude": "0",
"station_id": "5ee0861c5f15030001b2dfd9",
"longtitude": "0",
"tag_owner": "",
"description": "",
"tag_network_name": "chak_network",
"display_name": "bhamarhar",
"supply_zone": "bhamarhar",
"outflow": null,
"tag_sector": "dmameter",
"name": "bhamarhar",
"tag_category": "sensorstation",
"inflow": null,
"_id": "5ee0861c5f15030001b2dfd9",
"tag_location": "NA",
"lastmod": "Wed Jun 10 07:05:00 UTC 2020",
"status": "ACTIVE"
}
},
{
"_index": "dataintelindex_man",
"_type": "station_info",
"_id": "5f2ad03bba21eb28684451e3",
"_score": 1,
"_source": {
"tag_datatype": "sensor",
"loc": [
{
"lat": "0",
"lon": "0"
}
],
"level": 1,
"kml_path": "",
"created": "Wed Aug 05 20:58:59 IST 2020",
"latitude": "0",
"station_id": "5f2ad03bba21eb28684451e3",
"longtitude": "0",
"tag_owner": "",
"description": "",
"tag_network_name": "chak_network",
"display_name": "zone-1",
"supply_zone": "zone-1",
"outflow": null,
"tag_sector": "dmameter",
"name": "zone-1",
"tag_category": "sensorstation",
"inflow": null,
"_id": "5f2ad03bba21eb28684451e3",
"tag_location": "NA",
"lastmod": "Wed Aug 05 20:58:59 IST 2020",
"status": "ACTIVE"
}
},
{
"_index": "dataintelindex_man",
"_type": "sensor_info",
"_id": "chak_01_btry",
"_score": 1,
"_source": {
"parent": "chak_01",
"sensortype_units": "volt",
"device_id": "OMS_MP_LRR_001",
"level": 2,
"sensortype_actual": "btry",
"highrate": false,
"tag_datasource": "xylem",
"_id": "chak_01_btry",
"sensortype_display": "btry - chak_01",
"type": "sensor",
"sensortype_backend": "btry"
}
},
{
"_index": "dataintelindex_man",
"_type": "sensor_info",
"_id": "chak-01/pressure",
"_score": 1,
"_source": {
"parent": "chak-01",
"sensortype_units": "bar",
"device_id": "OMS_MP_LRR_001",
"level": 2,
"sensortype_actual": "pressure",
"highrate": false,
"tag_datasource": "xylem",
"_id": "chak-01/pressure",
"sensortype_display": "pressure - chak-01",
"type": "sensor",
"sensortype_backend": "pressure"
}
}
]
}
}
The query I have written is below:
{
"query":{
"geo_bounding_box":{
"location":{
"top_left":{
"lat":"1.3545001078734353",
"lon":"103.87945999358624"
},
"bottom_right":{
"lat":"1.3435168247600437",
"lon":"103.89390100692282"
}
}
}
},
{
"zoneType":[
"oms",
"test"
]
},
"size":100000
}
Please help me validate the query.

There are multiple issues:
JSON is invalid
zoneType is placed at the root of the object, it should be under query
For this to work, all queries/filters must be placed within the root query object. To combine multiple filters you need to use a compound query, in this specific case a bool query; you can read more about it in the documentation.
So this would be the actual query that should work for you:
{
"query": {
"bool": {
"filter": [
{
"geo_bounding_box": {
"location": {
"top_left": {
"lat": "1.3545001078734353",
"lon": "103.87945999358624"
},
"bottom_right": {
"lat": "1.3435168247600437",
"lon": "103.89390100692282"
}
}
}
},
{
"terms": {
"zoneType": [
"oms",
"test"
]
}
}
]
}
},
"size": 100000
}
See I've moved zoneType under query->bool->filter[], next to geo_bounding_box query.

Related

How to get one element from array of objects in elasticsearch

I have a products index which has an offers field. Offers is an array of objects.
I want to return one offer by seller_id in an array or in a new field.
Input:
with seller_id=5
{
"_index":"dev_products",
"_type":"_doc",
"_id":"138",
"_score":1.0,
"_source":{
"is_adult":false,
"status_id":3,
"allow_publish":false,
"name":"Consequuntur expedita sit perferendis est.",
"category_id":816,
"brand_id":363,
"description":"Nec.",
"type":3,
"vendor_code":"4968258909901",
"barcode":"98735976",
"code":"consequuntur-expedita-sit-perferendis-est",
"updated_at":"2022-11-15T10:42:33.000000Z",
"created_at":"2022-11-15T10:42:33.000000Z",
"id":138,
"offers":[
{
"product_id":"138",
"seller_id":"1",
"sale_status":"2",
"external_id":"1267631",
"store_id":"2",
"qty":"44",
"storage_address":"",
"base_price":"312.84",
"updated_at":"2022-11-15T10:42:49.000000Z",
"created_at":"2022-11-15T10:42:49.000000Z",
"id":74
},
{
"product_id":"138",
"seller_id":"2",
"sale_status":"1",
"external_id":"2795841",
"store_id":"2",
"qty":"1",
"storage_address":"",
"base_price":"1812.3",
"updated_at":"2022-11-15T10:44:50.000000Z",
"created_at":"2022-11-15T10:44:50.000000Z",
"id":76
},
{
"product_id":"138",
"seller_id":"3",
"sale_status":"1",
"external_id":"32219",
"store_id":"1",
"qty":"32",
"storage_address":"",
"base_price":"1556.25",
"updated_at":"2022-11-15T10:50:16.000000Z",
"created_at":"2022-11-15T10:50:16.000000Z",
"id":77
},
{
"product_id":"138",
"seller_id":"4",
"sale_status":"1",
"external_id":"967427",
"store_id":"1",
"qty":"35",
"storage_address":"",
"base_price":"137.62",
"updated_at":"2022-11-15T10:50:18.000000Z",
"created_at":"2022-11-15T10:50:18.000000Z",
"id":78
},
{
"product_id":"138",
"seller_id":"5",
"sale_status":"2",
"external_id":"209466",
"store_id":"1",
"qty":"45",
"storage_address":"",
"base_price":"187.03",
"updated_at":"2022-11-15T10:50:19.000000Z",
"created_at":"2022-11-15T10:50:19.000000Z",
"id":79
},
{
"product_id":"138",
"seller_id":"6",
"sale_status":"1",
"external_id":"522912",
"store_id":"1",
"qty":"61",
"storage_address":"",
"base_price":"306.39",
"updated_at":"2022-11-15T10:50:20.000000Z",
"created_at":"2022-11-15T10:50:20.000000Z",
"id":80
}
]
}
}
Expected:
{
"_index":"dev_products",
"_type":"_doc",
"_id":"138",
"_score":1.0,
"_source":{
"is_adult":false,
"status_id":3,
"allow_publish":false,
"name":"Consequuntur expedita sit perferendis est.",
"category_id":816,
"brand_id":363,
"description":"Nec.",
"type":3,
"vendor_code":"4968258909901",
"barcode":"98735976",
"code":"consequuntur-expedita-sit-perferendis-est",
"updated_at":"2022-11-15T10:42:33.000000Z",
"created_at":"2022-11-15T10:42:33.000000Z",
"id":138,
"offers":[
{
"product_id":"138",
"seller_id":"5",
"sale_status":"2",
"external_id":"209466",
"store_id":"1",
"qty":"45",
"storage_address":"",
"base_price":"187.03",
"updated_at":"2022-11-15T10:50:19.000000Z",
"created_at":"2022-11-15T10:50:19.000000Z",
"id":79
}
]
}
}
Or expected:
{
"_index":"dev_products",
"_type":"_doc",
"_id":"138",
"_score":1.0,
"_source":{
"is_adult":false,
"status_id":3,
"allow_publish":false,
"name":"Consequuntur expedita sit perferendis est.",
"category_id":816,
"brand_id":363,
"description":"Nec.",
"type":3,
"vendor_code":"4968258909901",
"barcode":"98735976",
"code":"consequuntur-expedita-sit-perferendis-est",
"updated_at":"2022-11-15T10:42:33.000000Z",
"created_at":"2022-11-15T10:42:33.000000Z",
"id":138,
"offer":{
"product_id":"138",
"seller_id":"5",
"sale_status":"2",
"external_id":"209466",
"store_id":"1",
"qty":"45",
"storage_address":"",
"base_price":"187.03",
"updated_at":"2022-11-15T10:50:19.000000Z",
"created_at":"2022-11-15T10:50:19.000000Z",
"id":79
}
}
}
Thanks for help!
If the offers field is of the nested type, you can use inner hits to get only the matching object from the list.
The object you expect will be in the "inner_hits" section of the response.
Query
GET idx_nested/_search?filter_path=hits.hits
{
"query": {
"nested": {
"path": "offers",
"query": {
"match": {
"offers.seller_id": "5"
}
},
"inner_hits": {}
}
}
}
Response:
{
"hits": {
"hits": [
{
"_index": "idx_nested",
"_id": "kYyYf4QBgXg8h_rctd1z",
"_score": 1.540445,
"_source": {
"is_adult": false,
"status_id": 3,
"allow_publish": false,
"name": "Consequuntur expedita sit perferendis est.",
"category_id": 816,
"brand_id": 363,
"description": "Nec.",
"type": 3,
"vendor_code": "4968258909901",
"barcode": "98735976",
"code": "consequuntur-expedita-sit-perferendis-est",
"updated_at": "2022-11-15T10:42:33.000000Z",
"created_at": "2022-11-15T10:42:33.000000Z",
"id": 138,
"offers": [
{
"product_id": "138",
"seller_id": "1",
"sale_status": "2",
"external_id": "1267631",
"store_id": "2",
"qty": "44",
"storage_address": "",
"base_price": "312.84",
"updated_at": "2022-11-15T10:42:49.000000Z",
"created_at": "2022-11-15T10:42:49.000000Z",
"id": 74
},
{
"product_id": "138",
"seller_id": "2",
"sale_status": "1",
"external_id": "2795841",
"store_id": "2",
"qty": "1",
"storage_address": "",
"base_price": "1812.3",
"updated_at": "2022-11-15T10:44:50.000000Z",
"created_at": "2022-11-15T10:44:50.000000Z",
"id": 76
},
{
"product_id": "138",
"seller_id": "3",
"sale_status": "1",
"external_id": "32219",
"store_id": "1",
"qty": "32",
"storage_address": "",
"base_price": "1556.25",
"updated_at": "2022-11-15T10:50:16.000000Z",
"created_at": "2022-11-15T10:50:16.000000Z",
"id": 77
},
{
"product_id": "138",
"seller_id": "4",
"sale_status": "1",
"external_id": "967427",
"store_id": "1",
"qty": "35",
"storage_address": "",
"base_price": "137.62",
"updated_at": "2022-11-15T10:50:18.000000Z",
"created_at": "2022-11-15T10:50:18.000000Z",
"id": 78
},
{
"product_id": "138",
"seller_id": "5",
"sale_status": "2",
"external_id": "209466",
"store_id": "1",
"qty": "45",
"storage_address": "",
"base_price": "187.03",
"updated_at": "2022-11-15T10:50:19.000000Z",
"created_at": "2022-11-15T10:50:19.000000Z",
"id": 79
},
{
"product_id": "138",
"seller_id": "6",
"sale_status": "1",
"external_id": "522912",
"store_id": "1",
"qty": "61",
"storage_address": "",
"base_price": "306.39",
"updated_at": "2022-11-15T10:50:20.000000Z",
"created_at": "2022-11-15T10:50:20.000000Z",
"id": 80
}
]
},
"inner_hits": {
"offers": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.540445,
"hits": [
{
"_index": "idx_nested",
"_id": "kYyYf4QBgXg8h_rctd1z",
"_nested": {
"field": "offers",
"offset": 4
},
"_score": 1.540445,
"_source": {
"store_id": "1",
"updated_at": "2022-11-15T10:50:19.000000Z",
"storage_address": "",
"product_id": "138",
"qty": "45",
"base_price": "187.03",
"sale_status": "2",
"created_at": "2022-11-15T10:50:19.000000Z",
"external_id": "209466",
"id": 79,
"seller_id": "5"
}
}
]
}
}
}
}
]
}
}

ElasticSearch apply should and range

My situation:
I'm working with an Elasticsearch database and I can't combine a couple of "ORs" with a couple of "ANDs". I've written an SQL query to show what I want; in it I've used confirmedPlayers and pendingPlayers as if they were arrays. Of course I know we can't do that in SQL, but I just wanted to give an example.
If you want me to add my mappings, I will; I just don't want to make the post too long.
This is my query in SQL:
SELECT *
FROM match
WHERE (
"AVnJOMvXOX1s7Ny2Wu9O" in confirmedPlayers OR
"AVnJOMvXOX1s7Ny2Wu9O" in pendingPlayers OR
"AVnJOMvXOX1s7Ny2Wu9O" = creator
)
AND date >= "20/01/2016"
/* AND other filter will be added */
This is my match type info:
{
"took": 79,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 8,
"max_score": 1,
"hits": [
{
"_index": "yojuego",
"_type": "match",
"_id": "AVmak0bWIjogo0aNpbGs",
"_score": 1,
"_source": {
"title": "Mi primer match",
"date": "2016-01-13T20:31:20.000Z",
"fromTime": "19:00",
"toTime": "20:00",
"location": "casa de pablo",
"creator": "AVmabq-5Ijogo0aNpbGn",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVm0ETbT0Y26YggShbFa",
"_score": 1,
"_source": {
"title": "Mi primer match",
"date": "2016-01-13T20:31:20.000Z",
"fromTime": "19:00",
"toTime": "20:00",
"location": "casa de pablo",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [
"AVmBKi21XRKVuACJGZZZ",
"AVmabq-5Ijogo0aNpbGn"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVmab1G5Ijogo0aNpbGo",
"_score": 1,
"_source": {
"title": "Mi primer match",
"date": "2016-01-13T20:31:20.000Z",
"fromTime": "19:00",
"toTime": "20:00",
"location": "casa de pablo",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [
"AVmabVjUIjogo0aNpbGm",
"AVmBKi21XRKVuACJGZZZ"
],
"pendingPlayers": [
"AVmBKi21XRKVuACJGZZZ"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVm0EPX20Y26YggShbFZ",
"_score": 1,
"_source": {
"title": "Mi primer match",
"date": "2016-01-13T20:31:20.000Z",
"fromTime": "19:00",
"toTime": "20:00",
"location": "casa de pablo",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [
"AVmabVjUIjogo0aNpbGm",
"AVmabq-5Ijogo0aNpbGn"
],
"pendingPlayers": [
"AVmBKi21XRKVuACJGZZZ"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "match",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-9fOJxj9yxI50RS3",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-ykMJxj9yxI50RS1",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-73OJxj9yxI50RS2",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"pendingPlayers": [],
"comments": []
}
}
]
}
}
This query returns 4 matches, and it is OK.
http://localhost:9200/my_index/match
POST _search
{
"query": {
"bool": {
"should": [
{ "term": { "confirmedPlayers": { "value": "AVnJOMvXOX1s7Ny2Wu9O" } } },
{ "term": { "pendingPlayers": { "value": "AVnJOMvXOX1s7Ny2Wu9O" } } },
{ "term": { "creator": { "value": "AVnJOMvXOX1s7Ny2Wu9O" } } }
],
"must": [
{ "range": { "date": { "gte": "20/01/2016", "format": "dd/MM/yyyy" } } }
]
}
}
}
//RESULT
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 1.6931472,
"hits": [
{
"_index": "yojuego",
"_type": "match",
"_id": "match",
"_score": 1.6931472,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-73OJxj9yxI50RS2",
"_score": 1.6931472,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-9fOJxj9yxI50RS3",
"_score": 1.287682,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-ykMJxj9yxI50RS1",
"_score": 1.287682,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
}
]
}
}
But this query is returning 4 matches too, and this is the case where it should not return anything.
POST _search
{
"query": {
"bool": {
"should": [
{ "term": { "confirmedPlayers": { "value": "inexistant" } } },
{ "term": { "pendingPlayers": { "value": "inexistant" } } },
{ "term": { "creator": { "value": "inexistant" } } }
],
"must": [
{ "range": { "date": { "gte": "20/01/2016", "format": "dd/MM/yyyy" } } }
]
}
}
}
//RESULT
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 1,
"hits": [
{
"_index": "yojuego",
"_type": "match",
"_id": "match",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-9fOJxj9yxI50RS3",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-ykMJxj9yxI50RS1",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVnJOMvXOX1s7Ny2Wu9O",
"matchType": "5",
"confirmedPlayers": [],
"pendingPlayers": [],
"comments": []
}
},
{
"_index": "yojuego",
"_type": "match",
"_id": "AVnm-73OJxj9yxI50RS2",
"_score": 1,
"_source": {
"title": "otro match 3",
"date": "2017-12-28T00:00:00.000Z",
"fromTime": "21:00",
"toTime": "22:00",
"location": "somewhere",
"creator": "AVmabVjUIjogo0aNpbGm",
"matchType": "5",
"confirmedPlayers": [
"AVnJOMvXOX1s7Ny2Wu9O"
],
"pendingPlayers": [],
"comments": []
}
}
]
}
}
Mappings:
{
"match": {
"properties": {
"title": { "type": "string" },
"date": { "type": "date" },
"fromTime": { "type": "string" },
"toTime": { "type": "string" },
"location": { "type": "string" },
"matchType": { "type": "integer" },
"creator": {
"type": "string",
"index": "not_analyzed"
},
"confirmedPlayers" : {
"type": "string",
"index": "not_analyzed"
},
"pendingPlayers" : {
"type": "string",
"index": "not_analyzed"
},
"comments" : {
"properties" : {
"id" : { "type" : "integer" },
"owner" : { "type" : "string" },
"text" : { "type" : "string" },
"writtenOn": { "type": "date" }
}
}
}
}
}
The problem comes up when I use should and must together. If I use should and must separately, they work fine.
Based on the result of your second example query (where you claim that 0 results should be returned), it seems you have some confusion about the way that should works in elasticsearch.
I'll quote from the documentation
should
The clause (query) should appear in the matching document. In a
boolean query with no must or filter clauses, one or more should
clauses must match a document. The minimum number of should clauses to
match can be set using the minimum_should_match parameter.
If you use a query with a should and a must, it isn't actually necessary that the should clause hits, only the must clause. If the should clauses do happen to hit, they will be ranked higher in the results.
You have options though. One option: you can write a simple should query, and set the minimum_should_match parameter, then wrap that query in a filtered clause to filter based on the date. Second option: create a nested query, with the must clause inside the should clause.

Aggregation in elasticsearch with specific parameter

I have bulk documents in elasticsearch and as an example I have taken the elasticsearch documentation example as banks
{
"_index": "bank",
"_type": "account",
"_id": "25",
"_score": 1,
"_source": {
"account_number": 25,
"balance": 40540,
"firstname": "Virginia",
"lastname": "Ayala",
"age": 39,
"gender": "F",
"address": "171 Putnam Avenue",
"employer": "Filodyne",
"email": "virginiaayala@filodyne.com",
"city": "Nicholson",
"state": "PA"
}
}
,
{
"_index": "bank",
"_type": "account",
"_id": "44",
"_score": 1,
"_source": {
"account_number": 44,
"balance": 34487,
"firstname": "Aurelia",
"lastname": "Harding",
"age": 37,
"gender": "M",
"address": "502 Baycliff Terrace",
"employer": "Orbalix",
"email": "aureliaharding@orbalix.com",
"city": "Yardville",
"state": "DE"
}
}
,
{
"_index": "bank",
"_type": "account",
"_id": "99",
"_score": 1,
"_source": {
"account_number": 99,
"balance": 47159,
"firstname": "Ratliff",
"lastname": "Heath",
"age": 39,
"gender": "F",
"address": "806 Rockwell Place",
"employer": "Zappix",
"email": "ratliffheath@zappix.com",
"city": "Shaft",
"state": "ND"
}
}
,
{
"_index": "bank",
"_type": "account",
"_id": "119",
"_score": 1,
"_source": {
"account_number": 119,
"balance": 49222,
"firstname": "Laverne",
"lastname": "Johnson",
"age": 28,
"gender": "F",
"address": "302 Howard Place",
"employer": "Senmei",
"email": "lavernejohnson@senmei.com",
"city": "Herlong",
"state": "DC"
}
}
,
{
"_index": "bank",
"_type": "account",
"_id": "126",
"_score": 1,
"_source": {
"account_number": 126,
"balance": 3607,
"firstname": "Effie",
"lastname": "Gates",
"age": 39,
"gender": "F",
"address": "620 National Drive",
"employer": "Digitalus",
"email": "effiegates@digitalus.com",
"city": "Blodgett",
"state": "MD"
}
}
Now there is a field called state and a field called price in each document. How can I write a query that returns only results with distinct state values, sorted by balance in ascending order?
I was trying with terms aggregation but of no use.
UPDATE
POST _search
{
"size": 0,
"aggs": {
"states": {
"terms": {
"field": "state"
},
"aggs": {
"balances": {
"top_hits": {
"from" : 0,
"size": 1,
"sort": {"balance": "asc"}
}
}
}
}
}
}
With this query I get back the top hits sorted by price within each "state" key. But what I want is results sorted with respect to balance, with unique state values.
For the above query, I am getting the following response:
"buckets": [
{
"key": "tx",
"doc_count": 30,
"balances": {
"hits": {
"total": 30,
"max_score": null,
"hits": [
{
"_index": "bank",
"_type": "account",
"_id": "161",
"_score": null,
"_source": {
"account_number": 161,
"balance": 4659,
"firstname": "Doreen",
"lastname": "Randall",
"age": 37,
"gender": "F",
"address": "178 Court Street",
"employer": "Calcula",
"email": "doreenrandall@calcula.com",
"city": "Belmont",
"state": "TX"
},
"sort": [
4659
]
}
]
}
}
},
{
"key": "md",
"doc_count": 28,
"balances": {
"hits": {
"total": 28,
"max_score": null,
"hits": [
{
"_index": "bank",
"_type": "account",
"_id": "527",
"_score": null,
"_source": {
"account_number": 527,
"balance": 2028,
"firstname": "Carver",
"lastname": "Peters",
"age": 35,
"gender": "M",
"address": "816 Victor Road",
"employer": "Housedown",
"email": "carverpeters@housedown.com",
"city": "Nadine",
"state": "MD"
},
"sort": [
2028
]
}
]
}
}
},
{
"key": "id",
"doc_count": 27,
"balances": {
"hits": {
"total": 27,
"max_score": null,
"hits": [
{
"_index": "bank",
"_type": "account",
"_id": "402",
"_score": null,
"_source": {
"account_number": 402,
"balance": 1282,
"firstname": "Pacheco",
"lastname": "Rosales",
"age": 32,
"gender": "M",
"address": "538 Pershing Loop",
"employer": "Circum",
"email": "pachecorosales@circum.com",
"city": "Elbert",
"state": "ID"
},
"sort": [
1282
]
}
]
}
}
},
which is not sorted by price.
Try like this:
POST bank/_search
{
"size": 0,
"aggs": {
"states": {
"terms": {
"field": "state",
"order": {
"balances": "asc"
}
},
"aggs": {
"balances": {
"sum": {
"field": "balance"
}
}
}
}
}
}
Note: I don't see a price field, but a balance one, maybe that's the one you meant.
If you're interested in getting all documents by state sorted by price, then you can try this, too:
POST bank/_search
{
"size": 0,
"aggs": {
"states": {
"terms": {
"field": "state"
},
"aggs": {
"balances": {
"top_hits": {
"size": 5,
"sort": {"balance": "asc"}
}
}
}
}
}
}

ElasticSearch inconsistent relevance

I'm using Elasticsearch to search for movies by the actors that played in them. When I search for e.g. "leonardo dicaprio" I get back 10 or so movies, but they all have different scores. Since they all have the same actor, I would expect them to have the same score. Is anyone able to shed some light on why this is happening and, hopefully, how to stop it?
Elasticsearch version 1.7.2
Mapping:
{
"programs": {
"mappings": {
"program_doc_type": {
"properties": {
"cast": {
"type": "string",
"analyzer": "keyword_analyzer",
"fields": {
"name": {
"type": "string",
"analyzer": "name_analyzer"
}
}
},
"django_id": {
"type": "integer"
},
"has_poster": {
"type": "boolean"
},
"imdb_id": {
"type": "string",
"index": "not_analyzed"
},
"kind": {
"type": "string",
"index": "not_analyzed"
},
"record_url_count": {
"type": "integer"
},
"release_date": {
"type": "date",
"format": "dateOptionalTime"
},
"release_year": {
"type": "integer"
},
"title": {
"type": "string",
"analyzer": "pattern"
},
"tms_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
Analyzers:
"analysis": {
"analyzer": {
"keyword_analyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "keyword"
},
"name_analyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "whitespace"
}
}
}
Query:
{
"query": {
"match": {"cast.name": "leonardo dicaprio"}
}
}
First Page Result:
{
"took": 12,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 62,
"max_score": 12.046804,
"hits": [
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "1077511",
"_score": 12.046804,
"_source": {
"imdb_id": "tt4007278",
"tms_id": "",
"record_url_count": 0,
"release_date": "2014-08-20",
"title": "Carbon",
"has_poster": false,
"release_year": 2014,
"django_id": 1077511,
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "812919",
"_score": 11.906615,
"_source": {
"imdb_id": "tt2076929",
"tms_id": "",
"record_url_count": 0,
"title": "Satori",
"has_poster": false,
"release_year": 2014,
"django_id": 812919,
"kind": "N/A",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "376792",
"_score": 11.886408,
"_source": {
"imdb_id": "tt0402538",
"tms_id": "",
"record_url_count": 0,
"title": "Titanic: The Premiere",
"has_poster": true,
"release_year": 2000,
"django_id": 376792,
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "306106",
"_score": 11.69776,
"_source": {
"imdb_id": "tt0325727",
"tms_id": "",
"record_url_count": 0,
"release_date": "1998-08-16",
"title": "Leo Mania",
"has_poster": true,
"release_year": 1998,
"django_id": 306106,
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "269743",
"_score": 9.637444,
"_source": {
"imdb_id": "tt0286234",
"tms_id": "",
"record_url_count": 0,
"title": "Total Eclipse",
"has_poster": false,
"release_year": 1995,
"django_id": 269743,
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Agnieszka Holland"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "840945",
"_score": 9.358208,
"_source": {
"imdb_id": "tt2195237",
"tms_id": "",
"record_url_count": 0,
"release_date": "2004-12-01",
"title": "MovieReal: The Aviator",
"has_poster": false,
"release_year": 2004,
"django_id": 840945,
"kind": "series",
"cast": [
"Leonardo DiCaprio",
"Martin Scorsese"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "382168",
"_score": 9.358208,
"_source": {
"imdb_id": "tt0408269",
"tms_id": "",
"record_url_count": 0,
"release_date": "1998-09-29",
"title": "To Leo with Love",
"has_poster": true,
"release_year": 1998,
"django_id": 382168,
"kind": "movie",
"cast": [
"Jo Wyatt",
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "846212",
"_score": 7.2280827,
"_source": {
"imdb_id": "tt2218442",
"tms_id": "",
"record_url_count": 0,
"title": "Legacy of Secrecy",
"has_poster": false,
"release_year": 1947,
"django_id": 846212,
"kind": "N/A",
"cast": [
"Leonardo DiCaprio",
"Robert De Niro",
"D'Anthony Palms"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "595027",
"_score": 7.1439695,
"_source": {
"imdb_id": "tt1294988",
"tms_id": "",
"record_url_count": 0,
"release_date": "2006-09-27",
"title": "Emporio Armani 'Red' One Night Only",
"has_poster": false,
"release_year": 2006,
"django_id": 595027,
"kind": "movie",
"cast": [
"Kim Cattrall",
"Leonardo DiCaprio",
"Beyoncé Knowles"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "752646",
"_score": 7.1439695,
"_source": {
"imdb_id": "tt1826731",
"tms_id": "",
"record_url_count": 0,
"release_date": "2009-06-02",
"title": "Lives of Quiet Desperation: The Making of Revolutionary Road",
"has_poster": false,
"release_year": 2009,
"django_id": 752646,
"kind": "movie",
"cast": [
"Kathy Bates",
"Leonardo DiCaprio",
"Kate Winslet"
]
}
}
]
}
}
UPDATE:
I disabled the field-length norm, and that seems to have improved things a lot, but the scores still aren't all the same. I'm still confused. According to what I've read, there are three factors that determine relevancy:
Term frequency
Inverse document frequency
Field length norm (disabled)
Since each program only has Leonardo DiCaprio one time, it seems to me that they should have identical scores, but they don't. Maybe I'm misunderstanding. Here are the updated settings after disabling the field-length norm:
Mapping:
{
"programs": {
"mappings": {
"program_doc_type": {
"properties": {
"cast": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "keyword_analyzer",
"fields": {
"name": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "name_analyzer"
}
}
},
"django_id": {
"type": "integer"
},
"has_poster": {
"type": "boolean"
},
"imdb_id": {
"type": "string",
"index": "not_analyzed"
},
"kind": {
"type": "string",
"index": "not_analyzed"
},
"record_url_count": {
"type": "integer"
},
"release_date": {
"type": "date",
"format": "dateOptionalTime"
},
"release_year": {
"type": "integer"
},
"title": {
"type": "string",
"analyzer": "pattern"
},
"tms_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
First Page Result:
{
"took": 20,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 836,
"max_score": 13.778852,
"hits": [
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "421026",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 421026,
"imdb_id": "tt0449557",
"has_poster": false,
"release_date": "2005-05-24",
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Jeffrey M. Schwartz",
"Donald L. Barlett",
"James B. Steele"
],
"release_year": 2005,
"record_url_count": 0,
"title": "The Affliction of Howard Hughes: Obsessive-Compulsive Disorder"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "555015",
"_score": 13.778852,
"_source": {
"tms_id": "MV002510340000",
"django_id": 555015,
"imdb_id": "tt1130884",
"has_poster": true,
"release_date": "2010-02-19",
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Mark Ruffalo",
"Ben Kingsley",
"Max von Sydow"
],
"release_year": 2010,
"record_url_count": 2,
"title": "Shutter Island"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "104669",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 104669,
"imdb_id": "tt0108330",
"has_poster": true,
"release_date": "1993-04-23",
"kind": "movie",
"cast": [
"Robert De Niro",
"Ellen Barkin",
"Leonardo DiCaprio",
"Jonah Blechman"
],
"release_year": 1993,
"record_url_count": 1,
"title": "This Boy's Life"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "846212",
"_score": 13.778852,
"_source": {
"django_id": 846212,
"title": "Legacy of Secrecy",
"imdb_id": "tt2218442",
"has_poster": false,
"kind": "N/A",
"cast": [
"Leonardo DiCaprio",
"Robert De Niro",
"D'Anthony Palms"
],
"release_year": 1947,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "256632",
"_score": 13.778852,
"_source": {
"django_id": 256632,
"title": "The Movie Show",
"imdb_id": "tt0271918",
"has_poster": false,
"kind": "series",
"cast": [
"Ray Brady",
"Russell Crowe",
"Larry Day",
"Leonardo DiCaprio"
],
"release_year": 1986,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "269743",
"_score": 13.778852,
"_source": {
"django_id": 269743,
"title": "Total Eclipse",
"imdb_id": "tt0286234",
"has_poster": false,
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Agnieszka Holland"
],
"release_year": 1995,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "1007190",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 1007190,
"imdb_id": "tt3391950",
"has_poster": false,
"release_date": "2013-12-29",
"kind": "series",
"cast": [
"Leonardo DiCaprio",
"Jonah Hill",
"Martin Scorsese",
"Terence Winter"
],
"release_year": 2013,
"record_url_count": 0,
"title": "The Hollywood Reporter in Focus"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "1077511",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 1077511,
"imdb_id": "tt4007278",
"has_poster": false,
"release_date": "2014-08-20",
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
],
"release_year": 2014,
"record_url_count": 0,
"title": "Carbon"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "302615",
"_score": 13.57246,
"_source": {
"django_id": 302615,
"title": "Directors: James Cameron",
"imdb_id": "tt0322031",
"has_poster": true,
"kind": "movie",
"cast": [
"Michael Biehn",
"James Cameron",
"Jamie Lee Curtis",
"Leonardo DiCaprio"
],
"release_year": 1997,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "509785",
"_score": 13.57246,
"_source": {
"tms_id": "",
"django_id": 509785,
"imdb_id": "tt0923573",
"has_poster": false,
"release_date": "2003-05-06",
"kind": "movie",
"cast": [
"Frank Abagnale Jr.",
"Amy Adams",
"Nathalie Baye",
"Leonardo DiCaprio"
],
"release_year": 2003,
"record_url_count": 0,
"title": "'Catch Me If You Can': The Casting of the Film"
}
}
]
}
}
The results are MUCH improved, but the last 2 still have different scores from the rest of the results.
Elasticsearch's default relevancy model is called TF/IDF. You can read more about it here.
The _score you see in your search hits is calculated by this model.
Basically, the score is a result of a calculation on three factors (more info here):
Term frequency - How often does a term appear in a specific document? TF
Inverse document frequency - How often does the term appear in all documents in the collection? IDF
Field-length norm - How long is the field?
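As you can infer from the above, each document that contains leonardo dicaprio differs in its term frequency, in the length of its fields, and in how often the matching terms occur across the index, so its relevancy score differs as well. Note also that IDF is computed per shard by default, so with 5 shards, otherwise identical documents stored on different shards can still receive slightly different scores even when the field-length norm is disabled.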
Nevertheless, documents that contain leonardo dicaprio get higher scores than those that don't.
Hope it helps.

Extract top visited websites from logs

We are storing log data containing information about sites that have been visited from our network. I'd like to query the top 10 visited websites. How can I achieve this with Elasticsearch? The index mapping is as follows:
{
"data" : {
"properties" : {
"date": {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss"
},
"status": {"type" : "string"},
"group": {"type" : "string"},
"ip": {"type" : "ip"},
"username":{"type" : "string"},
"category":{"type" : "string"},
"url":{"type" : "string"}
}
}
}
Sample Data:
"hits": {
"total": 7,
"max_score": 1,
"hits": [
{
"_index": "squid",
"_type": "data",
"_id": "AU_DT4_ibdcNyAnt753J",
"_score": 1,
"_source": {
"date": "2015-08-16T00:02:00.195Z",
"status": "PASS",
"group": "level3",
"ip": "10.249.10.49",
"username": "Hyder",
"category": "ads",
"url": "https://gmail.com/mail/u/0/#inbox"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_DMjDpbdcNyAnt75iB",
"_score": 1,
"_source": {
"date": "2015-08-15T00:01:00.195Z",
"status": "BLOCK",
"group": "level3",
"ip": "10.249.10.51",
"username": "Fary",
"category": "ads",
"url": "https://gmail.com/details/blabla"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_DT94kbdcNyAnt753Y",
"_score": 1,
"_source": {
"date": "2015-08-17T00:02:00.195Z",
"status": "PASS",
"group": "level3",
"ip": "10.249.10.49",
"username": "Hyder",
"category": "news",
"url": "http://aol.com"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_CwTEqbdcNyAnt74RJ",
"_score": 1,
"_source": {
"date": "2015-08-15T00:00:00.195Z",
"status": "PASS",
"group": "level3",
"ip": "10.249.10.49",
"username": "Hyder",
"category": "Blog",
"url": "http://gmail.com"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_DMmUQbdcNyAnt75iQ",
"_score": 1,
"_source": {
"date": "2015-08-15T00:02:00.195Z",
"status": "PASS",
"group": "level3",
"ip": "10.249.10.51",
"username": "Fary",
"category": "ads",
"url": "http://yahoo.com/vbfhghfgjfdgfd"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_DT1yjbdcNyAnt753B",
"_score": 1,
"_source": {
"date": "2015-08-16T00:02:00.195Z",
"status": "REDIR",
"group": "level3",
"ip": "10.249.10.49",
"username": "Hyder",
"category": "ads",
"url": "http://news.yahoo.com/"
}
},
{
"_index": "squid",
"_type": "data",
"_id": "AU_DMV1ObdcNyAnt75hd",
"_score": 1,
"_source": {
"date": "2015-08-15T00:01:00.195Z",
"status": "BLOCK",
"group": "level3",
"ip": "10.249.10.50",
"username": "Kamal",
"category": "Blog",
"url": "http://hotmail.com/dfdgfgfdg"
}
}
]
What I'd like to have:
Top visited sites:
- **Sites - Hits**
- gmail.com - 3
- yahoo.com - 2
- hotmail.com - 1
- aol.com - 1
First, you need to extract the base site (like gmail.com) from the URL field before indexing and add it to a new field. Let's assume this new field is baseSite.
Then, you need to follow exactly what is described in this blog.
First map the field baseSite as not_analyzed, and then run a terms aggregation on that field.

Resources