Why is my Elasticsearch search slow? - elasticsearch

I have the following mapping in Elasticsearch 5.5:
"append": {
"type": "short"
},
"comment": {
"type": "text",
"index": "analyzed"
},
"create_time": {
"type": "date"
},
"desc_score": {
"type": "integer"
},
"goods_id": {
"type": "long"
},
"goods_name": {
"type": "keyword",
"index": "not_analyzed"
},
"handler": {
"type": "keyword",
"index": "not_analyzed"
},
"is_first_review": {
"type": "short"
},
"logistic_score": {
"type": "integer"
},
"mall_id": {
"type": "long"
},
"order_id": {
"type": "long"
},
"order_sn": {
"type": "keyword",
"index": "not_analyzed"
},
"parent_id": {
"type": "keyword",
"index": "not_analyzed"
},
"review_id": {
"type": "long"
},
"score": {
"type": "integer"
},
"service_score": {
"type": "integer"
},
"shipping_id": {
"type": "long"
},
"shipping_name": {
"type": "keyword",
"index": "not_analyzed"
},
"status": {
"type": "short"
},
"tracking_number": {
"type": "keyword",
"index": "not_analyzed"
},
"update_time": {
"type": "date"
},
"user_id": {
"type": "long"
}
When I profile the following query in Kibana, I get the result shown in the picture.
{
"from": 0,
"size": 10,
"query": {
"bool": {
"filter": [
{
"terms": {
"goods_id": [
"262628158"
],
"boost": 1.0
}
},
{
"terms": {
"status": [
"2",
"4"
],
"boost": 1.0
}
},
{
"range": {
"create_time": {
"from": "1514027649",
"to": "1514632449",
"include_lower": true,
"include_upper": true,
"boost": 1.0
}
}
}
],
"disable_coord": false,
"adjust_pure_negative": true,
"boost": 1.0
}
},
"sort": [
{
"create_time": {
"order": "desc"
}
}
]
}
I am confused about why the status filter takes so much time. status is a field with only five distinct values; this is perhaps the cause of the problem, but I do not know how to optimize my search clause. I have searched a lot on Google but have not found an answer yet. Can anyone help me?

I finally found the answer: Elasticsearch 5.5 uses a kd-tree to index numeric data types (short, long, etc.) in order to optimize range searches, as described in the following articles:
Better Query Planning For Range Queries
Tune For Search Speed
Elasticsearch Query Execution Order
A kd-tree is not well suited to exact term lookups, so I changed the field type to keyword.

Related

Elasticsearch match or term always failing

I have a problem with a query that returns no results. When I execute the following query with either match or term:
{
"size": 1,
"query": {
"bool": {
"must": [
{ "term": { "ALERT_TYPE.raw": "ERROR" }}
],
"filter": [
{ "range": {
"#timestamp": {
"gte": "2018-02-01T00:00:01.000Z",
"lte": "2018-02-28T23:55:55.000Z"
}
}}
]
}
}
}
I always get the following response:
{
"took": 92,
"timed_out": false,
"_shards": {
"total": 215,
"successful": 215,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
But I'm sure the element is present, because when I run a match_all query, the first hit is the following:
{
"took": 269,
"timed_out": false,
"_shards": {
"total": 210,
"successful": 210,
"failed": 0
},
"hits": {
"total": 68292,
"max_score": 1,
"hits": [
{
"_index": "logstash-2018.02.22",
"_type": "alert",
"_id": "AWEdVphtJjppDZ0FiAz-",
"_score": 1,
"_source": {
"#version": "1",
"#timestamp": "2018-02-22T10:07:41.549Z",
"path": "/something",
"host": "host.host",
"type": "alert",
"SERVER_TYPE": "STANDALONE",
"LOG_FILE": "log.log",
"DATE": "2018-02-22 11:02:02,367",
"ALERT_TYPE": "ERROR",
"MESSAGE": "There is an error"
}
}
]
}
}
Here I can see that the field has the value I am expecting. From the mapping I know that the field is analyzed by the default analyzer and that the raw sub-field is not analyzed (thanks to the answer of Glenn Van Schil). The mapping is generated dynamically by Logstash, but it looks like this for the type I'm looking into:
"alert": {
"_all": {
"enabled": true,
"omit_norms": true
},
"dynamic_templates": [
{
"message_field": {
"mapping": {
"index": "analyzed",
"omit_norms": true,
"fielddata": { "format": "disabled" },
"type": "string"
},
"match": "message",
"match_mapping_type": "string"
}
},
{
"string_fields": {
"mapping": {
"index": "analyzed",
"omit_norms": true,
"fielddata": { "format": "disabled" },
"type": "string",
"fields": {
"raw": {
"index": "not_analyzed",
"ignore_above": 256,
"type": "string"
}
}
},
"match": "*",
"match_mapping_type": "string"
}
}
],
"properties": {
"#timestamp": { "type": "date", "format": "strict_date_optional_time||epoch_millis" },
"#version": { "type": "string", "index": "not_analyzed" },
"ALERT_TYPE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"DATE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"LOG_FILE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"MESSAGE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"SERVER_TYPE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"geoip": {
"dynamic": "true",
"properties": {
"ip": { "type": "ip" },
"latitude": { "type": "float" },
"location": { "type": "geo_point" },
"longitude": { "type": "float" }
}
},
"host": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"path": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"type": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
}
Does anyone have a clue why this query keeps returning nothing? Maybe there is something in the mapping that I am missing which explains why the match or term query keeps failing? I'm running out of ideas about what is happening, and I'm quite new to Elasticsearch and Logstash.
Versions of tools and environment :
OS: RHEL Server 6.5 (Santiago)
Java: 1.7.0_91
Elasticsearch: 2.4.6
Lucene: 5.5.4
Logstash: 2.4.1
This is not really an answer, but it was too complicated to write as a comment.
from the mapping i know the field is not analysed.
You are searching for ALERT_TYPE, but this one is in fact analyzed with the default analyzer since you did not specify any analyzer directly under your ALERT_TYPE's mapping.
However, your ALERT_TYPE has an internal field named raw that is not analyzed. If you want to search documents using the raw field you'll need to change the query from
"must": [
{ "term": { "ALERT_TYPE": "ERROR" }}
]
to
"must": [
{ "term": { "ALERT_TYPE.raw": "ERROR" }}
]

Elasticsearch geo_distance in combination with other queries

Hello I have a problem with the combination of multiple queries within Elasticsearch.
The problem only occurs whenever I try to combine a multi_match query with the geo_distance query. The multi_match query works when the geo_distance query is not present and the geo_distance query works when the multi_match query is not present.
Whenever I execute the multi_match query without the geo_distance query I get the results that I expect. I also get the expected results when I try the geo_distance query without the multi_match query.
Both result sets contain the documents that I would expect to receive when both queries are executed together. But whenever I execute them together I receive 0 results.
When I combine the geo_distance query with a simple term query, the search works. So I presume it is a problem with the combination of queries.
I would appreciate any ideas.
My query is the following:
{
"query": {
"bool": {
"must": {
"bool": {
"should": {
"multi_match": {
"query": "CompanyName GmbH",
"fields": [
"originalName",
"legalName"
],
"type": "cross_fields",
"operator": "AND"
}
}
}
},
"filter": {
"bool": {
"should": {
"geo_distance": {
"location": [
9.87107,
51.69915
],
"distance": "30.0km",
"distance_type": "arc"
}
}
}
}
}
}
}
The mapping behind all of that is:
{
"customer": {
"aliases": {
},
"mappings": {
"customer-entity": {
"properties": {
"communication": {
"properties": {
"domain": {
"type": "string"
},
"email": {
"type": "string"
},
"landline": {
"type": "string"
},
"mobile": {
"type": "string"
}
}
},
"id": {
"type": "long"
},
"legalName": {
"type": "string",
"store": true
},
"location": {
"type": "geo_point"
},
"operatingModes": {
"type": "string"
},
"originalName": {
"type": "string",
"store": true
}
}
},
"homepage-entity": {
"_parent": {
"type": "customer-entity"
},
"_routing": {
"required": true
},
"properties": {
"customerId": {
"type": "string",
"store": true
},
"id": {
"type": "long"
},
"metas": {
"type": "string",
"store": true
}
}
},
"person-entity": {
"_parent": {
"type": "customer-entity"
},
"_routing": {
"required": true
},
"properties": {
"customerId": {
"type": "string",
"store": true
},
"firstName": {
"type": "string",
"store": true
},
"id": {
"type": "long"
},
"lastName": {
"type": "string",
"store": true
},
"personId": {
"type": "string",
"store": true
}
}
}
},
"settings": {
"index": {
"refresh_interval": "-1",
"number_of_shards": "1",
"creation_date": "1488920698118",
"store": {
"type": "fs"
},
"number_of_replicas": "0",
"uuid": "ZcLN5sxASXGUnKZMg8mBpw",
"version": {
"created": "2040499"
}
}
},
"warmers": {
}
}
}

Searching on fields of a nested object on elasticsearch

I have this mapping on ES 1.7.3:
{
"customer": {
"aliases": {},
"mappings": {
"customer": {
"properties": {
"addresses": {
"type": "nested",
"include_in_parent": true,
"properties": {
"address1": {
"type": "string"
},
"address2": {
"type": "string"
},
"address3": {
"type": "string"
},
"country": {
"type": "string"
},
"latitude": {
"type": "double",
"index": "not_analyzed"
},
"longitude": {
"type": "double",
"index": "not_analyzed"
},
"postcode": {
"type": "string"
},
"state": {
"type": "string"
},
"town": {
"type": "string"
},
"unit": {
"type": "string"
}
}
},
"companyNumber": {
"type": "string"
},
"id": {
"type": "string",
"index": "not_analyzed"
},
"name": {
"type": "string"
},
"status": {
"type": "string"
},
"timeCreated": {
"type": "date",
"format": "dateOptionalTime"
},
"timeUpdated": {
"type": "date",
"format": "dateOptionalTime"
}
}
}
},
"settings": {
"index": {
"refresh_interval": "1s",
"number_of_shards": "5",
"creation_date": "1472372294516",
"store": {
"type": "fs"
},
"uuid": "RxJdXvPWSXGpKz8pdcF91Q",
"version": {
"created": "1050299"
},
"number_of_replicas": "1"
}
},
"warmers": {}
}
}
The spring application generates this query:
{
"query": {
"bool": {
"should": {
"query_string": {
"query": "(addresses.\\*:sample* AND NOT status:ARCHIVED)",
"fields": [
"type",
"name",
"companyNumber",
"status",
"addresses.unit",
"addresses.address1",
"addresses.address2",
"addresses.address3",
"addresses.town",
"addresses.state",
"addresses.postcode",
"addresses.country"
],
"default_operator": "or",
"analyze_wildcard": true
}
}
}
}
}
in which "addresses.*:sample*" is the only input.
"query": "(sample* AND NOT status:ARCHIVED)"
The query above works but searches all fields of the customer object.
Since I want to search only the address fields, I used "addresses.*".
That query worked as long as all fields of the address object were of string type, i.e. before I added the longitude and latitude fields of type double to the address object. Now an error occurs because of these two new fields.
Error:
Parse Failure [Failed to parse source [{
"query": {
"bool": {
"should": {
"query_string": {
"query": "(addresses.\\*:sample* AND NOT status:ARCHIVED)",
"fields": [
"type",
"name",
"companyNumber","country",
"state",
"status",
"addresses.unit",
"addresses.address1",
"addresses.address2",
"addresses.address3",
"addresses.town",
"addresses.state",
"addresses.postcode",
"addresses.country",
],
"default_operator": "or",
"analyze_wildcard": true
}
}
}
}
}
]]
NumberFormatException[For input string: "sample"
Is there a way to search only the string fields within a nested object using just addresses.*?
The solution was to add "lenient": true. As per the documentation: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
lenient - If set to true will cause format based failures (like providing text to a numeric field) to be ignored.

ElasticSearch term query vs query_string?

When I query my index with query_string, I get results.
But when I query using a term query, I don't get any results.
{
"query": {
"bool": {
"must": [],
"must_not": [],
"should": [
{
"query_string": {
"default_field": "Printer.Name",
"query": "HL-2230"
}
}
]
}
},
"from": 0,
"size": 10,
"sort": [],
"aggs": {}
}
I know that term is not analyzed and query_string is analyzed, but Name is already stored as "HL-2230", so why doesn't it match with the term query? I also tried searching with "hl-2230", but I still didn't get any results.
EDIT: The mapping looks like the one below. Printer is the child of Product. Not sure if this makes a difference.
{
"state": "open",
"settings": {
"index": {
"creation_date": "1453816191454",
"number_of_shards": "5",
"number_of_replicas": "1",
"version": {
"created": "1070199"
},
"uuid": "TfMJ4M0wQDedYSQuBz5BjQ"
}
},
"mappings": {
"Product": {
"properties": {
"index": "not_analyzed",
"store": true,
"type": "string"
},
"ProductName": {
"type": "nested",
"properties": {
"Name": {
"store": true,
"type": "string"
}
}
},
"ProductCode": {
"type": "string"
},
"Number": {
"index": "not_analyzed",
"store": true,
"type": "string"
},
"id": {
"index": "no",
"store": true,
"type": "integer"
},
"ShortDescription": {
"store": true,
"type": "string"
},
"Printer": {
"_routing": {
"required": true
},
"_parent": {
"type": "Product"
},
"properties": {
"properties": {
"RelativeUrl": {
"index": "no",
"store": true,
"type": "string"
}
}
},
"PrinterId": {
"index": "no",
"store": true,
"type": "integer"
},
"Name": {
"store": true,
"type": "string"
}
}
},
"aliases": []
}
}
As per mapping provided by you above
"Name": {
"store": true,
"type": "string"
}
Name is analyzed, so HL-2230 will be split into two tokens, HL and 2230. That's why the term query is not working while query_string is working: a term query searches for the exact term HL-2230, which does not exist in the index.

Elasticsearch big data

I'm new to Elasticsearch and I have a problem.
I have 1 million rows of data and the query takes too long to return results.
When I had 150k rows it took 0.5 s; now it takes 10 s.
The amount of data differs each day (one day it can be 150k, another day 1 million, etc.).
I need advice on how to make it faster.
Mapping
{
"mappings": {
"Jobs": {
"_ttl": {
"enabled": true,
"default": "1d"
},
"properties": {
"id": {
"type": "integer"
},
"advertiser_id": {
"type": "integer"
},
"company_id": {
"type": "integer"
},
"feed_id": {
"type": "integer"
},
"description_unique": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"title": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"city": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"county": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"country": {
"type": "integer"
},
"description": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"store": true
}
}
},
"company": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"url": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"premium": {
"type": "integer"
},
"bid": {
"type": "integer"
},
"created": {
"type": "date",
"format": "dateOptionalTime",
"default": "basic_date"
},
"updated": {
"type": "date",
"format": "dateOptionalTime"
}
}
}
}
}
Query
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Survey Developer",
"type": "best_fields",
"fields": [
"title",
"description"
],
"operator": "and"
}
}
]
}
},
"highlight": {
"boundary_chars": ".,!? \t\n",
"tag_schema": "styled",
"pre_tags": [
"<b>"
],
"post_tags": [
"</b>"
],
"fields": {
"description": {
"fragment_size": 200,
"number_of_fragments": 3
}
}
},
"sort": [
{
"premium": {
"order": "desc"
}
},
{
"bid": {
"order": "desc"
}
}
]
}
Server parameters:
CPU 1 vCPU
RAM 1 GB
System Disk 40 GB
Network 120 Mb/s

Resources