Logstash transfers data to Elasticsearch: date parsing error - elasticsearch

This is my Mysql.config:
input {
stdin {
}
jdbc {
jdbc_connection_string => "jdbc:mysql://localhost:3306/xc_course?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=UTC"
# the user we wish to execute our statement as
jdbc_user => "root"
jdbc_password => "1234"
# the path to our downloaded jdbc driver
jdbc_driver_library =>"/usr/local/elasticsearch/logstash/mysql-connector-java-5.1.4.jar"
# the name of the driver class for mysql
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
#statement_filepath => "/conf/course.sql"
statement => "select * from course_pub where timestamp > date_add(:sql_last_value,INTERVAL 8 HOUR)"
schedule => "* * * * *"
record_last_run => true
last_run_metadata_path => "/usr/local/elasticsearch/elasticsearch-6.2.1/config/logstash_metadata"
}
}
output {
elasticsearch {
hosts => "localhost:9200"
#hosts => ["localhost:9200","localhost:9202","localhost:9203"]
index => "xc_course"
document_id => "%{id}"
document_type => "doc"
template =>"/usr/local/elasticsearch/logstash-6.2.1/config/xc_course_template.json"
template_name =>"xc_course"
template_overwrite =>"true"
}
stdout {
codec => json_lines
}
}
This is my template.json:
{
"mappings": {
"doc": {
"properties": {
"charge": {
"type": "keyword"
},
"description": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"end_time": {
"format": "yyyy-MM-dd HH:mm:ss",
"type": "date"
},
"expires": {
"format": "yyyy-MM-dd HH:mm:ss",
"type": "date"
},
"grade": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"mt": {
"type": "keyword"
},
"name": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"pic": {
"index": false,
"type": "keyword"
},
"price": {
"type": "float"
},
"price_old": {
"type": "float"
},
"pub_time": {
"format": "yyyy-MM-dd HH:mm:ss",
"type": "date"
},
"qq": {
"index": false,
"type": "keyword"
},
"st": {
"type": "keyword"
},
"start_time": {
"format": "yyyy-MM-dd HH:mm:ss",
"type": "date"
},
"status": {
"type": "keyword"
},
"studymodel": {
"type": "keyword"
},
"teachmode": {
"type": "keyword"
},
"teachplan": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"users": {
"index": false,
"type": "text"
},
"valid": {
"type": "keyword"
}
}
}
},
"template": "xc_course"
}
This is my Elasticsearch index mapping:
{
"properties": {
"description": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"grade": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"mt": {
"type": "keyword"
},
"name": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"users": {
"index": false,
"type": "text"
},
"charge": {
"type": "keyword"
},
"valid": {
"type": "keyword"
},
"pic": {
"index": false,
"type": "keyword"
},
"qq": {
"index": false,
"type": "keyword"
},
"price": {
"type": "float"
},
"price_old": {
"type": "float"
},
"st": {
"type": "keyword"
},
"status": {
"type": "keyword"
},
"studymodel": {
"type": "keyword"
},
"teachmode": {
"type": "keyword"
},
"teachplan": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"expires": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"pub_time": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"start_time": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"end_time": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
But when it starts, I get this error:
Could not index event to Elasticsearch. {:status=>400, :action=>["index", {:_id=>"4028e58161bd3b380161bd3bcd2f0000", :_index=>"xc_course", :_type=>"doc", :_routing=>nil}, #LogStash::Event:0x62e0a8f9], :response=>{"index"=>{"_index"=>"xc_course", "_type"=>"doc", "_id"=>"4028e58161bd3b380161bd3bcd2f0000", "status"=>400, "error"=>{"type"=>"mapper_parsing_exception", "reason"=>"failed to parse [start_time]", "caused_by"=>{"type"=>"illegal_argument_exception", "reason"=>"Invalid format: \"2019-12-20T15:18:13.000Z\" is malformed at \"-12-20T15:18:13.000Z\""}}}}}
The key part is:
failed to parse [start_time], caused by illegal_argument_exception:
"Invalid format: \"2019-12-20T15:18:13.000Z\" is malformed at \"-12-20T15:18:13.000Z\""
But in my database the value looks like this: 2019-12-28 19:24:41.
When I set the date to null it works, but a value like 2019-12-28 19:24:41 causes the parsing error.
So how do I deal with this?

The jdbc input automatically converts date columns to the LogStash::Timestamp type, which is serialized as an ISO 8601 string (e.g. 2019-12-20T15:18:13.000Z), but your index template only accepts yyyy-MM-dd HH:mm:ss. Remove the "format" from the date fields in your index template.
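As a concrete sketch (my own illustration of that fix, not code from the original answer), the date fields in xc_course_template.json could be reduced to:

"start_time": {
  "type": "date"
}

or, if the MySQL-style strings should still be accepted alongside what Logstash sends, the format can list several patterns:

"start_time": {
  "type": "date",
  "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis"
}

The same change applies to end_time, expires, and pub_time. Since templates are only applied when an index is created, the existing xc_course index has to be deleted or reindexed before the new template takes effect.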

Related

How to migrate data from ES 2.1 to ES 7 with Logstash

[ERROR][logstash.outputs.elasticsearch][main] Failed to install
template {:message=>"Got response code '400' contacting Elasticsearch
at URL 'http://127.0.0.1:9200/_template/ecs-logstash'",
:exception=>LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError
Original ES version: 2.1.2; new ES version: 7.13; Logstash version: 8.1.1.
I have the index below on my ES2:
"designs_v1": {
"mappings": {
"design": {
"dynamic": "false",
"_all": {
"enabled": false
},
"_id": {
"store": true,
"index": "not_analyzed"
},
"_timestamp": {
"enabled": true,
"store": true
},
"properties": {
"createDate": {
"type": "date",
"fielddata": {
"__comment": "Used for sorting",
"loading": "eager"
},
"format": "epoch_millis||date_time"
},
"designId": {
"type": "long",
"fielddata": {
"__comment": "Used for sorting to break ties and accessed by our custom scorer",
"loading": "eager"
}
},
"editorialTags": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "shingle"
},
"stemmed": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "kstem"
}
}
},
"isPersonalizable": {
"type": "boolean"
},
"legalBlockTags": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "standard_with_stopwords"
},
"memberId": {
"type": "long"
},
"pixel_height": {
"type": "integer"
},
"pixel_width": {
"type": "integer"
},
"products": {
"type": "nested",
"properties": {
"caption": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "standard_with_stopwords"
},
"createDate": {
"type": "date",
"format": "epoch_millis||date_time"
},
"defaultThumbnail": {
"type": "integer"
},
"description": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "standard_with_stopwords"
},
"hasPersonalizableSvg": {
"type": "boolean"
},
"imageOneId": {
"type": "long"
},
"imageTwoId": {
"type": "long"
},
"locations": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "keyword"
},
"productId": {
"type": "long"
},
"productTypeId": {
"type": "integer",
"fielddata": {
"__comment": "Used during aggregations",
"loading": "eager"
}
},
"showColorId": {
"type": "integer"
},
"storeId": {
"type": "long"
}
}
},
"sellerTags": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "shingle"
},
"stemmed": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "kstem"
}
}
}
}
}
}
}
I created a new index in ES7:
{
"mappings": {
// "_id": {
// "store": true,
// "index": "not_analyzed"
// },
// "_timestamp": {
// "enabled": true,
// "store": true
// },
"properties": {
"createDate": {
"type": "date",
"format": "epoch_millis||date_time"
},
"designId": {
"type": "long"
},
"editorialTags": {
"type": "text",
"norms": true,
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "text",
"norms": true,
"analyzer": "shingle"
},
"stemmed": {
"type": "text",
"norms": true,
"analyzer": "kstem"
}
}
},
"isPersonalizable": {
"type": "boolean"
},
"legalBlockTags": {
"type": "text",
"norms": false,
"analyzer": "standard_with_stopwords"
},
"memberId": {
"type": "long"
},
"pixel_height": {
"type": "integer"
},
"pixel_width": {
"type": "integer"
},
"products": {
"type": "nested",
"properties": {
"caption": {
"type": "text",
"norms": false,
"analyzer": "standard_with_stopwords"
},
"createDate": {
"type": "date",
"format": "epoch_millis||date_time"
},
"defaultThumbnail": {
"type": "integer"
},
"description": {
"type": "text",
"norms": false,
"analyzer": "standard_with_stopwords"
},
"hasPersonalizableSvg": {
"type": "boolean"
},
"imageOneId": {
"type": "long"
},
"imageTwoId": {
"type": "long"
},
"locations": {
"type": "text",
"norms": false,
"analyzer": "keyword"
},
"productId": {
"type": "long"
},
"productTypeId": {
"type": "integer"
},
"showColorId": {
"type": "integer"
},
"storeId": {
"type": "long"
}
}
},
"sellerTags": {
"type": "text",
"norms": true,
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "text",
"norms": true,
"analyzer": "shingle"
},
"stemmed": {
"type": "text",
"norms": true,
"analyzer": "kstem"
}
}
}
}
}
}
I want to migrate the data to ES7 with Logstash; below is my Logstash conf file:
input {
elasticsearch {
hosts => ["http://xxx:9200"]
index => "designs_v1"
type => "design"
size => 10
scroll => "1m"
}
}
filter {
}
output {
elasticsearch {
hosts => ["http://127.0.0.1:9200"]
index => "designs_v1"
#document_type => "%{[@metadata][_type]}"
document_id => "%{[@metadata][_id]}"
}
}
But I can't fix this issue:
[ERROR][logstash.outputs.elasticsearch][main] Failed to install
template {:message=>"Got response code '400' contacting Elasticsearch
at URL 'http://127.0.0.1:9200/_template/ecs-logstash'",
:exception=>LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError
What can I do now?
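Not an answer from the original thread, but a hedged sketch of two settings that are often relevant in this kind of Logstash 8 migration: docinfo/docinfo_target on the elasticsearch input are needed so that an %{[@metadata]...[_id]} reference is actually populated, and manage_template => false on the output stops Logstash from trying to install its default ecs-logstash template into the target cluster, which is exactly the request that returns the 400 above:

input {
  elasticsearch {
    hosts => ["http://xxx:9200"]
    index => "designs_v1"
    size => 10
    scroll => "1m"
    docinfo => true                       # copy each hit's _index/_type/_id into event metadata
    docinfo_target => "[@metadata][doc]"  # explicit target so the sprintf path below is predictable
  }
}
output {
  elasticsearch {
    hosts => ["http://127.0.0.1:9200"]
    index => "designs_v1"
    document_id => "%{[@metadata][doc][_id]}"
    manage_template => false              # skip installing the ecs-logstash template (the source of the 400)
  }
}

Whether the documents themselves then index cleanly depends on the ES7 mapping above; since ES7 has no mapping types, document_type should stay removed.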

Kibana doesn't store the right data in the right fields

OK, so now I have my mapping in Kibana.
Here's my mapping:
PUT logstash-2019.05.09
{
"mappings": {
"doc": {
"properties": {
"index": {
"_index": {
"type": "keyword"
},
"_type": {
"type": "text"
}
},
"@timestamp": {
"type": "date"
},
"ip": {
"type": "ip"
},
"extension": {
"type": "text"
},
"response": {
"type": "text"
},
"geo": {
"coordinates": {
"type": "geo_point"
},
"src": {
"type": "text"
},
"dest": {
"type": "text"
},
"srcdest": {
"type": "text"
}
},
"tags": {
"type": "text"
},
"utc_time": {
"type": "date"
},
"referer": {
"type": "text"
},
"agent": {
"type": "text"
},
"clientip": {
"type": "ip"
},
"bytes": {
"type": "integer"
},
"host": {
"type": "text"
},
"request": {
"type": "text"
},
"url": {
"type": "text"
},
"@message": {
"type": "text"
},
"spaces": {
"type": "text"
},
"xss": {
"type": "text"
},
"links": {
"type": "text"
},
"relatedContent": {
"url": {
"type": "text"
},
"og:type": {
"type": "text"
},
"og:title": {
"type": "text"
},
"og:description": {
"type": ""
},
"og:url": {
"type": ""
},
"article:published_time": {
"type": "date"
},
"article:modified_time": {
"type": "date"
},
"article:section": {
"type": "keyword"
},
"article:tag": {
"type": "text"
},
"og:image": {
"type": "text"
},
"og:image:height": {
"type": "integer"
},
"og:image:width": {
"type": "integer"
},
"og:site_name": {
"type": "text"
},
"twitter:title": {
"type": "text"
},
"twitter:description": {
"type": "text"
},
"twitter:card": {
"type": "keyword"
},
"twitter:image": {
"type": "text"
},
"twitter:site": {
"type": "keyword"
}
},
"machine": {
"os": {
"type": "text"
},
"ram": {
"type": "integer"
}
},
"@version": {
"type": "integer"
}
}
}
}
}
But I don't know why Kibana doesn't store the right information in the right fields. It just puts all the information into a message field. I think it's because I have dynamic mapping by default, but I'm not really sure. Here's the result:
Result (table view): screenshot not included.
Result (JSON):
{
"_index": "logstash-2019.05.09",
"_type": "doc",
"_id": "9zfam2oBWngGU4Wy3Id5",
"_version": 1,
"_score": null,
"_source": {
"@version": "1",
"@timestamp": "2019-05-09T09:09:32.167Z",
"path": "/home/secunix/logs/TestLogPourMapping_09_05.json",
"message": "{\"@timestamp\":\"2019-05-07T09:56:33.996Z\",\"ip\":\"181.144.250.19\",\"extension\":\"jpg\",\"response\":\"200\",\"geo\":{\"coordinates\":{\"lat\":44.12326,\"lon\":-123.2186856},\"src\":\"IN\",\"dest\":\"CN\",\"srcdest\":\"IN:CN\"},\"@tags\":[\"success\",\"info\"],\"utc_time\":\"2019-05-07T09:56:33.996Z\",\"referer\":\"http://www.slate.com/success/thomas-marshburn\",\"agent\":\"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.50 Safari/534.24\",\"clientip\":\"181.144.250.19\",\"bytes\":2553,\"host\":\"media-for-the-masses.theacademyofperformingartsandscience.org\",\"request\":\"/uploads/fyodor-yurchikhin.jpg\",\"url\":\"https://media-for-the-masses.theacademyofperformingartsandscience.org/uploads/fyodor-yurchikhin.jpg\",\"@message\":\"181.144.250.19 - - [2019-05-07T09:56:33.996Z] \\\"GET /uploads/fyodor-yurchikhin.jpg HTTP/1.1\\\" 200 2553 \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.50 Safari/534.24\\\"\",\"spaces\":\"this is a thing with lots of spaces wwwwoooooo\",\"xss\":\"<script>console.log(\\\"xss\\\")</script>\",\"headings\":[\"<h3>ulrich-walter</h5>\",\"http://www.slate.com/success/susan-still-kilrain\"],\"links\":[\"viktor-m-afanasyev@twitter.com\",\"http://twitter.com/security/stephen-oswald\",\"www.twitter.com\"],\"relatedContent\":[],\"machine\":{\"os\":\"win xp\",\"ram\":6442450944},\"@version\":\"1\"}\r",
"host": "qvisbcld0051"
},
"fields": {
"@timestamp": [
"2019-05-09T09:09:32.167Z"
]
},
"sort": [
1557392972167
]
}
And that's what I have when I check my mapping:
{
"mapping": {
"doc": {
"dynamic_templates": [
{
"message_field": {
"path_match": "message",
"match_mapping_type": "string",
"mapping": {
"norms": false,
"type": "text"
}
}
},
{
"string_fields": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"norms": false,
"type": "text"
}
}
}
],
"properties": {
"@message": {
"type": "text"
},
"@timestamp": {
"type": "date"
},
"@version": {
"type": "integer"
},
"agent": {
"type": "text"
},
"bytes": {
"type": "integer"
},
"clientip": {
"type": "ip"
},
"extension": {
"type": "text"
},
"geo": {
"properties": {
"coordinates": {
"type": "geo_point"
},
"dest": {
"type": "text"
},
"src": {
"type": "text"
},
"srcdest": {
"type": "text"
}
}
},
"geoip": {
"dynamic": "true",
"properties": {
"ip": {
"type": "ip"
},
"latitude": {
"type": "half_float"
},
"location": {
"type": "geo_point"
},
"longitude": {
"type": "half_float"
}
}
},
"host": {
"type": "text"
},
"ip": {
"type": "ip"
},
"links": {
"type": "text"
},
"machine": {
"properties": {
"os": {
"type": "text"
},
"ram": {
"type": "integer"
}
}
},
"message": {
"type": "text",
"norms": false
},
"path": {
"type": "text",
"norms": false,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"referer": {
"type": "text"
},
"relatedContent": {
"properties": {
"article:modified_time": {
"type": "date"
},
"article:published_time": {
"type": "date"
},
"article:section": {
"type": "keyword"
},
"article:tag": {
"type": "text"
},
"og:description": {
"type": "text"
},
"og:image": {
"type": "text"
},
"og:image:height": {
"type": "integer"
},
"og:image:width": {
"type": "integer"
},
"og:site_name": {
"type": "text"
},
"og:title": {
"type": "text"
},
"og:type": {
"type": "text"
},
"og:url": {
"type": "text"
},
"twitter:card": {
"type": "keyword"
},
"twitter:description": {
"type": "text"
},
"twitter:image": {
"type": "text"
},
"twitter:site": {
"type": "keyword"
},
"twitter:title": {
"type": "text"
},
"url": {
"type": "text"
}
}
},
"request": {
"type": "text"
},
"response": {
"type": "text"
},
"spaces": {
"type": "text"
},
"tags": {
"type": "text"
},
"url": {
"type": "text"
},
"utc_time": {
"type": "date"
},
"xss": {
"type": "text"
}
}
},
"_default_": {
"dynamic_templates": [
{
"message_field": {
"path_match": "message",
"match_mapping_type": "string",
"mapping": {
"norms": false,
"type": "text"
}
}
},
{
"string_fields": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"norms": false,
"type": "text"
}
}
}
],
"properties": {
"@timestamp": {
"type": "date"
},
"@version": {
"type": "keyword"
},
"geoip": {
"dynamic": "true",
"properties": {
"ip": {
"type": "ip"
},
"latitude": {
"type": "half_float"
},
"location": {
"type": "geo_point"
},
"longitude": {
"type": "half_float"
}
}
}
}
}
}
}
I send my data with Logstash, so here's the input part of the conf:
input {
beats {
port => 5044
tags => "fromBeats"
}
file {
path => [
"/home/secunix/logs/*",
"/tech/*"
]
start_position => "beginning"
sincedb_path => "/dev/null"
}
tcp {
port => 5514
type => "syslog"
tags => "from Syslog-ng"
}
}
filter {
if [type] == "syslog"{
grok {
match => ["message", "<(?<sys_priority>\d+?)>(?<syslog_timestamp>%{CISCOTIMESTAMP})\s(?<logsource>%{URIHOST})(\s(?:(?<application>.*?)(%(?<project>.*?))?))?:(?:\s)?(?<logmessage>.*$)"]
}
if [logmessage] {
mutate {
replace => [ "message", "%{logmessage}" ]
remove_field => [ "logmessage" ]
}
}
if [project] {
mutate {
replace => [ "type", "%{project}" ]
remove_field => [ "project" ]
}
}else if [application] {
mutate {
lowercase => [ "application" ]
}
mutate {
gsub => [ "application", " ", "_" ]
}
mutate {
replace => [ "type", "%{application}" ]
}
}else {
mutate {
replace => [ "type", "uknapp" ]
add_field => { "application" => "uknapp" }
}
}
}
}
and the output:
output {
elasticsearch {
hosts => ["localhost:9200"]
index => "logstash-%{+YYYY.MM.dd}"
#+++ sa Added by scr-sop-af-config-elksandbox. Do not remove this line.
user => "logstash"
#--- sa Added by scr-sop-af-config-elksandbox. Do not remove this line.
#+++ sa Added by scr-sop-af-config-elksandbox. Do not remove this line.
password => "logstash"
#--- sa Added by scr-sop-af-config-elksandbox. Do not remove this line.
}
}
Can someone tell me how I can fix this, please?
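No answer is attached to this question, but as a hedged sketch of one common approach (assuming the files under /home/secunix/logs/ contain one JSON object per line, as the message value above suggests): parse the JSON in the filter section so the fields arrive in Elasticsearch at the top level instead of inside message, for example:

filter {
  # hypothetical condition: only parse the JSON test file, not syslog or beats events
  if [path] =~ "TestLogPourMapping" {
    json {
      source => "message"          # parse the JSON string into top-level fields
      remove_field => ["message"]  # drop the raw line once it has been parsed
    }
  }
}

Alternatively, codec => "json" on the file input does the parsing as the lines are read. Either way, the parsed field names still have to match the mapping (for instance the sample line carries @tags while the mapping defines tags).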

Update Mapping of existing Index in Elasticsearch

I am totally new to Elasticsearch, so please forgive me if this is a stupid question; it might have been answered somewhere else already, but I couldn't find it.
I want to use Elasticsearch as a search engine for PDFs and docx files on my network. I used fscrawler to ingest the PDFs into Elasticsearch. Since the documents I want to ingest are in several languages, I wanted to use n-grams for stemming. To do so, I wanted to update my mapping like this:
PUT test/_mappings/_all
{
"mappings": {
"title": {
"properties": {
"title": {
"type": "text",
"fields": {
"de": {
"type": "string",
"analyzer": "german"
},
"en": {
"type": "string",
"analyzer": "english"
},
"general": {
"type": "string",
"analyzer": "trigrams"
}
}
}
}
}
}
}
And now I get this error message:
{ "error": {
"root_cause": [
{
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [mappings : {title={properties={title={type=text,
fields={de={type=string, analyzer=german}, en={type=string,
analyzer=english}, general={type=string, analyzer=trigrams}}}}}}]"
}
],
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [mappings : {title={properties={title={type=text,
fields={de={type=string, analyzer=german}, en={type=string,
analyzer=english}, general={type=string, analyzer=trigrams}}}}}}]"
}, "status": 400 }
Do you have any idea how I can fix this? Or how I can ingest the files with the right mapping without using fscrawler?
These are my settings:
{
"test": {
"settings": {
"index": {
"mapping": {
"total_fields": {
"limit": "2000"
}
},
"number_of_shards": "5",
"provided_name": "test",
"creation_date": "1542031632596",
"analysis": {
"filter": {
"trigrams_filter": {
"type": "ngram",
"min_gram": "3",
"max_gram": "3"
}
},
"analyzer": {
"fscrawler_path": {
"tokenizer": "fscrawler_path"
},
"trigrams": {
"filter": [
"lowercase",
"trigrams_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"tokenizer": {
"fscrawler_path": {
"type": "path_hierarchy"
}
}
},
"number_of_replicas": "1",
"uuid": "7L3QE5_xRACECVbTFlFY-Q",
"version": {
"created": "6040399"
}
}
}
}
}
My mapping
{
"test": {
"mappings": {
"_doc": {
"dynamic_templates": [
{
"raw_as_text": {
"path_match": "meta.raw.*",
"mapping": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
}
}
}
],
"properties": {
"attachment": {
"type": "binary"
},
"attributes": {
"properties": {
"group": {
"type": "keyword"
},
"owner": {
"type": "keyword"
}
}
},
"content": {
"type": "text"
},
"file": {
"properties": {
"checksum": {
"type": "keyword"
},
"content_type": {
"type": "keyword"
},
"created": {
"type": "date",
"format": "dateOptionalTime"
},
"extension": {
"type": "keyword"
},
"filename": {
"type": "keyword",
"store": true
},
"filesize": {
"type": "long"
},
"indexed_chars": {
"type": "long"
},
"indexing_date": {
"type": "date",
"format": "dateOptionalTime"
},
"last_accessed": {
"type": "date",
"format": "dateOptionalTime"
},
"last_modified": {
"type": "date",
"format": "dateOptionalTime"
},
"url": {
"type": "keyword",
"index": false
}
}
},
"meta": {
"properties": {
"altitude": {
"type": "text"
},
"author": {
"type": "text"
},
"comments": {
"type": "text"
},
"contributor": {
"type": "text"
},
"coverage": {
"type": "text"
},
"created": {
"type": "date",
"format": "dateOptionalTime"
},
"creator_tool": {
"type": "keyword"
},
"date": {
"type": "date",
"format": "dateOptionalTime"
},
"description": {
"type": "text"
},
"format": {
"type": "text"
},
"identifier": {
"type": "text"
},
"keywords": {
"type": "text"
},
"language": {
"type": "keyword"
},
"latitude": {
"type": "text"
},
"longitude": {
"type": "text"
},
"metadata_date": {
"type": "date",
"format": "dateOptionalTime"
},
"modifier": {
"type": "text"
},
"print_date": {
"type": "date",
"format": "dateOptionalTime"
},
"publisher": {
"type": "text"
},
"rating": {
"type": "byte"
},
"relation": {
"type": "text"
},
"rights": {
"type": "text"
},
"source": {
"type": "text"
},
"title": {
"type": "text"
},
"type": {
"type": "text"
}
}
},
"path": {
"properties": {
"real": {
"type": "keyword",
"fields": {
"fulltext": {
"type": "text"
},
"tree": {
"type": "text",
"analyzer": "fscrawler_path",
"fielddata": true
}
}
},
"root": {
"type": "keyword"
},
"virtual": {
"type": "keyword",
"fields": {
"fulltext": {
"type": "text"
},
"tree": {
"type": "text",
"analyzer": "fscrawler_path",
"fielddata": true
}
}
}
}
}
}
}
}
}
}
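No answer is recorded above, but as a hedged sketch of what the request might look like on this 6.4 cluster (an assumption based on the settings and mapping shown: the index's only type is _doc, the string type no longer exists in 6.x so text must be used, and the body of a PUT mapping call starts at properties rather than at mappings). Assuming the field to enrich is meta.title from the fscrawler mapping, new multi-fields can be added to it like this:

PUT test/_mapping/_doc
{
  "properties": {
    "meta": {
      "properties": {
        "title": {
          "type": "text",
          "fields": {
            "de":      { "type": "text", "analyzer": "german" },
            "en":      { "type": "text", "analyzer": "english" },
            "general": { "type": "text", "analyzer": "trigrams" }
          }
        }
      }
    }
  }
}

The trigrams analyzer referenced here already exists in the index settings shown above; adjust the field path if a different title field was meant.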

Why does the keyword type take up much more space than text in elasticsearch?

Environment: Elasticsearch 5.5.1
There are two indices in my Elasticsearch, and the only difference between them is the message field: its type in index1 is keyword, and in index2 it is text.
To make sure it is not affected by other fields, I removed the message field and compared the before and after results:
Before removing the message field:
After removing the message field I got:
Obviously the message field takes up a lot of space, and the keyword type takes up much more than text, but I don't know why keyword takes up so much more space than text.
So, can anyone help me?
Following is index1's mapping info:
"mappings": {
"system": {
"dynamic": "true",
"_all": {
"enabled": false
},
"dynamic_date_formats": [
"yyyy-MM-dd HH:mm:ss.SSS"
],
"dynamic_templates": [
{
"geo2": {
"match": "*_geo",
"mapping": {
"type": "geo_point"
}
}
},
{
"strings2": {
"match_mapping_type": "string",
"mapping": {
"type": "keyword"
}
}
}
],
"numeric_detection": false,
"properties": {
"@agent_timestamp": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss.SSS"
},
"@timestamp": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss.SSS"
},
"Kafkaspeed": {
"type": "keyword"
},
"_index_name": {
"type": "keyword"
},
"count": {
"type": "long"
},
"datex": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss.SSS"
},
"defaultWriteESspeed": {
"type": "double"
},
"filepathname": {
"type": "keyword"
},
"jsonmessage": {
"type": "text"
},
"key": {
"type": "keyword"
},
"logcount": {
"type": "long"
},
"loglevel": {
"type": "keyword"
},
"message": {
"type": "keyword"
},
"paredspeed": {
"type": "float"
},
"seccount": {
"type": "long"
},
"sn": {
"type": "long"
},
"sourceName": {
"type": "keyword"
},
"sourceip": {
"type": "keyword"
},
"sourcename": {
"type": "keyword"
},
"sourceport": {
"type": "long"
},
"sucesscount": {
"type": "long"
},
"time_str": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss.SSS"
},
"timestamp": {
"type": "long"
},
"totalcount": {
"type": "long"
},
"uniqueid": {
"type": "keyword"
}
}
}
}
and settings info:
"settings": {
"index": {
"refresh_interval": "1s",
"number_of_shards": "3",
"translog": {
"flush_threshold_size": "1024mb",
"sync_interval": "60s",
"durability": "async"
},
"provided_name": "index1",
"creation_date": "1531389785215",
"analysis": {
"analyzer": {
"optionIK": {
"filter": [
"word_delimiter"
],
"type": "custom",
"tokenizer": "ik_max_word"
}
}
},
"number_of_replicas": "0",
"uuid": "zd8oVbwUQbys1UJ8hJZRmQ",
"version": {
"created": "5050099"
}
}
}
Following is index2's mapping info:
"mappings": {
"system": {
"dynamic": "true",
"_all": {
"enabled": false
},
"dynamic_date_formats": [
"yyyy-MM-dd HH:mm:ss.SSS"
],
"dynamic_templates": [
{
"geo2": {
"match": "*_geo",
"mapping": {
"type": "geo_point"
}
}
},
{
"strings2": {
"match_mapping_type": "string",
"mapping": {
"type": "keyword"
}
}
}
],
"numeric_detection": false,
"properties": {
"@agent_timestamp": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss.SSS"
},
"@timestamp": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss.SSS"
},
"CommunicationReturnCode": {
"type": "keyword"
},
"Kafkaspeed": {
"type": "keyword"
},
"_index_name": {
"type": "keyword"
},
"action": {
"type": "keyword"
},
"count": {
"type": "long"
},
"datex": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss.SSS"
},
"defaultWriteESspeed": {
"type": "double"
},
"filepathname": {
"type": "keyword"
},
"jsonmessage": {
"type": "text"
},
"key": {
"type": "keyword"
},
"logcount": {
"type": "long"
},
"loglevel": {
"type": "keyword"
},
"message": {
"type": "text"
},
"msgid": {
"type": "keyword"
},
"msgname": {
"type": "keyword"
},
"nodetype": {
"type": "keyword"
},
"orgid": {
"type": "keyword"
},
"orgname": {
"type": "keyword"
},
"paredspeed": {
"type": "float"
},
"processingState": {
"type": "keyword"
},
"processingStatecode": {
"type": "keyword"
},
"seccount": {
"type": "long"
},
"sn": {
"type": "long"
},
"sourceName": {
"type": "keyword"
},
"sourceip": {
"type": "keyword"
},
"sourcename": {
"type": "keyword"
},
"sourceport": {
"type": "long"
},
"sucesscount": {
"type": "long"
},
"thread": {
"type": "keyword"
},
"time_str": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss.SSS"
},
"timestamp": {
"type": "long"
},
"totalcount": {
"type": "long"
},
"transDescription": {
"type": "keyword"
},
"transactionErrorCode": {
"type": "keyword"
},
"transactionTimeConsuming": {
"type": "keyword"
},
"transcode": {
"type": "keyword"
},
"uniqueid": {
"type": "keyword"
}
}
}
}
and settings info:
"settings": {
"index": {
"refresh_interval": "1s",
"number_of_shards": "2",
"translog": {
"flush_threshold_size": "1024mb",
"sync_interval": "60s",
"durability": "async"
},
"provided_name": "index2",
"creation_date": "1531467294314",
"analysis": {
"analyzer": {
"optionIK": {
"filter": [
"word_delimiter"
],
"type": "custom",
"tokenizer": "ik_max_word"
}
}
},
"number_of_replicas": "0",
"uuid": "yROU2MrMTzip4VXH_zWEXQ",
"version": {
"created": "5050099"
}
}
}
Following is the file structure of one index's two shards for the text-type field:
and for the keyword-type field:
And you can trust that there are the same number of documents in the two folders, and that the only difference is the type of the message field.
Could you explain it?
Thank you so much!
In Elasticsearch, keyword fields have doc_values enabled by default, while text fields do not. This means that for your keyword field the whole value is also stored in a column-oriented fashion, so that aggregations and sorting can be performed without relying on fielddata.
Also, once you tokenize a string, with stemming, lowercasing, etc., you can achieve much better compression.
You can try disabling doc_values on that field if you don't perform aggregations or sorting on it.
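As a hedged illustration of that last suggestion (field and type names taken from the question), the mapping for a keyword field that is never aggregated or sorted on could look like this:

"message": {
  "type": "keyword",
  "doc_values": false
}

Note that doc_values cannot be changed on an existing field, so the index has to be recreated (or the data reindexed) with this mapping for the change to take effect.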

Decay function across multiple indices where decay field name might not exist

I'm trying to use a decay function across multiple indices. The issue I'm having is that some of those indices do not have the field that the decay function uses, so I'm getting the following error.
{"type":"parsing_exception","reason":"unknown field [start_date]","line":1,"col":0},{"type":"parsing_exception","reason":"unknown field [match_date]","line":1,"col":0}
I can provide any additional information that would be helpful.
The documentation shows that there is a filter option, which I attempted to use; however, it still trips up when the field is not found.
I'm using the PHP flavor of Elasticsearch.
'query' => [
"function_score" => [
'query' => [
"bool" => [
"must" => [
"multi_match" => [
"query" => $search,
"fields" => [
"match_web_name",
"shooter_result_name",
"name",
"post_match_name",
"club_name"
]
]
],
"should" => [
[
"term" => [
"private" => 0
],
]
]
]
],
"functions" => [
[
"filter" => [ "exists" => ["field" => "start_date"]],
"linear" => [
"start_date" => [
"origin" => 'now',
"scale" => $scale
]
]
],
[
"filter" => [ "exists" => ["field" => "match_date"]],
"linear" => [
"match_date" => [
"origin" => 'now',
"scale" => $scale
]
]
]
]
]
]
Here is the mapping for an index that does not have the start_date field:
"club": {
"properties": {
"zip": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"website": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"city": {
"type": "text",
"fields": {
"en": {
"analyzer": "english",
"type": "text"
},
"raw": {
"type": "keyword"
}
}
},
"about": {
"type": "text",
"fields": {
"en": {
"analyzer": "english",
"type": "text"
}
}
},
"tagline": {
"type": "text",
"fields": {
"en": {
"analyzer": "english",
"type": "text"
}
}
},
"location": {
"ignore_malformed": true,
"type": "geo_point"
},
"id": {
"type": "integer"
},
"state": {
"type": "text",
"fields": {
"en": {
"analyzer": "english",
"type": "text"
},
"raw": {
"type": "keyword"
}
}
},
"club_name": {
"type": "text",
"fields": {
"en": {
"analyzer": "english",
"type": "text"
},
"raw": {
"type": "keyword"
}
}
},
"deleted_at": {
"format": "yyyy-MM-dd HH:mm:ss",
"type": "date"
},
"slug": {
"type": "keyword"
}
}
}
Here is a mapping for an index that does contain the start_date field:
"match": {
"properties": {
"end_date": {
"null_value": "1990-01-01 12:12:12",
"format": "yyyy-MM-dd HH:mm:ss",
"type": "date"
},
"zip": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"private": {
"type": "boolean"
},
"reg_close_date": {
"null_value": "1990-01-01 12:12:12",
"format": "yyyy-MM-dd HH:mm:ss",
"type": "date"
},
"city": {
"type": "text",
"fields": {
"en": {
"analyzer": "english",
"type": "text"
},
"raw": {
"type": "keyword"
}
}
},
"level": {
"type": "keyword"
},
"description": {
"type": "text",
"fields": {
"en": {
"analyzer": "english",
"type": "text"
}
}
},
"max_shooters": {
"type": "integer"
},
"type": {
"type": "keyword"
},
"deleted_at": {
"format": "yyyy-MM-dd HH:mm:ss",
"ignore_malformed": true,
"type": "date"
},
"reg_open_date": {
"null_value": "1990-01-01 12:12:12",
"format": "yyyy-MM-dd HH:mm:ss",
"type": "date"
},
"score_type": {
"type": "keyword"
},
"club_id": {
"type": "integer"
},
"match_web_name": {
"type": "text",
"fields": {
"en": {
"analyzer": "english",
"type": "text"
},
"raw": {
"type": "keyword"
}
}
},
"location": {
"ignore_malformed": true,
"type": "geo_point"
},
"id": {
"type": "integer"
},
"state": {
"type": "text",
"fields": {
"en": {
"analyzer": "english",
"type": "text"
},
"raw": {
"type": "keyword"
}
}
},
"prices": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"slug": {
"type": "keyword"
},
"start_date": {
"null_value": "1990-01-01 12:12:12",
"format": "yyyy-MM-dd HH:mm:ss",
"type": "date"
},
"status": {
"type": "keyword"
}
}
}
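No answer is recorded here, but one workaround that is sometimes used for this kind of parsing_exception (an assumption on my part, not something stated in the question) is to map the decay fields in every index that the search targets, even if its documents never contain them, so that the function_score query can be parsed against all indices. For the club mapping above that would be something like the following, where club_index is a placeholder for the actual index name:

PUT club_index/_mapping/club
{
  "properties": {
    "start_date": {
      "type": "date",
      "format": "yyyy-MM-dd HH:mm:ss"
    },
    "match_date": {
      "type": "date",
      "format": "yyyy-MM-dd HH:mm:ss"
    }
  }
}

With the fields mapped everywhere, the exists filters already present in the functions keep the decay from being applied to documents that have no value for them.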

Resources