I am using the Logstash JDBC input to pull data from a PostgreSQL query and export it to Elasticsearch v7. Here is the configuration file:
input {
jdbc {
jdbc_connection_string => "jdbc:postgresql://ldatabase_rds_path?useSSL=true"
jdbc_user => "username"
jdbc_password => "password"
jdbc_driver_library => "/home/z/Documents/postgresql-42.2.18.jar"
jdbc_driver_class => "org.postgresql.Driver"
tracking_column => "id"
tracking_column_type => "numeric"
clean_run => true
schedule => "0 */1 * * *"
statement => "SELECT id as id, type as type, z_id as z_id, sender_id as sender_id, receiver_id as receiver_id, status as status, amount as amount, fees as fees, created as created, metadata as metadata, funding_source_from_id as funding_source_from_id, funding_source_to_id as funding_source_to_id, is_parent as is_parent, destination_type as destination_type, source_type as source_type FROM payments_transfer"
}
}
output {
stdout { codec => json_lines }
elasticsearch {
hosts => ["localhost:9200"]
manage_template => false
index => "payments_transfer_data"
document_id => "%{id}"
}
}
It takes a lot of time to get only one record from the database!
I tried some solutions, such as explicitly defining a mapping, so I added a mapping for the data like this:
PUT payments_transfer_data/_mapping/doc?include_type_name=true
{
"properties": {
"#timestamp": {
"type": "date"
},
"#version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"amount": {
"type": "float"
},
"created": {
"type": "date"
},
"destination_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"z_id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"fees": {
"type": "float"
},
"funding_source_from_id": {
"type": "long"
},
"funding_source_to_id": {
"type": "long"
},
"id": {
"type": "long"
},
"is_parent": {
"type": "boolean"
},
"metadata": {
"type": "keyword"
},
"receiver_id": {
"type": "long"
},
"sender_id": {
"type": "long"
},
"source_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"status": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
Here is the only record that gets pulled:
source_type:customer sender_id:3 destination_type:funding-source type:funded_transfer z_id:863a240c-2011-e911-8114-bacd823e9f1d receiver_id:65 status:processed amount:550 funding_source_from_id:332 @timestamp:Nov 8, 2020 @ 16:00:08.809 fees:5.61 is_parent:false created:Jan 5, 2019 @ 21:28:21.847 @version:1 id:2,160 funding_source_to_id: - metadata: - _id:2160 _type:doc _index:payments_transfer_data _score: -
According to the official documentation, you should use tracking_column in conjunction with use_column_value. With your current settings, tracking_column has no effect.
Have you tried using Select * from to see if everything is being pulled?
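As a minimal sketch of an incremental setup (the WHERE clause and the option values below are assumptions, not taken from your config):
input {
  jdbc {
    # ... connection settings as above ...
    use_column_value => true
    tracking_column => "id"
    tracking_column_type => "numeric"
    # clean_run => true would reset the stored :sql_last_value on every start
    clean_run => false
    schedule => "0 */1 * * *"
    # only fetch rows that are new since the last run
    statement => "SELECT * FROM payments_transfer WHERE id > :sql_last_value ORDER BY id"
  }
}
With use_column_value enabled, Logstash persists the highest id it has seen and substitutes it for :sql_last_value on the next run, so each run pulls only new rows.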
Related
I am using Elasticsearch 7.12.0, Logstash 7.12.0, and Kibana 7.12.0 on Windows 10 x64. Logstash config file logistics.conf:
input {
jdbc {
jdbc_driver_library => "D:\\tools\\postgresql-42.2.16.jar"
jdbc_driver_class => "org.postgresql.Driver"
jdbc_connection_string => "jdbc:postgresql://localhost:5433/ld"
jdbc_user => "xxxx"
jdbc_password => "sEcrET"
schedule => "*/5 * * * *"
statement => "select * from inventory_item_report();"
}
}
filter {
uuid {
target => "uuid"
}
}
output {
elasticsearch {
hosts => "http://localhost:9200"
index => "localdist"
document_id => "%{uuid}"
doc_as_upsert => "true"
}
}
Run Logstash:
logstash -f logistics.conf
If I don't set an explicit mapping, the query
GET /localdist/_search
{
"query": {
"match_all": {}
}
}
returns many results.
My mapping commands:
POST localdist/_mapping
{
}
DELETE /localdist
PUT /localdist
{
}
POST /localdist
{
}
PUT localdist/_mapping
{
"properties": {
"unt_cost": {
"type": "double"
},
"ii_typ": {
"type": "keyword"
},
"qty_uom_id": {
"type": "keyword"
},
"prod_id": {
"type": "keyword"
},
"root_cat_id": {
"type": "keyword"
},
"uom": {
"type": "keyword"
},
"product_name": {
"type": "text"
},
"ii_id": {
"type": "keyword"
},
"wght_uom_id": {
"type": "keyword"
},
"iid_seq_id": {
"type": "long"
},
"avai_diff": {
"type": "double"
},
"invt_change_typ": {
"type": "keyword"
},
"ccy": {
"type": "keyword"
},
"exp_date": {
"type": "date"
},
"req_amt": {
"type": "text"
},
"pur_cost": {
"type": "double"
},
"tot_pri": {
"type": "long"
},
"own_pid": {
"type": "keyword"
},
"doc_type": {
"type": "keyword"
},
"ii_date": {
"type": "date"
},
"fac_id": {
"type": "keyword"
},
"shipment_type_id": {
"type": "keyword"
},
"lot_id": {
"type": "keyword"
},
"phy_invt_id": {
"type": "keyword"
},
"facility_name": {
"type": "text"
},
"amt_ohand_diff": {
"type": "double"
},
"reason_id": {
"type": "keyword"
},
"cat_id": {
"type": "keyword"
},
"qty_ohand_diff": {
"type": "double"
},
"#timestamp": {
"type": "date"
}
}
}
Running the query
GET /localdist/_search
{
"query": {
"match_all": {}
}
}
returns nothing.
How do I fix this so that explicit mappings work correctly?
If I understand you correctly, you are indexing via Logstash. Elasticsearch then creates the index if it is missing, indexes the documents, and tries to guess the mapping for your documents based on the very first ones.
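To see what was guessed, you can inspect the mapping Elasticsearch generated for the index:
GET /localdist/_mapping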
TL;DR: you are DELETING the index that contains your data yourself.
With
DELETE /localdist
you are deleting the whole index including all data. After that, by issuing
PUT /localdist
{
}
you are re-creating the previously deleted index, which is now empty again. And at the end, you are setting the index mapping with
PUT localdist/_mapping
{
"properties": {
"unt_cost": {
"type": "double"
},
"ii_typ": {
"type": "keyword"
},
...
Now that you have an empty Elasticsearch index with a mapping set, start the Logstash pipeline again. If your documents match the index mapping, they should start to appear very quickly.
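As a sketch, the whole sequence in the correct order (re-using the mapping body from above) would be:
DELETE /localdist

PUT /localdist
{
  "mappings": {
    "properties": {
      "unt_cost": { "type": "double" },
      "ii_typ": { "type": "keyword" },
      ... (remaining properties from above) ...
    }
  }
}
Creating the index and its mapping in a single request closes the window in which the first indexed document could otherwise trigger dynamic mapping.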
This is my MySQL config:
input {
stdin {
}
jdbc {
jdbc_connection_string => "jdbc:mysql://localhost:3306/xc_course?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=UTC"
# the user we wish to execute our statement as
jdbc_user => "root"
jdbc_password => "1234"
# the path to our downloaded jdbc driver
jdbc_driver_library =>"/usr/local/elasticsearch/logstash/mysql-connector-java-5.1.4.jar"
# the name of the driver class for mysql
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
#statement_filepath => "/conf/course.sql"
statement => "select * from course_pub where timestamp > date_add(:sql_last_value,INTERVAL 8 HOUR)"
schedule => "* * * * *"
record_last_run => true
last_run_metadata_path => "/usr/local/elasticsearch/elasticsearch-6.2.1/config/logstash_metadata"
}
}
output {
elasticsearch {
hosts => "localhost:9200"
#hosts => ["localhost:9200","localhost:9202","localhost:9203"]
index => "xc_course"
document_id => "%{id}"
document_type => "doc"
template =>"/usr/local/elasticsearch/logstash-6.2.1/config/xc_course_template.json"
template_name =>"xc_course"
template_overwrite =>"true"
}
stdout {
codec => json_lines
}
}
This is my template.json:
{
"mappings": {
"doc": {
"properties": {
"charge": {
"type": "keyword"
},
"description": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"end_time": {
"format": "yyyy‐MM‐dd HH:mm:ss",
"type": "date"
},
"expires": {
"format": "yyyy‐MM‐dd HH:mm:ss",
"type": "date"
},
"grade": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"mt": {
"type": "keyword"
},
"name": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"pic": {
"index": false,
"type": "keyword"
},
"price": {
"type": "float"
},
"price_old": {
"type": "float"
},
"pub_time": {
"format": "yyyy‐MM‐dd HH:mm:ss",
"type": "date"
},
"qq": {
"index": false,
"type": "keyword"
},
"st": {
"type": "keyword"
},
"start_time": {
"format": "yyyy‐MM‐dd HH:mm:ss",
"type": "date"
},
"status": {
"type": "keyword"
},
"studymodel": {
"type": "keyword"
},
"teachmode": {
"type": "keyword"
},
"teachplan": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"users": {
"index": false,
"type": "text"
},
"valid": {
"type": "keyword"
}
}
}
},
"template": "xc_course"
}
This is my Elasticsearch index mapping:
{
"properties": {
"description": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"grade": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"mt": {
"type": "keyword"
},
"name": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"users": {
"index": false,
"type": "text"
},
"charge": {
"type": "keyword"
},
"valid": {
"type": "keyword"
},
"pic": {
"index": false,
"type": "keyword"
},
"qq": {
"index": false,
"type": "keyword"
},
"price": {
"type": "float"
},
"price_old": {
"type": "float"
},
"st": {
"type": "keyword"
},
"status": {
"type": "keyword"
},
"studymodel": {
"type": "keyword"
},
"teachmode": {
"type": "keyword"
},
"teachplan": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"type": "text"
},
"expires": {
"type": "date",
"format": "yyyy‐MM‐dd HH:mm:ss"
},
"pub_time": {
"type": "date",
"format": "yyyy‐MM‐dd HH:mm:ss"
},
"start_time": {
"type": "date",
"format": "yyyy‐MM‐dd HH:mm:ss"
},
"end_time": {
"type": "date",
"format": "yyyy‐MM‐dd HH:mm:ss"
}
}
}
But when it starts, I get:
Could not index event to Elasticsearch. {:status=>400, :action=>["index", {:_id=>"4028e58161bd3b380161bd3bcd2f0000", :_index=>"xc_course", :_type=>"doc", :_routing=>nil}, #<LogStash::Event:0x62e0a8f9>], :response=>{"index"=>{"_index"=>"xc_course", "_type"=>"doc", "_id"=>"4028e58161bd3b380161bd3bcd2f0000", "status"=>400, "error"=>{"type"=>"mapper_parsing_exception", "reason"=>"failed to parse [start_time]", "caused_by"=>{"type"=>"illegal_argument_exception", "reason"=>"Invalid format: \"2019-12-20T15:18:13.000Z\" is malformed at \"-12-20T15:18:13.000Z\""}}}}}
The key part is: failed to parse [start_time], caused by illegal_argument_exception:
"Invalid format: \"2019-12-20T15:18:13.000Z\" is malformed at \"-12-20T15:18:13.000Z\""
But in my database the value is like 2019-12-28 19:24:41.
When I set the date to null it works, but a value like 2019-12-28 19:24:41 causes the parsing error.
How do I deal with this?
The jdbc input automatically converts date columns to LogStash::Timestamp values, which are serialized in ISO 8601 form (hence the 2019-12-20T15:18:13.000Z in the error), but your index template only accepts the yyyy-MM-dd HH:mm:ss format. Remove the "format" from the date fields in your index template.
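For example, either drop the format entirely so the default (ISO 8601 plus epoch millis) applies:
"start_time": {
  "type": "date"
}
or, as a sketch, if you also want to keep accepting the plain pattern, list both formats ("strict_date_optional_time" is Elasticsearch's built-in name for the ISO 8601 format):
"start_time": {
  "type": "date",
  "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time"
}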
OK, so now I have my mapping in Kibana.
Here's my mapping:
PUT logstash-2019.05.09
{
"mappings": {
"doc": {
"properties": {
"index": {
"_index": {
"type": "keyword"
},
"_type": {
"type": "text"
}
},
"#timestamp": {
"type": "date"
},
"ip": {
"type": "ip"
},
"extension": {
"type": "text"
},
"response": {
"type": "text"
},
"geo": {
"coordinates": {
"type": "geo_point"
},
"src": {
"type": "text"
},
"dest": {
"type": "text"
},
"srcdest": {
"type": "text"
}
},
"tags": {
"type": "text"
},
"utc_time": {
"type": "date"
},
"referer": {
"type": "text"
},
"agent": {
"type": "text"
},
"clientip": {
"type": "ip"
},
"bytes": {
"type": "integer"
},
"host": {
"type": "text"
},
"request": {
"type": "text"
},
"url": {
"type": "text"
},
"#message": {
"type": "text"
},
"spaces": {
"type": "text"
},
"xss": {
"type": "text"
},
"links": {
"type": "text"
},
"relatedContent": {
"url": {
"type": "text"
},
"og:type": {
"type": "text"
},
"og:title": {
"type": "text"
},
"og:description": {
"type": ""
},
"og:url": {
"type": ""
},
"article:published_time": {
"type": "date"
},
"article:modified_time": {
"type": "date"
},
"article:section": {
"type": "keyword"
},
"article:tag": {
"type": "text"
},
"og:image": {
"type": "text"
},
"og:image:height": {
"type": "integer"
},
"og:image:width": {
"type": "integer"
},
"og:site_name": {
"type": "text"
},
"twitter:title": {
"type": "text"
},
"twitter:description": {
"type": "text"
},
"twitter:card": {
"type": "keyword"
},
"twitter:image": {
"type": "text"
},
"twitter:site": {
"type": "keyword"
}
},
"machine": {
"os": {
"type": "text"
},
"ram": {
"type": "integer"
}
},
"#version": {
"type": "integer"
}
}
}
}
}
But I don't know why Kibana doesn't store the right information in the right fields; it just puts everything into a message field. I think it's because I have a dynamic mapping by default, but I'm not really sure. Here's the result:
Result (table): (screenshot)
Result (JSON):
{
"_index": "logstash-2019.05.09",
"_type": "doc",
"_id": "9zfam2oBWngGU4Wy3Id5",
"_version": 1,
"_score": null,
"_source": {
"#version": "1",
"#timestamp": "2019-05-09T09:09:32.167Z",
"path": "/home/secunix/logs/TestLogPourMapping_09_05.json",
"message": "{\"#timestamp\":\"2019-05-07T09:56:33.996Z\",\"ip\":\"181.144.250.19\",\"extension\":\"jpg\",\"response\":\"200\",\"geo\":{\"coordinates\":{\"lat\":44.12326,\"lon\":-123.2186856},\"src\":\"IN\",\"dest\":\"CN\",\"srcdest\":\"IN:CN\"},\"#tags\":[\"success\",\"info\"],\"utc_time\":\"2019-05-07T09:56:33.996Z\",\"referer\":\"http://www.slate.com/success/thomas-marshburn\",\"agent\":\"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.50 Safari/534.24\",\"clientip\":\"181.144.250.19\",\"bytes\":2553,\"host\":\"media-for-the-masses.theacademyofperformingartsandscience.org\",\"request\":\"/uploads/fyodor-yurchikhin.jpg\",\"url\":\"https://media-for-the-masses.theacademyofperformingartsandscience.org/uploads/fyodor-yurchikhin.jpg\",\"#message\":\"181.144.250.19 - - [2019-05-07T09:56:33.996Z] \\\"GET /uploads/fyodor-yurchikhin.jpg HTTP/1.1\\\" 200 2553 \\\"-\\\" \\\"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.50 Safari/534.24\\\"\",\"spaces\":\"this is a thing with lots of spaces wwwwoooooo\",\"xss\":\"<script>console.log(\\\"xss\\\")</script>\",\"headings\":[\"<h3>ulrich-walter</h5>\",\"http://www.slate.com/success/susan-still-kilrain\"],\"links\":[\"viktor-m-afanasyev#twitter.com\",\"http://twitter.com/security/stephen-oswald\",\"www.twitter.com\"],\"relatedContent\":[],\"machine\":{\"os\":\"win xp\",\"ram\":6442450944},\"#version\":\"1\"}\r",
"host": "qvisbcld0051"
},
"fields": {
"#timestamp": [
"2019-05-09T09:09:32.167Z"
]
},
"sort": [
1557392972167
]
}
And this is what I get when I check my mapping:
{
"mapping": {
"doc": {
"dynamic_templates": [
{
"message_field": {
"path_match": "message",
"match_mapping_type": "string",
"mapping": {
"norms": false,
"type": "text"
}
}
},
{
"string_fields": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"norms": false,
"type": "text"
}
}
}
],
"properties": {
"#message": {
"type": "text"
},
"#timestamp": {
"type": "date"
},
"#version": {
"type": "integer"
},
"agent": {
"type": "text"
},
"bytes": {
"type": "integer"
},
"clientip": {
"type": "ip"
},
"extension": {
"type": "text"
},
"geo": {
"properties": {
"coordinates": {
"type": "geo_point"
},
"dest": {
"type": "text"
},
"src": {
"type": "text"
},
"srcdest": {
"type": "text"
}
}
},
"geoip": {
"dynamic": "true",
"properties": {
"ip": {
"type": "ip"
},
"latitude": {
"type": "half_float"
},
"location": {
"type": "geo_point"
},
"longitude": {
"type": "half_float"
}
}
},
"host": {
"type": "text"
},
"ip": {
"type": "ip"
},
"links": {
"type": "text"
},
"machine": {
"properties": {
"os": {
"type": "text"
},
"ram": {
"type": "integer"
}
}
},
"message": {
"type": "text",
"norms": false
},
"path": {
"type": "text",
"norms": false,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"referer": {
"type": "text"
},
"relatedContent": {
"properties": {
"article:modified_time": {
"type": "date"
},
"article:published_time": {
"type": "date"
},
"article:section": {
"type": "keyword"
},
"article:tag": {
"type": "text"
},
"og:description": {
"type": "text"
},
"og:image": {
"type": "text"
},
"og:image:height": {
"type": "integer"
},
"og:image:width": {
"type": "integer"
},
"og:site_name": {
"type": "text"
},
"og:title": {
"type": "text"
},
"og:type": {
"type": "text"
},
"og:url": {
"type": "text"
},
"twitter:card": {
"type": "keyword"
},
"twitter:description": {
"type": "text"
},
"twitter:image": {
"type": "text"
},
"twitter:site": {
"type": "keyword"
},
"twitter:title": {
"type": "text"
},
"url": {
"type": "text"
}
}
},
"request": {
"type": "text"
},
"response": {
"type": "text"
},
"spaces": {
"type": "text"
},
"tags": {
"type": "text"
},
"url": {
"type": "text"
},
"utc_time": {
"type": "date"
},
"xss": {
"type": "text"
}
}
},
"_default_": {
"dynamic_templates": [
{
"message_field": {
"path_match": "message",
"match_mapping_type": "string",
"mapping": {
"norms": false,
"type": "text"
}
}
},
{
"string_fields": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"norms": false,
"type": "text"
}
}
}
],
"properties": {
"#timestamp": {
"type": "date"
},
"#version": {
"type": "keyword"
},
"geoip": {
"dynamic": "true",
"properties": {
"ip": {
"type": "ip"
},
"latitude": {
"type": "half_float"
},
"location": {
"type": "geo_point"
},
"longitude": {
"type": "half_float"
}
}
}
}
}
}
}
I send my data through Logstash, so here's the input part of the conf:
input {
beats {
port => 5044
tags => "fromBeats"
}
file {
path => [
"/home/secunix/logs/*",
"/tech/*"
]
start_position => "beginning"
sincedb_path => "/dev/null"
}
tcp {
port => 5514
type => "syslog"
tags => "from Syslog-ng"
}
}
filter {
if [type] == "syslog"{
grok {
match => ["message", "<(?<sys_priority>\d+?)>(?<syslog_timestamp>%{CISCOTIMESTAMP})\s(?<logsource>%{URIHOST})(\s(?:(?<application>.*?)(%(?<project>.*?))?))?:(?:\s)?(?<logmessage>.*$)"]
}
if [logmessage] {
mutate {
replace => [ "message", "%{logmessage}" ]
remove_field => [ "logmessage" ]
}
}
if [project] {
mutate {
replace => [ "type", "%{project}" ]
remove_field => [ "project" ]
}
}else if [application] {
mutate {
lowercase => [ "application" ]
}
mutate {
gsub => [ "application", " ", "_" ]
}
mutate {
replace => [ "type", "%{application}" ]
}
}else {
mutate {
replace => [ "type", "uknapp" ]
add_field => { "application" => "uknapp" }
}
}
}
}
and the output:
output {
elasticsearch {
hosts => ["localhost:9200"]
index => "logstash-%{+YYYY.MM.dd}"
#+++ sa Added by scr-sop-af-config-elksandbox. Do not remove this line.
user => "logstash"
#--- sa Added by scr-sop-af-config-elksandbox. Do not remove this line.
#+++ sa Added by scr-sop-af-config-elksandbox. Do not remove this line.
password => "logstash"
#--- sa Added by scr-sop-af-config-elksandbox. Do not remove this line.
}
}
Can someone tell me how I can fix this, please?
Using ELK 6.x, it seems I cannot plot points because geoip.location is not populated.
I also added a template, which I hope is correct. I'm not an expert, but I'm pretty sure my points aren't rendered because the data is missing there.
Kibana 6.4.2
Logstash 6.4.2-1
Elasticsearch 6.4.2
Here are the configs:
input {
udp {
port => 9996
codec => netflow {
versions => [5, 7, 9, 10]
}
type => netflow
}
}
filter {
geoip {
source => "[netflow][ipv4_src_addr]"
target => "src_geoip"
database => "/usr/share/GeoIP/GeoLite2-City.mmdb"
}
geoip {
source => "[netflow][ipv4_dst_addr]"
target => "dst_geoip"
database => "/usr/share/GeoIP/GeoLite2-City.mmdb"
}
}
Output:
output {
if [type] == "netflow" {
elasticsearch {
hosts => ["localhost:9200"]
index => "logstash-%{+YYYY.MM.dd}"
}
} else {
elasticsearch {
hosts => ["localhost:9200"]
sniffing => true
manage_template => false
index => "%{[#metadata][beat]}-%{+YYYY.MM.dd}"
document_type => "%{[#metadata][type]}"
}
}
}
The mapping looks like this:
"geoip": {
"dynamic": "true",
"properties": {
"ip": {
"type": "ip"
},
"latitude": {
"type": "half_float"
},
"location": {
"type": "geo_point"
},
"longitude": {
"type": "half_float"
}
}
},
Template
{
"logstash": {
"order": 0,
"version": 60001,
"index_patterns": [
"logstash-*"
],
"settings": {
"index": {
"refresh_interval": "5s"
}
},
"mappings": {
"_default_": {
"dynamic_templates": [
{
"message_field": {
"path_match": "message",
"match_mapping_type": "string",
"mapping": {
"type": "text",
"norms": false
}
}
},
{
"string_fields": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"type": "text",
"norms": false,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
],
"properties": {
"#timestamp": {
"type": "date"
},
"#version": {
"type": "keyword"
},
"geoip": {
"dynamic": true,
"properties": {
"ip": {
"type": "ip"
},
"location": {
"type": "geo_point"
},
"latitude": {
"type": "half_float"
},
"longitude": {
"type": "half_float"
}
}
}
}
}
},
"aliases": {}
}
}
My indexes come back with src or dst fields, but only the ones below:
dst_geoip.latitude 26.097
dst_geoip.location.lat 26.097
dst_geoip.location.lon -80.181
In Elasticsearch 5.6.3 I installed the ingest-attachment plugin. I am trying to set the term vector property to WithPositionsOffsets on the attachment's content. Where should I set this property in order to see highlighted results in my search?
The Document POCO is below:
public class Document
{
public int Id { get; set; }
public string Path { get; set; }
public string Content { get; set; }
public Attachment Attachment { get; set; }
}
Here is the CreateIndex function with mappings.
var indexResponse = client.CreateIndex(documentsIndex, c => c
.Settings(s => s
.Analysis(a => a
.Analyzers(ad => ad
.Custom("windows_path_hierarchy_analyzer", ca => ca
.Tokenizer("windows_path_hierarchy_tokenizer")
)
)
.Tokenizers(t => t
.PathHierarchy("windows_path_hierarchy_tokenizer", ph => ph
.Delimiter('\\')
)
)
)
)
.Mappings(m => m
.Map<Document>(mp => mp
.AutoMap()
.AllField(all => all
.Enabled(false)
)
.Properties(ps => ps
.Text(s => s
.Name(n => n.Path)
.Analyzer("windows_path_hierarchy_analyzer")
)
.Attachment(at => at.
Name(n => n.Attachment.Content)
.FileField(ff => ff
.Name("Content")
.TermVector(TermVectorOption.WithPositionsOffsets)
.Store()))
//.Text(s => s
// .Name(n => n.Attachment.Content)
// .TermVector(TermVectorOption.WithPositionsOffsets)
// .Store(true)
//)
.Object<Attachment>(a => a
.Name(n => n.Attachment)
.AutoMap()
)
)
)
)
);
In the .Mappings part I used a FileField to set the term vector and store properties. But the result is below:
{
"documents": {
"mappings": {
"document": {
"_all": {
"enabled": false
},
"properties": {
"attachment": {
"properties": {
"author": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"content": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"content_length": {
"type": "long"
},
"content_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"date": {
"type": "date"
},
"detect_language": {
"type": "boolean"
},
"indexed_chars": {
"type": "long"
},
"keywords": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"language": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"content": {
"type": "attachment",
"fields": {
"content": {
"type": "text",
"store": true,
"term_vector": "with_positions_offsets"
},
"author": {
"type": "text"
},
"title": {
"type": "text"
},
"name": {
"type": "text"
},
"date": {
"type": "date"
},
"keywords": {
"type": "text"
},
"content_type": {
"type": "text"
},
"content_length": {
"type": "integer"
},
"language": {
"type": "text"
}
}
},
"id": {
"type": "integer"
},
"path": {
"type": "text",
"analyzer": "windows_path_hierarchy_analyzer"
}
}
}
}
}
}
So I can't see highlighting in my query results. How should I do this?
To work with the ingest-attachment plugin, don't map the Attachment property as an attachment data type; that data type belongs to the mapper-attachments plugin, which is deprecated in 5.x and removed in 6.x.
I wrote a blog post about using ingest-attachment with .NET which includes an example mapping for Attachment; essentially, map it as an object data type and map the Content property on it as a text data type with .TermVector(TermVectorOption.WithPositionsOffsets) applied. You'll need to create this mapping in an index before indexing any documents with attachments into it.
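A minimal sketch of that mapping, reusing the Document and Attachment types from the question (index settings and the path analyzer from the original CreateIndex call omitted):
var createIndexResponse = client.CreateIndex(documentsIndex, c => c
    .Mappings(m => m
        .Map<Document>(mp => mp
            .AutoMap()
            .Properties(ps => ps
                .Object<Attachment>(a => a
                    .Name(n => n.Attachment)
                    .AutoMap()
                    .Properties(p => p
                        // store term vectors with positions and offsets on the
                        // extracted text so that highlighting can use them
                        .Text(t => t
                            .Name(n => n.Content)
                            .TermVector(TermVectorOption.WithPositionsOffsets)
                            .Store()
                        )
                    )
                )
            )
        )
    )
);
Documents are then indexed through an ingest pipeline with an attachment processor, which populates attachment.content; a highlight request on that field can use the stored term vectors.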