Couchbase - Elasticsearch: custom dynamic type - elasticsearch

I'm using XDCR replication to sync the data between CB and Elasticsearch, using the couchbase transport plugin for Elasticsearch.
As far as I understand, all documents in Couchbase will come with the type "couchbaseDocument". But I have different document types, with a specific mapping for each one.
Is there a way to have a specific dynamic type instead of the default "couchbaseDocument"?
(where if the JSON document has a field "type":"beer" it will be indexed in ES as _type:"beer", and if "type":"wine" it will be indexed as _type:"wine")
What I have in couchbase:
bucket: "drinks",
beer_1234:
{
"type": "beer",
"name": "leffe"
}
How it's indexed in Elasticsearch:
{
"_index": "drinks",
"_type": "couchbaseDocument", // <======================== ????
"_id": "beer_1234",
"_version": 1,
"_source": {
"doc": {
"type": "beer",
"name": "leffe"
},
"meta": {
"id": "beer_1234",
"rev": "9-000049e945bd62fa0000000000000000",
"expiration": 0,
"flags": 0
}
}
What I need:
{
"_index": "drinks",
"_type": "beer", // <======================== NICE TYPE
"_id": "beer_1234",
"_version": 1,
"_source": {
"doc": {
"type": "beer",
"name": "leffe"
},
"meta": {
"id": "beer_1234",
"rev": "9-000049e945bd62fa0000000000000000",
"expiration": 0,
"flags": 0
}
}
Thanks

The philosophy is to modify the default transport mapping to index your type field. For example:
curl -XPUT 'http://localhost:9200/drinks/' -d '{
"mappings": {
"couchbaseCheckpoint": {
"dynamic": "true",
"_source": {
"includes": [
"doc.*"
]
},
"dynamic_templates": [
{
"store_no_index": {
"match": "*",
"mapping": {
"store": "no",
"index": "no",
"include_in_all": false
}
}
}
]
},
"couchbaseDocument": {
"_all": {
"enabled": false
},
"dynamic": "true",
"_source": {
"includes": [
"meta.*"
]
},
"dynamic_templates": [
{
"all_strings_to_avoid_collisions": {
"match": "*",
"mapping": {
"store": "no",
"index": "not_analyzed",
"include_in_all": false,
"type": "string",
"analyzer": "whitespace"
}
}
}
],
"properties": {
"doc": {
"properties": {
"type": {
"type": "string"
}
}
},
"meta": {
"properties": {
"id": {
"type": "string",
"analyzer": "whitespace"
}
}
}
}
}
}
}'

Related

Elastic Search_as_you_type case insensitive match

I want to perform partial search on 3 fields: UUID, tracking_id, and zip_code. Each contains only one word and no special characters or spaces, except for hyphens in the UUID.
I'm not sure whether I should use search_as_you_type or edge ngram tokenizer or edge ngram token filter, so I tried search_as_you_type first.
I have created this index:
{
"settings": {
"index": {
"sort.field": [ "created_at", "id" ],
"sort.order": [ "desc", "desc" ]
}
},
"mappings": {
"properties": {
"id": { "type": "keyword", "fields": { "raw": { "type": "search_as_you_type" }}},
"current_status": { "type": "keyword" },
"tracking_id": { "type": "wildcard" },
"invoice_number": { "type": "keyword" },
"created_at": { "type": "date" }
}
}
}
and inserted this doc:
{
"id": "SIGRID",
"current_status": "unassigned",
"tracking_id": "AXXH",
"invoice_number": "xxx",
"created_at": "2021-03-24T09:36:10.717672467Z"
}
I sent this query:
{"query": {
"multi_match": {
"query": "sigrid",
"type": "bool_prefix",
"fields": [
"id"
]
}
}
}
This returns no result, but SIGRID, S, and SIG do return the result. How can I make the search_as_you_type query case-insensitive? Should I use an edge ngram tokenizer instead? Thanks
You can define a custom normalizer with a lowercase filter. The lowercase filter ensures that all letters are converted to lowercase both before indexing the document and when searching. Modify your index mapping as follows:
{
"settings": {
"index": {
"sort.field": [
"created_at",
"id"
],
"sort.order": [
"desc",
"desc"
]
},
"analysis": {
"normalizer": {
"my_normalizer": {
"type": "custom", // note this
"filter": [
"lowercase"
]
}
}
}
},
"mappings": {
"properties": {
"id": {
"type": "keyword",
"normalizer": "my_normalizer", // note this
"fields": {
"raw": {
"type": "search_as_you_type"
}
}
},
"current_status": {
"type": "keyword"
},
"tracking_id": {
"type": "wildcard"
},
"invoice_number": {
"type": "keyword"
},
"created_at": {
"type": "date"
}
}
}
}
Search Query:
{
"query": {
"multi_match": {
"query": "sigrid",
"type": "bool_prefix"
}
}
}
Search Result:
"hits": [
{
"_index": "66792606",
"_type": "_doc",
"_id": "1",
"_score": 2.0,
"_source": {
"id": "SIGRID",
"current_status": "unassigned",
"tracking_id": "AXXH",
"invoice_number": "xxx",
"created_at": "2021-03-24T09:36:10.717672467Z"
}
}
]

Elasticsearch reindexing multi-type parent/child index (v5.0) to join type index (v6.2)

I am reindexing my index data from ES 5.0 (parent-child) to ES 6.2 (join type).
The data in the ES 5.0 index is stored as parent-child documents in separate types, and for the reindex I have created a new index/mapping based on 6.2 in my new cluster.
The parent documents reindex flawlessly to the new index, but the child documents throw the error below:
{
"index": "index_two",
"type": "_doc",
"id": "AVpisCkMuwDYFnQZiFXl",
"cause": {
"type": "mapper_parsing_exception",
"reason": "failed to parse",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "[routing] is missing for join field [field_relationship]"
}
},
"status": 400
}
The script I am using to reindex the data:
{
"source": {
"remote": {
"host": "http://myescluster.com:9200",
"socket_timeout": "1m",
"connect_timeout": "20s"
},
"index": "index_two",
"type": ["actions"],
"size": 5000,
"query":{
"bool":{
"must":[
{"term": {"client_id.raw": "cl14ous0ydao"}}
]
}
}
},
"dest": {
"index": "index_two",
"type": "_doc"
},
"script": {
"params": {
"jdata": {
"name": "actions"
}
},
"source": "ctx._routing=ctx._routing;ctx.remove('_parent');params.jdata.parent=ctx._source.user_id;ctx._source.field_relationship=params.jdata"
}
}
I have passed the routing field in the Painless script, as the documents are dynamic in the source index.
Mapping of the destination index
{
"index_two": {
"mappings": {
"_doc": {
"dynamic_templates": [
{
"template_actions": {
"match_mapping_type": "string",
"mapping": {
"fields": {
"raw": {
"index": true,
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
}
}
}
],
"date_detection": false,
"properties": {
"attributes": {
"type": "nested"
}
},
"cl_other_params": {
"type": "nested"
},
"cl_triggered_ts": {
"type": "date"
},
"cl_utm_params": {
"type": "nested"
},
"end_ts": {
"type": "date"
},
"field_relationship": {
"type": "join",
"eager_global_ordinals": true,
"relations": {
"users": [
"actions",
"segments"
]
}
},
"ip_address": {
"type": "ip"
},
"location": {
"type": "geo_point"
},
"processed_ts": {
"type": "date"
},
"processing_time": {
"type": "date"
},
"products": {
"type": "nested",
"properties": {
"traits": {
"type": "nested"
}
}
},
"segment_id": {
"type": "integer"
},
"start_ts": {
"type": "date"
}
}
}
}
}
My sample source document
{
"_index": "index_two",
"_type": "actions",
"_id": "AVvKUYcceQCc2OyLKWZ9",
"_score": 7.4023576,
"_routing": "cl14ous0ydaob71ab2a1-837c-4904-a755-11e13410fb94",
"_parent": "cl14ous0ydaob71ab2a1-837c-4904-a755-11e13410fb94",
"_source": {
"user_id": "cl14ous0ydaob71ab2a1-837c-4904-a755-11e13410fb94",
"client_id": "cl14ous0ydao",
"session_id": "CL-e0ec3941-6dad-4d2d-bc9b",
"source": "betalist",
"action": "pageview",
"action_type": "pageview",
"device": "Desktop",
"ip_address": "49.35.14.224",
"location": "20.7333 , 77",
"attributes": [
{
"key": "url",
"value": "https://www.google.com/",
"type": "string"
}
],
"products": []
}
}
I had the same issue, and while searching the Elasticsearch discussion forums I found this, which works:
POST _reindex
{
"source": {
"index": "old_index",
"type": "actions"
},
"dest": {
"index": "index_two"
},
"script": {
"source": """
ctx._type = "_doc";
String routingCode = ctx._source.user_id;
Map join = new HashMap();
join.put('name', 'actions');
join.put('parent', routingCode);
ctx._source.put('field_relationship', join);
ctx._parent = null;
ctx._routing = new StringBuffer(routingCode)"""
}
}
Hope this helps :) .
I'd like to point out that routing is generally not required for a join field, however if you're creating the child before the parent is created, then you're going to face this problem.
It's advisable to re-index all the parents first then the children.

Search across _all field in Elastic and return results with highlighting

I am using Elastic 5.4 and want to query across an index containing documents of multiple types (type a and type b). Below are example documents in the index:
Documents:
{
"_index": "test",
"_type": "a",
"_id": "1",
"_source": {
"id": "1",
"name": "john-usa-soccer",
"class": "5",
"lastseen": "2017-07-05",
"a_atts": {
"lastname": "tover",
"hobby": "soccer",
"country": "usa"
}
}
}
{
"_index": "test",
"_type": "b",
"_id": "2",
"_source": {
"id": "2",
"name": "john-usa",
"class": "5",
"lastseen": "2017-07-05",
"b_atts": {
"lastname": "kaml",
"hobby": "baseball",
"country": "usa"
}
}
}
Mapping:
{
"settings": {
"analysis": {
"analyzer": {
"my_ngram_analyzer": {
"tokenizer": "my_ngram_tokenizer"
}
},
"tokenizer": {
"my_ngram_tokenizer": {
"type": "ngram",
"min_gram": "3",
"max_gram": "3",
"token_chars": [
"letter",
"digit"
]
}
}
}
},
"mappings": {
"a": {
"dynamic_templates": [
{
"strings": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"type": "text",
"analyzer": "my_ngram_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"suggest": {
"type": "completion",
"analyzer": "simple"
},
"analyzer1": {
"type": "text",
"analyzer": "simple"
},
"analyzer2": {
"type": "text",
"analyzer": "standard"
}
}
}
}
}
]
},
"b": {
"dynamic_templates": [
{
"strings": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"type": "text",
"analyzer": "my_ngram_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"suggest": {
"type": "completion",
"analyzer": "simple"
},
"analyzer1": {
"type": "text",
"analyzer": "simple"
},
"analyzer2": {
"type": "text",
"analyzer": "standard"
}
}
}
}
}
]
}
}
}
My query is meant to search for all documents which contain 'john' in any field of any type, and to highlight the fields where the match was found. This query is constructed as per the Elastic documentation. My schema mapping has the ngram analyzer configured as the analyzer, instead of the default analyzer, for all fields of type string in the schema.
Query: http://localhost:9200/student/_search
{
"query": {
"bool": {
"should": [
{ "match": { "_all": "john"} }
]
}
},
"highlight": {
"fields": {
"name": {
"require_field_match": false
},
"a_atts.lastname":{
"require_field_match": false
},
"a_atts.hobby":{
"require_field_match": false
},
"a_atts.country":{
"require_field_match": false
}
}
}
}
Response:
{
"took": 79,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.17669111,
"hits": [
{
"_index": "student",
"_type": "a",
"_id": "AV1WjBeYEZrDBYsdGMtY",
"_score": 0.17669111,
"_source": {
"name": "john-usa-soccer",
"class": "5",
"lastseen": "2017-07-05",
"a_atts": {
"lastname": "tover",
"hobby": "soccer",
"country": "usa"
}
}
},
{
"_index": "student",
"_type": "b",
"_id": "AV1WjHFxEZrDBYsdGMtZ",
"_score": 0.17669111,
"_source": {
"name": "john-usa",
"class": "5",
"lastseen": "2017-07-05",
"b_atts": {
"lastname": "kaml",
"hobby": "baseball",
"country": "usa"
}
}
}
]
}
}
However, executing the above query against the index returns the matched documents with their _source content but without the highlight field. The following is missing:
"highlight": {
"name": [
"<em>john</em>-usa-soccer"
]
}
How can I return highlight in the results?
I got highlighter to work by following the answer provided in this link.
"highlight": {
"fields": {
"*": {}
},
"require_field_match": false
}

Kibana show no results for geohash aggregation

I'd like to show my dataset on tile map. I'm using kibana 4.1.1.
My data is set like this:
{
"_index": "business-data",
"_type": "users",
"_id": "AVkRMFztZOUsFUpKvZ-0",
"_score": 1,
"_source": {
"first_name": "Nessa",
"gender": "female",
"location": {
"lat": 48.8668481401949,
"lon": 2.19256871957155
}
}
}
The mapping:
{
"mappings": {
"user": {
"properties": {
"first_name": {
"type": "string"
},
"gender": {
"type": "string"
},
"location": {
"type": "geo_point"
}
}
}
}
}
Location is a valid geo_point.
The tile map is shown when the visualisation is being created, but I get "No result" when the basic geohash aggregation by the location field is requested.
giving:
I managed to do it with your advice, thanks. Anyway, the Kibana index is not event-based in my case.
New mapping:
{
"mappings": {
"user": {
"properties": {
"@timestamp" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"user_id": {
"type": "integer"
},
"first_name": {
"type": "string"
},
"gender": {
"type": "string"
},
"age": {
"type": "integer"
},
"location": {
"type": "geo_point"
}
}
}
}
}

ElasticSearch _suggest not returning results

I have an index that I'd like to get suggestions on:
{
"book": {
"_index": {
"enabled": true
},
"_id": {
"index": "not_analyzed",
"store": "yes"
},
"properties": {
"author": {
"type": "completion"
},
"characters": {
"type": "string"
},
"copies": {
"type": "long",
"ignore_malformed": false
},
"otitle": {
"type": "string"
},
"tags": {
"type": "string"
},
"title": {
"type": "string"
},
"year": {
"type": "long",
"ignore_malformed": false,
"index": "analyzed"
},
"available": {
"type": "boolean"
}
}
}
}
I've loaded some data into it using _bulk endpoint.
When I try to query against the suggest endpoint:
POST library/book/_suggest
{
"my_suggestion_1":{
"text": "He",
"term":{
"analyzer":"standard",
"field":"author"
}
}
}
I get back this:
{
"_index": "library",
"_type": "book",
"_id": "_suggest",
"_version": 8,
"created": false
}
What am I missing here?
I think you just need to use completion instead of term in your request body when you ask for suggestions:
POST library/book/_suggest
{
"my_suggestion_1":{
"text": "He",
"completion":{
"analyzer":"standard",
"field":"author"
}
}
}

Resources