ElasticSearch Mapping Issue - Nested to Non-Nested - elasticsearch

I am creating a mapping for data generated by a computer vision application. However, I am getting an error when I test pushing an example data message to Elasticsearch. I have read plenty of forum threads where others have had this issue; some resolved it, but I have tried everything I know to try. I suspect there is a simple fix, but I am relatively new to Elasticsearch.
The index and mapping are created successfully using:
PUT vision_events
{
"settings" : {
"number_of_shards" : 5
},
"mappings" : {
"properties": {
"camera_id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"hit_counts": {
"type": "long"
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"intersection": {
"type": "boolean"
},
"label": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"locations": {
"type" : "nested",
"properties": {
"coords" : {
"type" : "float"
},
"location": {
"type": "text"
},
"street_segment": {
"type": "text"
},
"timestamp": {
"type": "date"
}
}
},
"pole_id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"timestamp": {
"type": "date"
}
}
}
}
Once that completes, I validate the mapping by pushing the following example data:
POST /vision_events/1?pretty=true
{
"pole_id": "mlk-central-2",
"camera_id": "mlk-central-cam-2",
"intersection": true,
"id": "644d1c06-4c60-4ed8-93b4-1aa79b87a622",
"label": "car",
"timestamp": 1586838108683,
"locations": [
{
"timestamp": 1586838109448,
"coords": 1626.3220383482665,
"street_segment": "None"
},
{
"timestamp": 1586838109832,
"coords": 1623.3129222859882,
"street_segment": "None"
}
],
"hit_counts": 2
}
This produces the following error:
{
"error" : {
"root_cause" : [
{
"type" : "illegal_argument_exception",
"reason" : "object mapping [locations] can't be changed from nested to non-nested"
}
],
"type" : "illegal_argument_exception",
"reason" : "object mapping [locations] can't be changed from nested to non-nested"
},
"status" : 400
}
The locations field is a list of objects, each containing the fields coords, location, street_segment and timestamp. Messages contain a varying number of locations. Any help would be greatly appreciated.

The mapping itself is fine. PUT it unchanged:
PUT vision_events
{"settings":{"number_of_shards":5},"mappings":{"properties":{"camera_id":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},"hit_counts":{"type":"long"},"id":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},"intersection":{"type":"boolean"},"label":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},"locations":{"type":"nested","properties":{"coords":{"type":"float"},"location":{"type":"text"},"street_segment":{"type":"text"},"timestamp":{"type":"date"}}},"pole_id":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},"timestamp":{"type":"date"}}}}
Insert a single doc in accordance with the document-indexing POST structure from the docs (note the _doc in the path):
POST /vision_events/_doc/1?pretty=true
{
"pole_id": "mlk-central-2",
"camera_id": "mlk-central-cam-2",
"intersection": true,
"id": "644d1c06-4c60-4ed8-93b4-1aa79b87a622",
"label": "car",
"timestamp": 1586838108683,
"locations": [
{
"timestamp": 1586838109448,
"coords": 1626.3220383482665,
"street_segment": "None"
},
{
"timestamp": 1586838109832,
"coords": 1623.3129222859882,
"street_segment": "None"
}
],
"hit_counts": 2
}
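As a quick sanity check (not part of the original answer, just a suggestion): once the document indexes via the corrected URL, a nested query against the locations objects should return it, for example:
GET /vision_events/_search
{
  "query": {
    "nested": {
      "path": "locations",
      "query": {
        "range": { "locations.timestamp": { "gte": 1586838109000 } }
      }
    }
  }
}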

Related

ElasticSearch : "copy_to" a nested fields

I am trying to use the ES "copy_to" attribute to replicate an object field into a nested field, but I get an error despite multiple tries. Here is my structure:
...
"identifiedBy": {
"type": "object",
"properties": {
"type": {
"type": "keyword",
"copy_to": "nested_identifiers.type"
},
"value": {
"type": "text",
"analyzer": "identifier-analyzer",
"copy_to": "nested_identifiers.type"
},
"note": {
"type": "text"
},
"qualifier": {
"type": "keyword"
},
"source": {
"type": "keyword",
"copy_to": "nested_identifiers.type"
},
"status": {
"type": "text"
}
}
},
"nested_identifiers": {
"type": "nested",
"properties": {
"type": {
"type": "keyword",
},
"value": {
"type": "text",
"analyzer": "identifier-analyzer",
},
"source": {
"type": "keyword",
}
}
}
...
The mapping error is
java.lang.IllegalArgumentException: Illegal combination of [copy_to] and [nested]
mappings: [copy_to] may only copy data to the current nested document or any of its
parents, however one [copy_to] directive is trying to copy data from nested object [null]
to [nested_identifiers]
I also tried placing the "copy_to" at the "identifiedBy" root level: doesn't work.
I also tried using a "fields" property inside "identifiedBy" and "copy_to" to that subfield: doesn't work.
Does anyone know a solution to this problem?
Thanks for your help.
Tldr;
Because of how Elasticsearch indexes nested documents, this is not possible without updating the mapping.
There is a workaround, using the include_in_root: true setting.
Otherwise, I suggest you pre-process your data before indexing it and copy the data over to the nested field during that pre-processing, maybe using an ingest pipeline?
Ingest Pipeline
PUT /72270706/
{
"mappings": {
"properties": {
"root_type":{
"type": "keyword"
},
"nested_doc":{
"type": "nested",
"properties": {
"nested_type":{
"type": "keyword"
}
}
}
}
}
}
PUT _ingest/pipeline/set_nested_type
{
"processors": [
{
"set": {
"field": "nested_doc.nested_type",
"copy_from": "root_type"
}
}
]
}
POST /72270706/_doc?pipeline=set_nested_type
{
"root_type": "a type"
}
GET /72270706/_search
This should give you:
{
"took" : 392,
"timed_out" : false,
"_shards" : {
...
},
"hits" : {
...
"max_score" : 1.0,
"hits" : [
{
"_index" : "72270706",
"_id" : "laOB0YABOgujegeQNA8D",
"_score" : 1.0,
"_source" : {
"root_type" : "a type",
"nested_doc" : {
"nested_type" : "a type"
}
}
}
]
}
}
To work around
...
"identifiedBy": {
"type": "object",
"properties": {
"type": {
"type": "keyword",
"copy_to": "nested_identifiers.type"
},
"value": {
"type": "text",
"analyzer": "identifier-analyzer",
"copy_to": "nested_identifiers.type"
},
"note": {
"type": "text"
},
"qualifier": {
"type": "keyword"
},
"source": {
"type": "keyword",
"copy_to": "nested_identifiers.type"
},
"status": {
"type": "text"
}
}
},
"nested_identifiers": {
"type": "nested",
"include_in_root": true,
"properties": {
"type": {
"type": "keyword",
},
"value": {
"type": "text",
"analyzer": "identifier-analyzer",
},
"source": {
"type": "keyword",
}
}
}
...
You will need to reindex the existing data.
But be aware that copy_to will not copy the information into the nested object itself, but to another field with the same name that is not nested.
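A minimal sketch of that reindex step, assuming hypothetical source and destination index names (my_old_index / my_new_index):
POST _reindex
{
  "source": { "index": "my_old_index" },
  "dest": { "index": "my_new_index" }
}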

How to sort on a text field with elastic search

{
"parent" : "some_id",
"type" : "support",
"metadata" : {
"account_type" : "Regular",
"subject" : "Test Subject",
"user_name" : "John Doe",
"origin" : "Origin",
"description" : "TEST",
"media" : [ ],
"ticket_number" : "XXXX",
"status" : "completed",
},
"create_time" : "2021-02-24T15:08:57.750Z",
"entity_name" : "comment"
}
This is my demo data, and when I try to sort by metadata.status, e.g. ->
GET comments-*/_search
{
"query": {
"bool": {
"must": [{
"match": {
"type": "support"
}
}]
}
},
"from": 0,
"size": 50,
"sort": [{
"metadata.status": {
"order": "desc"
}
}]
}
it says -> Fielddata is disabled on text fields by default. Set fielddata=true on [metadata.status] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead.
I am not sure how to achieve this as I am very new to Elasticsearch. Any help would be appreciated.
For string fields, you can only sort on fields of type "keyword".
If you don't set the mappings before sending docs, Elasticsearch dynamic mapping will create 2 fields:
in this case "status" and "status.keyword".
So try with "metadata.status.keyword".
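In other words, the sort clause would look something like this (assuming the default dynamic mapping created the .keyword sub-field):
GET comments-*/_search
{
  "sort": [
    { "metadata.status.keyword": { "order": "desc" } }
  ]
}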
TL;DR
For fields you will not run full-text searches on (like status flags), it is good practice to store only the keyword version of the field.
To do that you have to set the mappings before indexing any document.
There is a trick:
Ingest Data
POST test_predipa/_doc
{
"parent" : "some_id",
"type" : "support",
"metadata" : {
"account_type" : "Regular",
"subject" : "Test Subject",
"user_name" : "John Doe",
"origin" : "Origin",
"description" : "TEST",
"media" : [ ],
"ticket_number" : "XXXX",
"status" : "completed"
},
"create_time" : "2021-02-24T15:08:57.750Z",
"entity_name" : "comment"
}
Get the autogenerated mappings
GET test_predipa/_mapping
Create a new empty index with the same mappings, modified as you want (in this case, remove the text type from metadata.status and keep only the keyword one).
PUT test_predipa_new
{
"mappings": {
"properties": {
"create_time": {
"type": "date"
},
"entity_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"metadata": {
"properties": {
"account_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"description": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"origin": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "keyword"
},
"subject": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ticket_number": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"parent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
Move the data from the old index to the new empty one
POST _reindex
{
"source": {
"index": "test_predipa"
},
"dest": {
"index": "test_predipa_new"
}
}
Run the sort query
GET test_predipa_new/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"type": "support"
}
}
]
}
},
"from": 0,
"size": 50,
"sort": [
{
"metadata.status": {
"order": "desc"
}
}
]
}
Most probably, the issue is that metadata.status is of text type, which is not sortable (see docs). You can sort on a textual field only if it is of keyword type.
Please check the mapping of your index. Most probably, your index has default mapping (see docs), and a keyword sub-field is automatically assigned to every field with a string value.
TL;DR: try to run this query
GET comments-*/_search
{
"query": {
"bool": {
"must": [{
"match": {
"type": "support"
}
}]
}
},
"from": 0,
"size": 50,
"sort": [{
"metadata.status.keyword": {
"order": "desc"
}
}]
}

ElasticSearch Delete by Query is not working

I am trying to delete the docs from my index whose id is greater than 1500001. I have copied the code from the Elastic documentation but it is not giving me any results. The code is
POST /us_data_master/_delete_by_query
{
"query": {
"range" : {
"id" : {
"gte" : 1500001
}
}
}
}
The response I get is
{
"error" : {
"root_cause" : [
{
"type" : "action_request_validation_exception",
"reason" : "Validation Failed: 1: query is missing;"
}
],
"type" : "action_request_validation_exception",
"reason" : "Validation Failed: 1: query is missing;"
},
"status" : 400
}
I don't understand what the problem is. Looking forward to your help.
Thanks
Edit 1:
The mapping as requested is
{
"mapping": {
"_doc": {
"properties": {
"#timestamp": {
"type": "date"
},
"#version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"address": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"city_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"contact_no": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"date_added": {
"type": "date"
},
"date_updated": {
"type": "date"
},
"featured": {
"type": "long"
},
"id": {
"type": "long"
},
"location_id": {
"type": "long"
},
"main_cate": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"slug": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"source": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"state_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "long"
},
"zip_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
I'm guessing you are using Kibana. There is an extra empty line between the POST line and the query body in your request, as shown below:
POST /us_data_master/_delete_by_query
<------ Remove this empty line
{
"query": {
"range" : {
"id" : {
"gte" : 1500001
}
}
}
}
Below is how it should be:
POST /us_data_master/_delete_by_query
{
"query": {
"range" : {
"id" : {
"gte" : 1500001
}
}
}
}
That should resolve the issue.
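To double-check, you can count how many documents still match the range once the delete finishes; a count of 0 confirms they were removed (this assumes nothing is indexing new docs into that range in the meantime):
GET /us_data_master/_count
{
  "query": {
    "range": {
      "id": {
        "gte": 1500001
      }
    }
  }
}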

ElasticSearch xcontent for unknown value of type class java.math.BigInteger

I have created the index with the following mapping in elasticsearch:
PUT my_master
{
"mappings": {
"documents": {
"properties": {
"fields": {
"type": "nested",
"properties": {
"uid": {
"type": "keyword"
},
"value": {
"type": "text",
"copy_to": "fulltext",
"fields": {
"raw": {
"type": "keyword",
"ignore_above": 32766
}
}
}
}
},
"fulltext": {
"type": "text"
}
}
}
}
}
Added a document into it:
POST my_master/documents/1/_create
{
"fields": [
{
"uid": "number",
"value": 111111111111111000000000000001100000000000000
}
]
}
After adding it, I am using the update API to update the document:
POST my_master/documents/1/_update
{
"doc":{
"fields": [
{
"uid": "number",
"value": 1111111111111110000000000000011000000000000000
}
]
}}
But Elasticsearch gives me the following error while updating the document:
cannot write xcontent for unknown value of type class java.math.BigInteger
Please help me here. What is wrong, and how do I solve this issue?
According to the mapping, I would expect you to send the value as a string, so I think this might work:
"1111111111111110000000000000011000000000000000"

elasticsearch dynamic field nested detection

Hi, I'm trying to create an index in Elasticsearch without defining the mapping, so what I did was this:
PUT my_index1/my_type/1
{
"group" : "fans",
"user" : [
{
"first" : "John",
"last" : "Smith",
"age" : "1",
"enabled": false
},
{
"first" : "Alice",
"last" : "White",
"age" : "10",
"enabled": true
}
]
}
If I do this, Elasticsearch will create a mapping for this index, and the result is:
{
"my_index1": {
"mappings": {
"my_type": {
"properties": {
"group": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user": {
"properties": {
"age": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"enabled": {
"type": "boolean"
},
"first": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"last": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
}
If you notice, the user property doesn't have a type of nested, while the other properties have their types defined by Elasticsearch. Is there a way to do this automatically? The mapping should look like this for the user property:
"user": {
type:"nested"
"properties": {
"age": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"enabled": {
"type": "boolean"
},
"first": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"last": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
which is missing. I'm currently using NEST.
Is there a way to define a dynamic mapping that detects whether newly added data in the index is nested?
By default, Elasticsearch/Lucene has no concept of inner objects. Therefore, it flattens object hierarchies into a simple list of field names and values.
The above document would be converted internally into a document that looks more like this: (See Nested field type for more details)
{
"group" : "fans",
"user.first" : [ "alice", "john" ],
"user.last" : [ "smith", "white" ]
}
There is no beautiful answer here. A common approach is to use a dynamic template to convert object to nested (however, a side effect is that all fields of object type will be mapped as nested):
{
"mappings": {
"dynamic_templates": [
{
"objects": {
"match": "*",
"match_mapping_type": "object",
"mapping": {
"type": "nested"
}
}
}
]
}
}
Another approach is to specify the mapping for the field before inserting data.
PUT <your index>
{
"mappings": {
"properties": {
"user": {
"type": "nested"
}
}
}
}
You can define a dynamic template containing your own custom mapping, which is then applied when you index documents into the index.
Below is a step-by-step procedure with which the mapping of the user field is automatically set to nested type.
First, define a dynamic template for the index as shown below; its match parameter matches field names following the pattern user* and maps them to nested type.
PUT /<index-name>
{
"mappings": {
"dynamic_templates": [
{
"nested_users": {
"match": "user*",
"mapping": {
"type": "nested"
}
}
}
]
}
}
After creating this template, you need to index the documents into it
POST /<index-name>/_doc/1
{
"group": "fans",
"user": [
{
"first": "John",
"last": "Smith",
"age": "1",
"enabled": false
},
{
"first": "Alice",
"last": "White",
"age": "10",
"enabled": true
}
]
}
Now when you fetch the mapping of the index using the Get Mapping API, it will be similar to what you expect to see:
GET /<index-name>/_mapping?pretty
{
"index-name": {
"mappings": {
"dynamic_templates": [
{
"nested_users": {
"match": "user*",
"mapping": {
"type": "nested"
}
}
}
],
"properties": {
"group": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user": {
"type": "nested", // note this
"properties": {
"age": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"enabled": {
"type": "boolean"
},
"first": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"last": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
Or, as #Jacky1205 mentioned, if it is not field-specific then you can use the below template, which matches all object-type fields and maps them to nested type:
{
"mappings": {
"dynamic_templates": [
{
"nested_users": {
"match": "*",
"match_mapping_type": "object",
"mapping": {
"type": "nested"
}
}
}
]
}
}
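Whichever template you use, the nested mapping only pays off if you query the inner objects with a nested query, so that the fields of each user object are matched together; a minimal sketch against the example index above:
GET /<index-name>/_search
{
  "query": {
    "nested": {
      "path": "user",
      "query": {
        "bool": {
          "must": [
            { "match": { "user.first": "Alice" } },
            { "match": { "user.age": "10" } }
          ]
        }
      }
    }
  }
}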
