ElasticSearch Illegal Argument Exception - elasticsearch

I'm using the latest version of Elasticsearch on Ubuntu 16.04 and I'm having a small issue putting data into it.
Here is the relevant part of my JSON document:
{ "products" : {
"232CDFDW89ENUXRB" : {
"sku" : "232CDFDW89ENUXRB",
"productFamily" : "Compute Instance",
"attributes" : {
"servicecode" : "AmazonEC2",
"location" : "US East (N. Virginia)",
"locationType" : "AWS Region",
"instanceType" : "d2.8xlarge",
"currentGeneration" : "Yes",
"instanceFamily" : "Storage optimized",
"vcpu" : "36",
"physicalProcessor" : "Intel Xeon E5-2676v3 (Haswell)",
"clockSpeed" : "2.4 GHz",
"memory" : "244 GiB",
"storage" : "24 x 2000 HDD",
"networkPerformance" : "10 Gigabit",
"processorArchitecture" : "64-bit",
"tenancy" : "Host",
"operatingSystem" : "Linux",
"licenseModel" : "No License required",
"usagetype" : "HostBoxUsage:d2.8xlarge",
"operation" : "RunInstances",
"enhancedNetworkingSupported" : "Yes",
"preInstalledSw" : "NA",
"processorFeatures" : "Intel AVX; Intel AVX2; Intel Turbo" }
}
}
}
And here is the response returned by ES when I try "PUT http://localhost:9200/aws":
{ "error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "unknown setting [index.products.232CDFDW89ENUXRB.attributes.clockSpeed] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
}
],
"type": "illegal_argument_exception",
"reason": "unknown setting [index.products.232CDFDW89ENUXRB.attributes.clockSpeed] please check that any required plugins are installed, or check the breaking changes documentation for removed settings" }, "status": 400 }
It seems to me that ES thinks "clockSpeed" is some sort of setting...?
I was hoping to use dynamic mapping to speed the process up, instead of first mapping the whole document and then importing it into ES.
Any suggestion?

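The issue is that you are missing the document type and document id when indexing the document through the PUT http://localhost:9200/aws command. A PUT to the bare index name is a create-index request, so Elasticsearch tries to read the request body as index settings, which is why it complains about an "unknown setting [index.products...]".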
Proper way to index document is:
POST my-index/my-type/my-id-1
{
"name": "kibana"
}
i.e. you have to provide the document type (here my-type) and, optionally, the document id (here my-id-1). With POST the document id is optional: if you don't provide one, Elasticsearch generates an alphanumeric id for you.
A couple of other ways to index a document:
POST my-index/my-type
{
"name": "kibana"
}
//if you want to index document through PUT then you must provide document id
PUT my-index/my-type/my-id-1
{
"name": "kibana"
}
Note: If automatic index creation is disabled then you have to create index before indexing documents.

Given a clean mapping, -XPOST works perfectly for me on Elasticsearch 5.1.1:
$ curl -XPOST localhost:9200/productsapp/productdocs -d '
{ "products" : {
"sku1" : {
"sku" : "SKU-Name",
"productFamily" : "Compute Instance",
"attributes" : {
"servicecode" : "AmazonEC2",
"location" : "US East (N. Virginia)",
"locationType" : "AWS Region",
"instanceType" : "d2.8xlarge",
"currentGeneration" : "Yes",
"instanceFamily" : "Storage optimized",
"vcpu" : "36",
"physicalProcessor" : "Intel Xeon E5-2676v3 (Haswell)",
"clockSpeed" : "2.4 GHz",
"memory" : "244 GiB",
"storage" : "24 x 2000 HDD",
"networkPerformance" : "10 Gigabit",
"processorArchitecture" : "64-bit",
"tenancy" : "Host",
"operatingSystem" : "Linux",
"licenseModel" : "No License required",
"usagetype" : "HostBoxUsage:d2.8xlarge",
"operation" : "RunInstances",
"enhancedNetworkingSupported" : "Yes",
"preInstalledSw" : "NA",
"processorFeatures" : "Intel AVX; Intel AVX2; Intel Turbo" }
}
}
}'
{"_index":"productsapp","_type":"productdocs","_id":"AVuhXdYYUiSguAb0FsSX","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"created":true}
GET the inserted doc
curl -XGET localhost:9200/productsapp/productdocs/_search
{"took":11,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":1,"max_score":1.0,"hits":[{"_index":"productsapp","_type":"productdocs","_id":"AVuhXdYYUiSguAb0FsSX","_score":1.0,"_source":{ "products" : {
"sku1" : {
"sku" : "SKU-Name",
"productFamily" : "Compute Instance",
"attributes" : {
"servicecode" : "AmazonEC2",
"location" : "US East (N. Virginia)",
"locationType" : "AWS Region",
"instanceType" : "d2.8xlarge",
"currentGeneration" : "Yes",
"instanceFamily" : "Storage optimized",
"vcpu" : "36",
"physicalProcessor" : "Intel Xeon E5-2676v3 (Haswell)",
"clockSpeed" : "2.4 GHz",
"memory" : "244 GiB",
"storage" : "24 x 2000 HDD",
"networkPerformance" : "10 Gigabit",
"processorArchitecture" : "64-bit",
"tenancy" : "Host",
"operatingSystem" : "Linux",
"licenseModel" : "No License required",
"usagetype" : "HostBoxUsage:d2.8xlarge",
"operation" : "RunInstances",
"enhancedNetworkingSupported" : "Yes",
"preInstalledSw" : "NA",
"processorFeatures" : "Intel AVX; Intel AVX2; Intel Turbo" }
}
}
}}]}}
The mapping it creates is as below, with clockSpeed as text type.
curl -XGET localhost:9200/productsapp/productdocs/_mapping?pretty=true
{
"productsapp" : {
"mappings" : {
"productdocs" : {
"properties" : {
"products" : {
"properties" : {
"232CDFDW89ENUXRB" : {
"properties" : {
"attributes" : {
"properties" : {
"clockSpeed" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"currentGeneration" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"enhancedNetworkingSupported" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"instanceFamily" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"instanceType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"licenseModel" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"location" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"locationType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"memory" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"networkPerformance" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"operatingSystem" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"operation" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"physicalProcessor" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"preInstalledSw" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"processorArchitecture" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"processorFeatures" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"servicecode" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"storage" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tenancy" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"usagetype" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"vcpu" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"productFamily" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"sku" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
}
}
}
}
Can you check your mapping for attributes.clockSpeed and make sure it's not broken?
If you want to update the document, do an -XPUT on the id of the first document (which is AVuhXdYYUiSguAb0FsSX).
In the following example, I am updating the sku field to "sku name updated":
curl -XPUT localhost:9200/productsapp/productdocs/AVuhXdYYUiSguAb0FsSX -d '
{
"products" : {
"sku1" : {
"sku" : "sku name updated",
"productFamily" : "Compute Instance",
"attributes" : {
"servicecode" : "AmazonEC2",
"location" : "US East (N. Virginia)",
"locationType" : "AWS Region",
"instanceType" : "d2.8xlarge",
"currentGeneration" : "Yes",
"instanceFamily" : "Storage optimized",
"vcpu" : "36",
"physicalProcessor" : "Intel Xeon E5-2676v3 (Haswell)",
"clockSpeed" : "2.4 GHz",
"memory" : "244 GiB",
"storage" : "24 x 2000 HDD",
"networkPerformance" : "10 Gigabit",
"processorArchitecture" : "64-bit",
"tenancy" : "Host",
"operatingSystem" : "Linux",
"licenseModel" : "No License required",
"usagetype" : "HostBoxUsage:d2.8xlarge",
"operation" : "RunInstances",
"enhancedNetworkingSupported" : "Yes",
"preInstalledSw" : "NA",
"processorFeatures" : "Intel AVX; Intel AVX2; Intel Turbo"
}
}
}}'
{"_index":"productsapp","_type":"productdocs","_id":"AVu5OLfHPw6Pv_3O38-V","_version":2,"result":"updated","_shards":{"total":2,"successful":1,"failed":0},"created":false}

Related

Elastic: How to correct the auto generated mapping?

I'm using Elastic cloud hosted in Azure and use NEST for the client. I have a part of auto generated mapping that I need to change from
"Bonus" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
to
"Bonus" : {
"properties" : {
"Amount" : {
"properties" : {
"Value" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"PayrollSyncDateTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
When I tried to do it, I get illegal_argument_exception error with the message "can't merge a non object mapping [activityData.Bonus] with an object mapping". How can I correct the auto generated mapping?

Why the _source of the index I created by default is disabled in elasticsearch

Why is the _source of an index I created disabled by default in Elasticsearch?
enter image description here
I did not use any templates. No matter how the index is created, _source is disabled.
My cluster may have been short on disk space before, but after my cleanup the disk usage is below 20%.
This is my index detail info:
{
"test" : {
"aliases" : { },
"mappings" : {
"_source" : {
"enabled" : false
},
"properties" : {
"#timestamp" : {
"type" : "date"
},
"Exception" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"log" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1636705210909",
"number_of_shards" : "1",
"number_of_replicas" : "1",
"uuid" : "8BRNRGnQSyOJzqMOHLXjHw",
"version" : {
"created" : "7030099"
},
"provided_name" : "test"
}
}
}
}

bucket Terms aggregation Elasticsearch

elasticsearch version
{
"name" : "abc-Inspiron-5521",
"cluster_name" : "elasticsearch",
"cluster_uuid" : "2vLvphpURJOtfAZSGDDX5w",
"version" : {
"number" : "7.10.2",
"build_flavor" : "default",
"build_type" : "deb",
"build_hash" : "747e1cc71def077253878a59143c1f785afa92b9",
"build_date" : "2021-01-13T00:42:12.435326Z",
"build_snapshot" : false,
"lucene_version" : "8.7.0",
"minimum_wire_compatibility_version" : "6.8.0",
"minimum_index_compatibility_version" : "6.0.0-beta1"
},
"tagline" : "You Know, for Search"
}
Document mapping
"user_data" : {
"aliases" : { },
"mappings" : {
"properties" : {
"experience" : {
"properties" : {
"brand" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"brand_segment" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"company" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"duration" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"property_type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"real_estate_type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
The document structure is correct; if any brackets in the snippet above are mismatched, please adjust them accordingly.
document sample
{
"_index" : "user_data",
"_type" : "_doc",
"_id" : "dONuEXgBU9vYaZRqY8Jo",
"_score" : 1.0,
"_source" : {
"experience" : [
{
"brand" : "Hilton",
"company" : "Hilton LLC",
"brand_segment" : "Luxury",
"property_type" : "All-Inclusive",
"duration" : "2 years",
"real_estate_type" : "Institutional"
},
{
"brand" : "Mantis",
"company" : "Accor LLC",
"brand_segment" : "Upper-Upscale",
"property_type" : "Condo",
"duration" : "2 years",
"real_estate_type" : "Family Office"
},
{
"brand" : "Marriott",
"company" : "Marriott LLC",
"brand_segment" : "Independent",
"property_type" : "Convention",
"duration" : "2 years",
"real_estate_type" : "Family Office"
}
]
}
}
my term aggregation query on brand_segment
GET user_data/_search
{
"aggs": {
"experience": {
"terms": { "field": "experience.brand_segment" }
}
}
}
Now I have 2 problems with the terms aggregation:
First, when executing the terms aggregation on 'brand_segment', the value 'Upper-Upscale' is supposed to be treated as a single unit and counted accordingly, but currently it is split into separate terms (see the wrong result below).
Second, I want to count the number of times a brand_segment value such as 'Luxury' occurs across all documents, but the above query gives me the number of documents in which 'Luxury' occurs (multiple occurrences within one document are currently counted once).
wrong result
"aggregations" : {
"experience" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "independent",
"doc_count" : 15
},
{
"key" : "luxury",
"doc_count" : 15
},
{
"key" : "upper",
"doc_count" : 14
},
{
"key" : "upscale",
"doc_count" : 14
}
]
}
}
The desired output should have Upper-Upscale as one value. The counts above come from multiple sample documents.
Feel free to use this as a sample document for creating the index:
{
"id": 1,
"name": "abcs",
"source": "csv_status",
"profile_complition": "70%",
"creation_date": "2020-04-02",
"current_position": [
{
"position": "Financial Reporting",
"position_category": "Finance",
"position_level": 2
}
],
"seeking_position": [
{
"position": "Financial Planning and Analysis",
"position_category": "Finance",
"position_level": 3
}
],
"last_updation_date": "2021-02-02",
"experience": [
{
"brand": "Hilton",
"company": "Hilton LLC",
"brand_segment": "Luxury",
"property_type": "All-Inclusive",
"duration": "2 years",
"real_estate_type": "Institutional"
},
{
"brand": "Accor",
"company": "Accor LLC",
"brand_segment": "Luxury",
"property_type": "Condo",
"duration": "2 years",
"real_estate_type": "Family Office"
},
{
"brand": "Marriott",
"company": "Marriott LLC",
"brand_segment": "Independent",
"property_type": "Convention",
"duration": "2 years",
"real_estate_type": "Family Office"
}
]
}
other occurrences in brand_segment = ['Economy', 'Upscale', 'Midscale', 'Upper-Upscale', 'Luxury', 'Independent', 'Extended Stay']
PS: every brand_segment value should be treated as a single entity ('Upper-Upscale' should not be split into 'Upper' and 'Upscale'; the same goes for 'Extended Stay').
Let me know if further clarification required.
For the first issue, you need to make your aggregation on the keyword subfield:
GET user_data/_search
{
"aggs": {
"experience": {
"terms": { "field": "experience.brand_segment.keyword" }
}
}
}
To solve the second issue, you need to make your experience field nested, which means your mapping needs to look as follows:
"user_data" : {
"aliases" : { },
"mappings" : {
"properties" : {
"experience" : {
"type": "nested", <--- add this
"properties" : {
"brand" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},

Elastic Search and product nomenclature: hyphens and spaces

I'm having a hard time figuring out how to set up Elasticsearch for the typical product model nomenclature. For instance, a product called "Shure SM7B" should appear as a result when searching for SM7B, SM 7B, SM 7, SM-7... and vice versa: searching for SM7B should give results like SM-7, SM7...
For now, I'm getting this kind of results: if I search for "Roland D 50", I get Roland D 50, Roland D-50, Roland D-550, Roland D-20 and so on... but if I search for "Roland D50", I get only "Roland D50" results.
This is my current mapping/settings:
{
"products" : {
"mappings" : {
"Product" : {
"properties" : {
"article_reviews" : {
"type" : "integer"
},
"brand_id" : {
"type" : "integer"
},
"category" : {
"type" : "text"
},
"category_id" : {
"type" : "integer"
},
"date" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"description" : {
"type" : "text"
},
"has_image" : {
"type" : "integer"
},
"id" : {
"type" : "integer"
},
"last_review_date" : {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss"
},
"min_price" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text"
},
"name_order" : {
"type" : "keyword"
},
"price_history" : {
"type" : "integer"
},
"rating" : {
"type" : "float"
},
"reviews" : {
"type" : "integer"
},
"shops" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"widget" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
Also, I'd need to autocomplete my searches, so for instance, "Shure SM" should show results like Shure SM-7, Shure SM7, Shure SM58, Shure SM 57, etc... narrowing them down as I type.
Any clues? Thank you!

Is there a way to Search through Elastic Search to get all results that have an ID contained in an array of IDs?

Been trying to find a way to do this for a couple days now. I've looked through 'bool', 'constant_score', 'filtered' queries none of which seem to be able to come up with the result I want.
One that HAS come close is the 'ids' query (it does exactly what I described in the title of this question). The one problem is that the key I'm trying to search on is not the '_id' value of the Elasticsearch document. Instead it is 'posterId' in the document below:
"_index": "activity",
"_type": "activity",
"_id": "<unique string id>",
"_score": null,
"_source": {
...
misc keys
...
"posterId": "<QUERY BASED ON THIS VALUE>",
"time": 20171007173623
}
Query that returns based on the _id value:
ids : {
type : "activity",
values : ["<unique string id>", ...]
}
as seen here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html
How I want my query to work:
posterId : {
type : "activity",
values : [<list of posterIds>]
}
Returning all documents whose posterId is contained in "<list of posterIds>"
< Edit > I'm trying to do this in one query as opposed to looping through each member of my list of posterIds because I also need to sort based on the time key and be able to page the query.
So, does anyone know of a built in query that does this or a work around?
Side note: if you feel like you're about to downvote this please just comment why, I'm about to be banned and I've read through all the guidelines and I feel like I'm following them but my questions rarely perform well. :( It would be much appreciated
Edit:
{
"activity" : {
"aliases" : { },
"mappings" : {
"activity" : {
"properties" : {
"-Kvp7f3epvW_dXSONzKj" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"actionId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"actionType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"activityType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"attachedId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"attachedType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"cardType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"noteTitleDict" : {
"properties" : {
"noun" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"subject" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"verb" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"posterId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"segueType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"time" : {
"type" : "long"
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1507678305995",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "<id>",
"version" : {
"created" : "5010199"
},
"provided_name" : "activity"
}
}
}
}
I think what you are looking for is a Terms Query
{
"query": {
"constant_score" : {
"filter" : {
"terms" : { "user" : ["kimchy", "elasticsearch"]}
}
}
}
}
This finds documents which contain the exact term kimchy or elasticsearch in the index of the user field. You can read more about this here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html
In your case you need to replace:
- user with posterId.keyword
- kimchy and elasticsearch with all your posterIds
Keep in mind that a terms query is case sensitive and the keyword field is not lowercased at index time (which means the value is indexed in the same case it was received).

Resources