Setting the "index" property of an Elasticsearch object - elasticsearch

Say I have the following mapping of objects:
{"my_type":
{"properties":
{"name":{"type":"string","store":"yes","index":"not_analyzed"},
"more":{"type":"object",
"properties":{"a_known_number":{"type":"long","index":"yes"},
"some_json_object":{"type":"object"}
}
}
}
}
}
I do not know what sub-fields "some_json_object" will have, but I DO know that I only want to store this object and not index any of its sub-fields.
Can I do:
{"my_type":
{"properties":
{"name":{"type":"string","store":"yes","index":"not_analyzed"},
"more":{"type":"object",
"properties":{"a_known_number":{"type":"long","index":"yes"},
"some_json_object":{"type":"object","store":"yes","index":"no"}
}
}
}
}
}
and affect all of the resulting sub-fields?

No, you can't specify the entire "object" as not indexed. However you can use dynamic_templates (http://www.elasticsearch.org/guide/reference/mapping/root-object-type/) to do this:
{
"my_type":{
"properties":{
"name":{
"type":"string",
"store":"yes",
"index":"not_analyzed"
}
},
"dynamic_templates":[
{
"stored_json_object_template":{
"path_match":"some_json_object.*",
"mapping":{
"store":"yes",
"index":"no"
}
}
}
]
}
}
This tells the mapper to map all properties under "some_json_object" as stored but not indexed, whatever their type.
Update
Removed type from mapping in order to match all property types (match_path => path_match).
Update 2
If you then create an index:
{
"mappings":{
"my_type":{
"properties":{
"name":{
"type":"string",
"store":"yes",
"index":"not_analyzed"
}
},
"dynamic_templates":[
{
"stored_json_object_template":{
"path_match":"some_json_object.*",
"mapping":{
"store":"yes",
"index":"no"
}
}
}
]
}
}
}
and index an object:
{
"Name":"Henrik",
"some_json_object":{
"string":"string",
"long":12345
}
}
it will then get the following mapping:
{
"testindex":{
"my_type":{
"dynamic_templates":[
{
"stored_json_object_template":{
"mapping":{
"index":"no",
"store":"yes"
},
"path_match":"some_json_object.*"
}
}
],
"properties":{
"name":{
"type":"string",
"index":"not_analyzed",
"store":true,
"omit_norms":true,
"index_options":"docs"
},
"some_json_object":{
"properties":{
"long":{
"type":"long",
"index":"no",
"store":true
},
"string":{
"type":"string",
"index":"no",
"store":true
}
}
}
}
}
}
}

Related

Elasticsearch [match] unknown token [START_OBJECT] after [created_utc]

I am learning how to use elasticsearch using the 2006 dataset of reddit comments from pushshift.io.
created_utc is the field with the time a comment was created.
I am trying to get all the posts within a certain time range. I googled a bit and found out that I need to use the "range" keyword.
This is my query right now:
{
"query": {
"match" : {
"range": {
"created_utc": {
"gte": "1/1/2006",
"lte": "31/1/2006",
"format": "dd/MM/yyyy"
}
}
}
}
}
I then tried using a bool query so I can match time range with edited must not = False (edited being the boolean field that tells me whether a post has been edited or not):
{
"query": {
"bool" : {
"must" : {
"range" : {
"created_utc": {
"gte" : "01/12/2006", "lte": "31/12/2006", "format": "dd/MM/yyyy"
}
}
},
"must_not": {
"edited": False
}
}
}
}
However, this gave me another error that I can't figure out:
[edited] query malformed, no start_object after query name
I'd appreciate if anyone can help me out with this, thanks!
Here is my mapping for the comment if it helps:
{
"comment":{
"properties":{
"author":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"body":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"controversiality":{
"type":"long"
},
"created_utc":{
"type":"date"
},
"edited":{
"type":"boolean"
},
"gilded":{
"type":"long"
},
"id":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"link_id":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"parent_id":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"score":{
"type":"long"
},
"subreddit":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
}
}
}
}
If you want to get all the posts within a time range, then you must use a range query. The problem with your query is that you are using range inside a match query, which is not allowed in Elasticsearch, so your query should look like:
{
"query": {
"range": {
"created_utc": {
"gte": 1136074029,
"lte": 1136076410
}
}
}
}
Provided that the created_utc field is stored as an epoch timestamp, you must use epoch values in the query.
The second query, where you want to find the posts within a range where edited must not be false:
{
"query": {
"bool": {
"must": [
{
"range": {
"created_utc": {
"gte": 1136074029,
"lte": 1136076410
}
}
}
],
"must_not": [
{
"match": {
"edited": false
}
}
]
}
}
}
Note: If your created_utc is stored in dd/MM/yyyy format, then while querying you should use values that strictly match that format, i.e. instead of 1/1/2006 you should give 01/01/2006.
Hope this helps !

Apply analyzer on Object fields

I have this analyzer:
{
"index": {
"number_of_shards": 1,
"analysis": {
"filter": {
"word_joiner": {
"type": "word_delimiter",
"catenate_all": true,
"preserve_original": true
}
},
"analyzer": {
"word_join_analyzer": {
"type": "custom",
"filter": [
"word_joiner"
],
"tokenizer": "keyword"
}
}
}
}
}
I apply it on this field:
@Field(type = FieldType.Object, analyzer = "word_join_analyzer")
private Description description;
And here is the Description class:
public class Description {
@JsonProperty("localizedDescriptions")
private Map<String, String> descriptions = new HashMap<>();
}
This is the resulting Elasticsearch mapping for this field:
{
"description":{
"properties":{
"localizedDescriptions":{
"properties":{
"en":{
"type":"string"
},
"fr":{
"type":"string"
},
"it":{
"type":"string"
}
}
}
}
}
}
As you can see, the analyzer is not applied at all. It works well with string fields, but I have a hard time doing it with Object types. Any ideas?
Thanks!
EDIT: I tried to use a dynamic mapping:
{
"iam":{
"properties":{
"dynamic_templates":[
{
"localized_strings_values":{
"path_match":"description.localizedDescriptions.*",
"mapping":{
"type":"string",
"analyzer":"word_join_analyzer"
}
}
}
]
}
}
}
But I have this error:
Expected map for property [fields] on field [dynamic_templates] but got a class java.lang.String
Why do I get this error?
Finally solved this. This is the correct mapping:
{
"cake": {
"dynamic_templates": [
{
"localized_descriptions": {
"path_match": "description.localizedDescriptions.*",
"mapping": {
"type": "string",
"analyzer": "word_join_analyzer"
}
}
}
]
}
}

ElasticSearch Query for range OR missing in array

I'm trying to create a query that will query the below mapping for values that have a "period" that either matches a specific date, or is missing its value (with a null value). Please note that I am working with a third-party database, so I cannot change the mappings. Bear with me if the example data and mappings are large; I've tried to cut everything nonessential away.
{
"EXAMPLE":{
"mappings":{
"company":{
"properties":{
"CompanyData":{
"properties":{
"participantRelations":{
"type":"nested",
"include_in_parent":true,
"properties":{
"participant":{
"type":"nested",
"include_in_parent":true,
"properties":{
"unitNumber":{
"type":"long"
}
},
"organizations":{
"properties":{
"memberData":{
"type":"nested",
"include_in_parent":true,
"properties":{
"attributes":{
"properties":{
"values":{
"properties":{
"period":{
"properties":{
"validFrom":{
"type":"date",
"format":"dateOptionalTime"
},
"validTo":{
"type":"date",
"format":"dateOptionalTime"
}
}
},
"value":{
"type":"string"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
Here is some example data. I've translated it from the (Danish) source it comes from, so any slight misspellings etc. are just typos.
{
"company": {
"companyData":{
"participantRelations":[
{
"participant":{
"unitNumber":4003857309
},
"organizations":[
{
"memberData":[
{
"attributtes":[
{
"values":[
{
"value":"chairman",
"period":{
"validFrom":"2014-10-01",
"validTo":"2016-08-11"
}
}
]
},
{
"values":[
{
"value":"generalassembly",
"period":{
"validFrom":"2014-10-01",
"validTo":"2016-08-11"
}
}
]
}
]
}
]
},
{
"memberData":[
{
"attributes":[
{
"values":[
{
"value":"chairman",
"period":{
"validFrom":"2016-08-16",
"validTo":"2017-06-08"
}
},
{
"value":"boardmember",
"period":{
"validFrom":"2017-06-09",
"validTo":null
}
}
]
},
{
"values":[
{
"value":"generalassembly",
"period":{
"validFrom":"2016-08-16",
"validTo":"2017-06-08"
}
},
{
"value":"generalassembly",
"period":{
"validFrom":"2017-06-09",
"validTo":null
}
}
]
}
]
}
]
}
]
}
]
}
}
}
What I want to do is something like the query below, which doesn't quite work as it has cases it cannot handle for reasons I do not know. What it needs to do is look for any company.participantRelations.organizations.memberData.attributes.values.period.validTo over a certain date, OR if the date is null. Now I know nulls are funky in ES, but I know that the date properties will always be there, but the validTo will be set to null if there is no date yet.
Furthermore, it needs to be nested on organizations as well, as I need a specific unitNumber to be present.
{
"query":{
"nested":{
"filter":{
"bool":{
"must":[
{
"nested":{
"filter":{
"bool":{
"must":[
{
"bool":{
"should":[
{
"range":{
"company.companyData.participantRelations.organizations.memberData.attributtes.values.period.validTo":{
"gte":"2017-08-14T15:23:11.011"
}
}
},
{
"missing":{
"field":"company.companyData.participantRelations.organizations.memberData.attributtes.values.period.validTo"
}
}
]
}
}
]
}
},
"path":"company.companyData.participantRelations.organizations.memberData"
}
},
{
"term":{
"company..companyData.participantRelations.participant.unitNumber":4003857309
}
}
]
}
},
"path":"company.companyData.participantRelations"
}
}
}
This query works in two cases:
Where there is only one entry in the list of values, and its validTo date is null
Where the validTo date is greater or equal to my date limit.
It does not seem to work if there are two entries, the first of which has a date that is earlier than my limit, and the second entry has a null value (as in the example).
I realize this is kind of convoluted, but with the database I'm querying that is just the way it is. I hope I've simplified it enough for you to get my issue.
Thanks in advance.

Can i filter subarray in Elasticsearch?

I have orders, with the products of each order attached as a sub-array, in Elasticsearch. When I'm aggregating prices, I need to be able to filter the order products within my order documents.
Example of my document in Elastic:
{
"OrderID":4567488,
"projectId":"4",
"Project":"direkt",
"legacy_id":null,
"supporterId":null,
"Origin":"FR",
"orderProducts":[
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"30",
"Price":"26.95",
},
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"15",
"Price":"15.22",
},
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"123",
"Price":"24.55",
},
]
}
How I'm filtering right now:
{
"index":"order_index",
"from":0,
"size":100,
"body":{
"query":{
"filtered":{
"filter":{
"bool":{
"must":[
{
"term":{
"orderProducts.brandNo":"30"
}
}
],
}
}
}
}
}
}
What i'm expecting
{
"OrderID":4567488,
"projectId":"4",
"Project":"direkt",
"legacy_id":null,
"supporterId":null,
"Origin":"FR",
"orderProducts":[
{
"OrderProductID":"15694898",
"OrderID":"4567488",
"brandNo":"30",
"Price":"26.95",
},
]
}
What I'm really getting:
The whole document.
Is it possible to filter sub-array data?
UPD.
Yes this is my schema mappings:
"mappings":{
"order":{
"dynamic_templates":[
{
"strings":{
"mapping":{
"type":"string",
"fields":{
"raw":{
"index":"not_analyzed",
"type":"string"
}
}
},
"match_mapping_type":"string"
}
}
],
"properties":{
"orderProducts":{
"include_in_parent":true,
"properties":{
"OrderProductID":{
"type":"long"
},
"OrderID":{
"type":"long"
},
"brandNo":{
"type":"long"
},
"Price":{
"type":"double"
}
},
"type":"nested"
},
"OrderID":{
"type":"long"
}
}
}
},
All right, after some experiments I discovered that the aggregation can be done like this:
{
"aggs":{
"sales":{
"nested":{
"path":"orderProducts"
},
"aggs":{
"filtered_nestedobjects":{
"filter":{
"bool":{
"must":[
{
"terms":{
"orderProducts.brandNo":[
"30"
]
}
}
]
}
},
"aggs":{
"Quantity":{
"sum":{
"field":"orderProducts.Quantity"
}
}
}
}
}
}
}
}
And the answer to the main question, whether we can filter a sub-array in Elasticsearch, is yes. I did this with inner_hits only.

Elasticsearch term aggregation document count issue

This is the request I'm sending to Elasticsearch:
{
"aggregations":{
"followUpActivity.metainfo.metainfos.string1":{
"terms":{
"field":"metainfos.string1",
"missing":"null",
"order":{
"_count":"asc"
}
}
}
}
}
I'm asking for buckets on field metainfos.string1 and ordering them by _count. This is the response:
{
"took":7,
"timed_out":false,
"_shards":{
"total":1,
"successful":1,
"failed":0
},
"hits":{
"total":3,
"max_score":1.0,
"hits":[
{
"_index":"living_v1",
"_type":"fuas",
"_id":"be9b29f3-37a5-11e6-a66a-30b5c2122322",
"_score":1.0,
"_routing":"living_team",
"_source":{
"user":"living_team",
"timestamp":"2016-06-22T11:27:25.531Z",
"metainfos":{
"string1":[
"s1", <<<<<<<<<<<<<--------------
"s2" <<<<<<<<<<<<<--------------
]
}
}
},
{
"_index":"living_v1",
"_type":"fuas",
"_id":"c3af0f64-37a5-11e6-a66a-30b5c2122322",
"_score":1.0,
"_routing":"living_team",
"_source":{
"user":"living_team",
"timestamp":"2016-06-22T12:30:01.625Z",
"metainfos":{
"string1":[
"s1", <<<<<<<<<<<<<--------------
"s2" <<<<<<<<<<<<<--------------
]
}
}
},
{
"_index":"living_v1",
"_type":"fuas",
"_id":"ee790469-48f3-11e6-9f47-30b5c2122322",
"_score":1.0,
"_routing":"living_team",
"_source":{
"user":"living_team",
"timestamp":"2016-07-13T13:33:41.231Z",
"metainfos":{
"string1":[
"s2" <<<<<<<<<<<<<--------------
]
}
}
}
]
},
"aggregations":{
"followUpActivity.metainfo.metainfos.string1":{
"doc_count_error_upper_bound":0,
"sum_other_doc_count":0,
"buckets":[
{
"key":"s2",
"doc_count":2 <<<<<<<<<<<<<--------------
},
{
"key":"s1",
"doc_count":3 <<<<<<<<<<<<<--------------
}
]
}
}
}
As you can see, there are two buckets: s1 and s2. However, s1 is present only in two documents but ES is telling me that doc_count = 3!!! Moreover, s2 is present in three documents, but ES is telling me that doc_count = 2!!!!
I'm performing it using only one node.
Any ideas?
MAPPING
{
"living_v1":{
"mappings":{
"fuas":{
"properties":{
"metainfos":{
"properties":{
"string1":{
"type":"string"
}
}
},
"timestamp":{
"type":"date",
"format":"strict_date_optional_time||epoch_millis"
},
"user":{
"type":"string",
"index":"not_analyzed"
}
}
}
}
}
}

Resources