Mapping an array of long values from Hive to an Elasticsearch index - elasticsearch

I have data in Hive in the following format:

user_ids        name        city        owner_ids
[1, 324, 456]   some_name   some_city   [4567, 12345678]

I want to be able to search by user_ids = 324 or owner_ids = 12345678 as a filter criterion and get back the document above as the response (exact match on ids).
Currently I am using a dynamic template for the mapping, which maps the user_ids field to long, and I am unable to get any results. What type should I force the field mapping of user_ids and owner_ids to in order to get this response?
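For reference, the kind of filter query I am running looks like this (a sketch; the index name user_search is taken from the resulting mapping below):
GET user_search/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "user_ids": 324 } }
      ]
    }
  }
}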
Mapping configuration
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1
  },
  "mappings": {
    "doc": {
      "dynamic_templates": [
        {
          "strings_as_keywords": {
            "match_mapping_type": "string",
            "mapping": {
              "type": "keyword"
            }
          }
        }
      ]
    }
  }
}
Result mapping
{
  "user_search" : {
    "mappings" : {
      "doc" : {
        "properties" : {
          "name" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          },
          "city" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          },
          "ds" : {
            "type" : "date"
          },
          "user_ids" : {
            "type" : "long"
          },
          "owner_ids" : {
            "type" : "long"
          }
        }
      }
    }
  }
}

Related

Elastic painless count unique occurrences

I'm using ELK stack version 7. What I need to do is count the unique occurrences of a value in my indexes.
My indexes are created by WSO2 Identity Server version 5.10 and they are defined as follows:
{
  "login.wso2.node.ip-2021.03.11" : {
    "aliases" : {
      "alias_my_login" : { }
    },
    "mappings" : {
      "dynamic" : "true",
      "_meta" : { },
      "_source" : {
        "includes" : [ ],
        "excludes" : [ ]
      },
      "dynamic_date_formats" : [
        "strict_date_optional_time",
        "yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"
      ],
      "dynamic_templates" : [ ],
      "date_detection" : true,
      "numeric_detection" : false,
      "properties" : {
        "#timestamp" : {
          "type" : "date",
          "format" : "strict_date_optional_time"
        },
        "#version" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "host" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "instance_IP" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "instance_name" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "java_class" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "level" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "log_message" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "message" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "path" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "rr" : {
          "type" : "text"
        },
        "tags" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "tenant_id" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "timestamp" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "type" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        }
      }
    },
    "settings" : {
      "index" : {
        "creation_date" : "1615481578543",
        "number_of_shards" : "1",
        "number_of_replicas" : "1",
        "uuid" : "9o-UQnn-SKaj7LbhO8GYxQ",
        "version" : {
          "created" : "7070199"
        },
        "provided_name" : "login.wso2.node.ip-2021.03.11"
      }
    }
  }
}
What I need to do is check whether the message field contains a SAML2 Response XML and, if so, access one value of this XML and count its unique occurrences.
So far so good. The message field is a multi-field: it is mapped as both text and keyword, so I can use the text type for full-text search and the keyword type for aggregations, sorting, and so on.
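To illustrate that split, a minimal sketch (the query string and the level field are just examples taken from the mapping above): the match query searches the analyzed text field, while the terms aggregation targets the unanalyzed .keyword sub-field.
GET login.wso2.node.ip-2021.03.11/_search
{
  "size": 0,
  "query": {
    "match": { "message": "SAML_MESSAGES_LOGFILE" }
  },
  "aggs": {
    "by_level": {
      "terms": { "field": "level.keyword" }
    }
  }
}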
What I did was write this painless script:
GET login.wso2.node.ip-2021.03.11/_search
{
  "query": {
    "bool": {
      "filter": [
        {
          "script": {
            "script": {
              "source": "doc['message.keyword'].value.contains('SAML_MESSAGES_LOGFILE') && doc['message.keyword'].value.contains('TINIT-')"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "distinct_cf_count": {
      "scripted_metric": {
        "params": {
          "fieldName": "message"
        },
        "init_script": "state.list = []",
        "map_script": """
          // Check whether the message field is present and whether fiscalNumber is there
          //if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 ){
          //  def matcher = /<saml2:Attribute FriendlyName="Codice Fiscale" Name="fiscalNumber"><saml2:AttributeValue xmlns:xs="http:\/\/www.w3.org\/2001\/XMLSchema" xmlns:xsi="http:\/\/www.w3.org\/2001\/XMLSchema-instance" xsi:type="xs:string">(.*)<\/saml2:AttributeValue><\/saml2:Attribute>/.matcher(doc[params.fieldName].value);
          //if (matcher.find()) {
          //  state.list.add(matcher.group(1));
          //}
          if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 && doc[params.fieldName].value.indexOf('TINIT-') > -1 ){
            def valore = doc[params.fieldName].value;
            def startIdx = valore.indexOf('TINIT-') + 'TINIT-'.length();
            state.list.add(valore.substring(startIdx, 16));
          }
        """,
        "combine_script": "return state.list;",
        "reduce_script": """
          Map uniqueValueMap = new HashMap();
          int count = 0;
          for(shardList in states) {
            if(shardList != null) {
              for(key in shardList) {
                if(!uniqueValueMap.containsKey(key)) {
                  count += 1;
                  uniqueValueMap.put(key, key);
                }
              }
            }
          }
          return count;
        """
      }
    }
  }
}
But I can't use regexes because they are disabled, and I would have to restart my ELK cluster to enable them. So I tried contains and indexOf instead, but I am not able to count the unique occurrences of this field.
Do you have any suggestions?
Thank you
Angelo
EDIT: MORE INFO
I gave it another look. This check always returns 0, so it's as if message.keyword were always missing:
"map_script": """
//Controllo se c'è il campo message e se c'è fiscalnumber
//if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 ){
// def matcher = /<saml2:Attribute FriendlyName="Codice Fiscale" Name="fiscalNumber"><saml2:AttributeValue xmlns:xs="http:\/\/www.w3.org\/2001\/XMLSchema" xmlns:xsi="http:\/\/www.w3.org\/2001\/XMLSchema-instance" xsi:type="xs:string">(.*)<\/saml2:AttributeValue><\/saml2:Attribute>/.matcher(doc[params.fieldName].value);
//if (matcher.find()) {
// state.list.add(matcher.group(1));
//}
**if( doc[params.fieldName].size()==0 ){**
** state.list.add(UUID.randomUUID().toString());**
** }**
//else{
// def valore = doc[params.fieldName].value;
// def cf = valore.splitOnToken('TINIT-')[1].substring(16);
// state.list.add(cf);
//}
""",
Do you have any suggestions? I'm really stuck here, one step away from the solution.
Thank you
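One avenue that avoids scripting altogether (a sketch; extracted_cf is a hypothetical field that would have to be populated at index time, for example by an ingest pipeline): once the fiscal code lives in its own field, a cardinality aggregation counts the distinct values directly.
GET login.wso2.node.ip-2021.03.11/_search
{
  "size": 0,
  "aggs": {
    "distinct_cf_count": {
      "cardinality": { "field": "extracted_cf.keyword" }
    }
  }
}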

Elasticsearch: change type of existing fields

In my case, NiFi receives data from a syslog firewall and, after transformation, sends JSON to Elasticsearch. This is my first contact with Elasticsearch.
{
  "LogChain" : "Corp01 input",
  "src_ip" : "162.142.125.228",
  "src_port" : "61802",
  "dst_ip" : "177.16.1.13",
  "dst_port" : "6580",
  "timestamp_utc" : 1646226066899
}
Elasticsearch automatically created an index with the following types:
{
  "mt-firewall" : {
    "mappings" : {
      "properties" : {
        "LogChain" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "dst_ip" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "dst_port" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "src_ip" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "src_port" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "timestamp_utc" : {
          "type" : "long"
        }
      }
    }
  }
}
How can I change the field types in Elasticsearch?
"src_ip": type "ip"
"dst_ip": type "ip"
"timestamp_utc": type "date"
You can change or configure a field's type using mapping in Elasticsearch; some of the ways are given below:
1. Explicit Index Mapping
Here, you define the index mapping yourself, with all the required fields and their specific types, before indexing any documents into Elasticsearch.
PUT /my-index-000001
{
  "mappings": {
    "properties": {
      "src_ip": { "type": "ip" },
      "dst_ip": { "type": "ip" },
      "timestamp_utc": { "type": "date" }
    }
  }
}
2. Dynamic Template
Here, you provide a dynamic template while creating the index, and based on a condition ES will map matching fields to a specific data type; for example, if a field name ends with _ip, map it as the ip type.
PUT my-index-000001/
{
  "mappings": {
    "dynamic_templates": [
      {
        "strings_as_ip": {
          "match_mapping_type": "string",
          "match": "*ip",
          "runtime": {
            "type": "ip"
          }
        }
      }
    ]
  }
}
Update 1:
If you want to change the mapping of an existing index, updating it in place is not recommended, as it will leave the data inconsistent.
You can follow the steps below (a sketch of the full round-trip follows the list):
1. Use the Reindex API to copy the data to a temporary index.
2. Delete your original index.
3. Recreate the index, defining the mapping with one of the methods above.
4. Use the Reindex API to copy the data from the temporary index back to the original (newly created) index.
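A minimal sketch of that round-trip, using the mt-firewall index from the question and an illustrative temporary index name:
POST _reindex
{
  "source": { "index": "mt-firewall" },
  "dest": { "index": "mt-firewall-tmp" }
}

DELETE /mt-firewall

PUT /mt-firewall
{
  "mappings": {
    "properties": {
      "src_ip": { "type": "ip" },
      "dst_ip": { "type": "ip" },
      "timestamp_utc": { "type": "date" }
    }
  }
}

POST _reindex
{
  "source": { "index": "mt-firewall-tmp" },
  "dest": { "index": "mt-firewall" }
}
Note that timestamp_utc arrives as epoch milliseconds, which the date type accepts with its default format.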

How to split object (nested) into multiple columns in Elasticsearch / Kibana data table visualization

I have a nested object indexed in Elasticsearch (7.10) and I need to visualize it with a Kibana data table. The problem is that Kibana throws the values from the nested field, which share the same name, into a single column.
Part of the index:
{
  "index" : {
    "mappings" : {
      "properties" : {
        "data1" : {
          "type" : "keyword"
        },
        "Details" : {
          "type" : "nested",
          "properties" : {
            "Amount" : {
              "type" : "float"
            },
            "Currency" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "DetailType" : {
              "type" : "keyword"
            },
            "Price" : {
              "type" : "float"
            },
            "Quantity" : {
              "type" : "float"
            },
            "TotalAmount" : {
              "type" : "float"
            .......
The problem in the table: how can I get three rows named Details, each with one split term (e.g. DetailType: "start_fee")?
Update:
I could query the nested object in the console:
GET _search
{
  "query": {
    "nested": {
      "path": "Details",
      "query": {
        "bool": {
          "must": [
            { "match": { "Details.DetailType": "energybased_fee" } }
          ]
        }
      },
      "inner_hits": { }
    }
  }
}
But how can I visualize only the "inner_hits" values in the table?
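For what it's worth, inner_hits accepts a _source filter, so the query can at least return only the nested values of interest (a sketch; this narrows the response, though it does not by itself change how Kibana renders the column):
GET _search
{
  "query": {
    "nested": {
      "path": "Details",
      "query": {
        "match": { "Details.DetailType": "energybased_fee" }
      },
      "inner_hits": {
        "_source": [ "Details.DetailType", "Details.Amount" ]
      }
    }
  }
}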

Copying coordinates to a geo_point field using copy_to in Elasticsearch

I am trying to work with geo data in Elasticsearch. I have an index with two separate fields, latitude and longitude, both stored as floating-point numbers. I want to use the copy_to feature of Elasticsearch to copy both field values into a third field of type geo_point. I tried doing that, but it is not working as intended.
{
  "mappings": {
    "properties": {
      "unique_id": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        }
      },
      "location_data": {
        "properties": {
          "latitude": {
            "type": "float",
            "copy_to": "last_location"
          },
          "longitude": {
            "type": "float",
            "copy_to": "last_location"
          },
          "last_location": {
            "type": "geo_point"
          }
        }
      }
    }
  }
}
When I index a sample document such as
{
  "unique_id": "12345_mytest",
  "location_data": {
    "latitude": 37.16,
    "longitude": -124.76
  }
}
You will see in the resulting mapping that the last_location field, which was supposed to be inside the location_data object, has also been created at the root level, with a data type other than geo_point:
{
  "mappings" : {
    "properties" : {
      "last_location" : {
        "type" : "float"
      },
      "location_data" : {
        "properties" : {
          "last_location" : {
            "type" : "geo_point",
            "store" : true
          },
          "latitude" : {
            "type" : "float",
            "copy_to" : [
              "last_location"
            ]
          },
          "longitude" : {
            "type" : "float",
            "copy_to" : [
              "last_location"
            ]
          }
        }
      },
      "unique_id" : {
        "type" : "text",
        "fields" : {
          "keyword" : {
            "type" : "keyword"
          }
        }
      }
    }
  }
}
Furthermore, when I query the field, I am unable to get the expected results.
This doesn't work; does anyone have other ideas or ways to do this? I know I could fix it at the source itself, or by altering the data before indexing, but I don't have the luxury of doing that right away. Any other way of achieving this by altering the mapping is most welcome. Thanks in advance for any pointers.
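For reference, one workaround that only touches the cluster side (a sketch; the pipeline name combine_location is illustrative, and my-index stands in for the real index name): an ingest pipeline can concatenate the two numbers into the "lat,lon" string form that geo_point accepts.
PUT _ingest/pipeline/combine_location
{
  "processors": [
    {
      "set": {
        "field": "location_data.last_location",
        "value": "{{location_data.latitude}},{{location_data.longitude}}"
      }
    }
  ]
}

PUT my-index/_doc/1?pipeline=combine_location
{
  "unique_id": "12345_mytest",
  "location_data": {
    "latitude": 37.16,
    "longitude": -124.76
  }
}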
Thanks
Ashit

Query to find all docs that exactly match all the fields in the query

I have a simple doc structure as follows.
{
  "did" : "1",
  "uid" : "user1",
  "mid" : "pc-linux1",
  "path" : "/tmp/path1"
}
I need an Elasticsearch query that matches all fields exactly:
GET index2/_search
{
  "query": {
    "bool": {
      "must": [
        { "term" : { "uid" : "user1" } },
        { "term" : { "mid" : "pc-linux1" } },
        { "term" : { "did" : "1" } },
        { "term" : { "path" : "/tmp/path1" } }
      ]
    }
  }
}
The matching should happen without any Elasticsearch analysis of the terms, so that "/tmp/path1" is matched as one full term.
I tried to use a custom mapping with
"index" : false
which does not work:
PUT /index2?include_type_name=true
{
  "mappings" : {
    "_doc": {
      "properties" : {
        "did" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "index" : false,
              "ignore_above" : 256
            }
          }
        },
        "mid" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "index" : false,
              "ignore_above" : 256
            }
          }
        },
        "path" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "index" : false,
              "ignore_above" : 256
            }
          }
        },
        "uid" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "index" : false,
              "ignore_above" : 256
            }
          }
        }
      }
    }
  }
}
I am using Elasticsearch 7.0, and a few posts suggest a custom mapping with
"index" : "not_analyzed"
but that is not accepted as a valid mapping in Elasticsearch 7.0.
Any suggestions?
If you want to match exact terms, try this query:
GET index2/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "uid": "user1" } },
        { "match": { "mid": "pc-linux1" } },
        { "match": { "did": "1" } },
        { "match": { "path": "/tmp/path1" } }
      ]
    }
  }
}
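If term-level (completely unanalyzed) matching is preferred, a variant of the same search can target the .keyword sub-fields instead, provided those sub-fields are left indexed, i.e. without the "index": false setting from the mapping above (a sketch):
GET index2/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "uid.keyword": "user1" } },
        { "term": { "mid.keyword": "pc-linux1" } },
        { "term": { "did.keyword": "1" } },
        { "term": { "path.keyword": "/tmp/path1" } }
      ]
    }
  }
}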
