Elastic painless count unique occurrences - elasticsearch

I'm using ELK stack version 7. What I need to do is count the unique occurrences of a value in my indexes.
My indexes are created by WSO2 Identity Server version 5.10 and are defined as follows:
{
"login.wso2.node.ip-2021.03.11" : {
"aliases" : {
"alias_my_login" : { }
},
"mappings" : {
"dynamic" : "true",
"_meta" : { },
"_source" : {
"includes" : [ ],
"excludes" : [ ]
},
"dynamic_date_formats" : [
"strict_date_optional_time",
"yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"
],
"dynamic_templates" : [ ],
"date_detection" : true,
"numeric_detection" : false,
"properties" : {
"#timestamp" : {
"type" : "date",
"format" : "strict_date_optional_time"
},
"#version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"host" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"instance_IP" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"instance_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"java_class" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"level" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"log_message" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"message" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"path" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"rr" : {
"type" : "text"
},
"tags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tenant_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timestamp" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1615481578543",
"number_of_shards" : "1",
"number_of_replicas" : "1",
"uuid" : "9o-UQnn-SKaj7LbhO8GYxQ",
"version" : {
"created" : "7070199"
},
"provided_name" : "login.wso2.node.ip-2021.03.11"
}
}
}
}
What I need to do is check whether the message field contains a SAML2 Response XML and, if so, access one value in this XML and count its unique occurrences.
So far so good. The message field is a multi-field: it is mapped both as text and as keyword, so I can use the text type for full-text search and the keyword type for aggregations, sorting and so on.
What I did is to write this painless script:
GET login.wso2.node.ip-2021.03.11/_search
{
"query": {
"bool": {
"filter": [
{
"script": {
"script": {
"source": "doc['message.keyword'].value.contains('SAML_MESSAGES_LOGFILE') && doc['message.keyword'].value.contains('TINIT-')"
}
}
}
]
}
},
"aggs": {
"distinct_cf_count": {
"scripted_metric": {
"params": {
"fieldName":"message"
},
"init_script": "state.list = []",
"map_script": """
//Controllo se c'è il campo message e se c'è fiscalnumber
//if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 ){
// def matcher = /<saml2:Attribute FriendlyName="Codice Fiscale" Name="fiscalNumber"><saml2:AttributeValue xmlns:xs="http:\/\/www.w3.org\/2001\/XMLSchema" xmlns:xsi="http:\/\/www.w3.org\/2001\/XMLSchema-instance" xsi:type="xs:string">(.*)<\/saml2:AttributeValue><\/saml2:Attribute>/.matcher(doc[params.fieldName].value);
//if (matcher.find()) {
// state.list.add(matcher.group(1));
//}
if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 && doc[params.fieldName].value.indexOf('TINIT-') > -1 ){
def valore = doc[params.fieldName].value;
def startIdx = valore.indexOf('TINIT-')+'TINIT-'.length();
state.list.add(valore.substring(startIdx, 16));
}
""",
"combine_script": "return state.list;",
"reduce_script": """
Map uniqueValueMap = new HashMap();
int count = 0;
for(shardList in states) {
if(shardList != null) {
for(key in shardList) {
if(!uniqueValueMap.containsKey(key)) {
count +=1;
uniqueValueMap.put(key, key);
}
}
}
}
return count;
"""
}
}
}
}
But I can't use regexes because they are disabled, and I would have to restart my ELK cluster in order to enable them. So I tried contains and indexOf instead, but I'm not able to count the unique occurrences of this field.
Do you have any suggestion?
Thank you
Angelo
EDIT MORE INFO
I took a look. This check always returns 0, so it's as if message.keyword is always missing:
"map_script": """
//Controllo se c'è il campo message e se c'è fiscalnumber
//if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 ){
// def matcher = /<saml2:Attribute FriendlyName="Codice Fiscale" Name="fiscalNumber"><saml2:AttributeValue xmlns:xs="http:\/\/www.w3.org\/2001\/XMLSchema" xmlns:xsi="http:\/\/www.w3.org\/2001\/XMLSchema-instance" xsi:type="xs:string">(.*)<\/saml2:AttributeValue><\/saml2:Attribute>/.matcher(doc[params.fieldName].value);
//if (matcher.find()) {
// state.list.add(matcher.group(1));
//}
**if( doc[params.fieldName].size()==0 ){**
** state.list.add(UUID.randomUUID().toString());**
** }**
//else{
// def valore = doc[params.fieldName].value;
// def cf = valore.splitOnToken('TINIT-')[1].substring(16);
// state.list.add(cf);
//}
""",
Do you have any suggestion? I'm really stuck here... just one step away from the solution.
Thank you

Related

Elastic: How to correct the auto generated mapping?

I'm using Elastic cloud hosted in Azure and use NEST for the client. I have a part of auto generated mapping that I need to change from
"Bonus" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
to
"Bonus" : {
"properties" : {
"Amount" : {
"properties" : {
"Value" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"PayrollSyncDateTime" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
When I tried to do it, I got an illegal_argument_exception error with the message "can't merge a non object mapping [activityData.Bonus] with an object mapping". How can I correct the auto generated mapping?

Elastic Search and product nomenclature: hyphens and spaces

I'm having a hard time figuring out how to set up Elasticsearch for the typical product model nomenclature. For instance, a product called "Shure SM7B" should appear as a result when searching for SM7B, SM 7B, SM 7, SM-7... and vice versa: searching for SM7B should give results like SM-7, SM7...
For now, I'm getting this kind of results: if I search for "Roland D 50", I get Roland D 50, Roland D-50, Roland D-550, Roland D-20 and so on... but if I search for "Roland D50", I get only "Roland D50" results.
This is my current mapping/settings:
{
"products" : {
"mappings" : {
"Product" : {
"properties" : {
"article_reviews" : {
"type" : "integer"
},
"brand_id" : {
"type" : "integer"
},
"category" : {
"type" : "text"
},
"category_id" : {
"type" : "integer"
},
"date" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"description" : {
"type" : "text"
},
"has_image" : {
"type" : "integer"
},
"id" : {
"type" : "integer"
},
"last_review_date" : {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss"
},
"min_price" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text"
},
"name_order" : {
"type" : "keyword"
},
"price_history" : {
"type" : "integer"
},
"rating" : {
"type" : "float"
},
"reviews" : {
"type" : "integer"
},
"shops" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"widget" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
Also, I'd need to autocomplete my searches, so for instance, "Shure SM" should show results like Shure SM-7, Shure SM7, Shure SM58, Shure SM 57, etc... narrowing them down as I type.
Any clues? Thank you!

update a nested object in Elasticsearch

I hope I can find some help.
My problem is that I am trying to update a nested object that I have.
Here's my index mapping:
{
"grille_badge" : {
"mappings" : {
"properties" : {
"theme" : {
"type" : "nested",
"properties" : {
"categories" : {
"type" : "nested",
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"niveaux" : {
"type" : "nested",
"properties" : {
"grille_defis" : {
"type" : "nested",
"properties" : {
"defis" : {
"type" : "nested",
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
}
And here is an example of the document that I want to edit:
{
"_index" : "grille_badge",
"_type" : "_doc",
"_id" : "1",
"_version" : 1,
"_seq_no" : 0,
"_primary_term" : 1,
"found" : true,
"_source" : {
"theme" : [
{
"name" : "foot",
"categories" : [
{
"name" : "categorie1",
"niveaux" : [
{
"name" : "bronze",
"grille_defis" : [
{
"name" : "grille1",
"defis" : [ ]
}
]
}
]
},
{
"name" : "categorie2",
"niveaux" : [
{
"name" : "argent",
"grille_defis" : [
{
"name" : "grille1",
"defis" : [ ]
}
]
}
]
},
{
"name" : "categorie3",
"niveaux" : [
{
"name" : "or",
"grille_defis" : [
{
"name" : "grille1",
"defis" : [ ]
}
]
}
]
}
]
}
]
}
}
I am trying to add a defis to my empty list, like the code below that I found in the documentation:
POST index_name/_update/doc_id
{
"script": {
"source": "ctx._source.cats.add(params.cat)",
"params": {
"cat": {
"colors": 4,
"name": "Leon",
"breed": "Persian"
}
}
}
}
I tried to write my own script that reaches down into my defis list, but I still don't know how to do it.
Here's what I tried:
POST grille_badge/_update/1
{
"script": {
"source": "def theme_target = ctx._source.theme.findAll(our_theme theme.name == params.current_theme); def categorie_target = theme_target.findAll(our_categorie categories.name == params.current_categorie); def niveau_target = categorie_target.findAll(our_niveau niveaux.name == params.current_niveau); def grille_target = niveau_target.findAll(our_grille grille_defis.name == params.current_grille); grille_target.defis.add(params.defis_ajouter)",
"params": {
"current_theme": "foot",
"current_categorie": "categorie1",
"current_niveau": "niveau1",
"current_grille": "grille1",
"defis_ajouter": "i am a defis"
}
}
}
Could anyone please help me?
Thanks in advance.
Welcome to SO. Your question is quite confusing but here's how you update a doc by a script where your params are an instance of HashMap:
Index the original document w/ an id of 1 -- collapsed for brevity:
POST grille_badge/_doc/1
{"theme":[{"name":"foot","categories":[{"name":"categorie1","niveaux":[{"name":"bronze","grille_defis":[{"name":"grille1","defis":[]}]}]},{"name":"categorie2","niveaux":[{"name":"argent","grille_defis":[{"name":"grille1","defis":[]}]}]},{"name":"categorie3","niveaux":[{"name":"or","grille_defis":[{"name":"grille1","defis":[]}]}]}]}]}
Update doc#1 (note that you can also use a wide-ranging _update_by_query too). Also note that you need to verify whether cats exist before add-ing a hashmap in there.
POST grille_badge/_update/1
{
"script": {
"source": "if (ctx._source.cats instanceof List) { ctx._source.cats.add(params.cat) } else { ctx._source.cats = [params.cat] }",
"params": {
"cat": {
"colors": 4,
"name": "Leon",
"breed": "Persian"
}
}
}
}
Verify that cats are there:
GET grille_badge/_search
EDIT & ELABORATION
Here's how you do it:
POST grille_badge/_update/1
{
"script": {
"source": """
def theme_target = ctx._source.theme.find(theme -> theme.name == params.current_theme);
def categorie_target = theme_target.categories.find(categories -> categories.name == params.current_categorie);
def niveau_target = categorie_target.niveaux.find(niveaux -> niveaux.name == params.current_niveau);
def grille_target = niveau_target.grille_defis.find(grille_defis -> grille_defis.name == params.current_grille);
grille_target['defis'].add(["name" : params['defis_ajouter']]);
""",
"params": {
"current_theme": "foot",
"current_categorie": "categorie1",
"current_niveau": "bronze",
"current_grille": "grille1",
"defis_ajouter": "i am a defis"
}
}
}
Note the proper arrow (lambda) notation in the find calls, which replace the findAll calls from your attempt.
Also note that your own mapping defined the innermost defis as a nested array of key-value pairs -- this means you cannot push a plain string using the .add method -- you need to use a HashMap. You instantiate one with the one-liner ["your_key" : "your_value"]. Good luck.

query to find all docs that match with exact terms with all the fields in the query

I have a simple doc structure as follows.
{
"did" : "1",
"uid" : "user1",
"mid" : "pc-linux1",
"path" : "/tmp/path1"
}
I need a query for Elasticsearch that matches all fields exactly:
GET index2/_search
{
"query": {
"bool":{
"must": [
{
"term" : { "uid" : "user1"}
},
{
"term" : { "mid" : "pc-linux1"}
},
{
"term" : { "did" : "1"}
},
{
"term" : { "path" : "/tmp/path1"}
}
]
}
}
}
The matching should happen without any kind of elastic 'analysis' on keywords, so that "/tmp/path1" is matched as a full term.
I tried to use a custom mapping: with
"index" : false
which does not work.
PUT /index2?include_type_name=true
{
"mappings" : {
"_doc": {
"properties" : {
"did" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
},
"mid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
},
"path" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
},
"uid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
}
}
}
}
}
I am using Elasticsearch 7.0, and the custom mapping that a few posts suggest,
"index" : "not_analysed"
is not accepted as a valid mapping in Elasticsearch 7.0.
Any suggestions?
If you want to match exact terms, try this query:
GET index2/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"uid": "user1"
}
},
{
"match": {
"mid": "pc-linux1"
}
},
{
"match": {
"did": "1"
}
},
{
"match": {
"path": "/tmp/path1"
}
}
]
}
}
}

Is there a way to Search through Elastic Search to get all results that have an ID contained in an array of IDs?

Been trying to find a way to do this for a couple days now. I've looked through 'bool', 'constant_score', 'filtered' queries none of which seem to be able to come up with the result I want.
One that HAS come close is the 'ids' query (it does exactly what I described in the title of this question). The one problem is that the key I'm trying to search on is not the '_id' value of the Elasticsearch index. Instead, it is 'posterId' in the index below:
"_index": "activity",
"_type": "activity",
"_id": "<unique string id>",
"_score": null,
"_source": {
...
misc keys
...
"posterId": "<QUERY BASED ON THIS VALUE>",
"time": 20171007173623
}
Query that returns based on the _id value:
ids : {
type : "activity",
values : ["<unique string id>", ...]
}
as seen here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html
How I want my query to work:
posterId : {
type : "activity",
values : [<list of posterIds>]
}
Returning all documents whose posterId is contained in "<list of posterIds>"
< Edit > I'm trying to do this in one query, as opposed to looping through each member of my list of posterIds, because I also need to sort based on the time key and be able to page the query.
So, does anyone know of a built in query that does this or a work around?
Side note: if you feel like you're about to downvote this please just comment why, I'm about to be banned and I've read through all the guidelines and I feel like I'm following them but my questions rarely perform well. :( It would be much appreciated
Edit:
{
"activity" : {
"aliases" : { },
"mappings" : {
"activity" : {
"properties" : {
"-Kvp7f3epvW_dXSONzKj" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"actionId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"actionType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"activityType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"attachedId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"attachedType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"cardType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"noteTitleDict" : {
"properties" : {
"noun" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"subject" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"verb" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"posterId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"segueType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"time" : {
"type" : "long"
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1507678305995",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "<id>",
"version" : {
"created" : "5010199"
},
"provided_name" : "activity"
}
}
}
}
I think what you are looking for is a Terms Query
{
"query": {
"constant_score" : {
"filter" : {
"terms" : { "user" : ["kimchy", "elasticsearch"]}
}
}
}
}
This finds documents which contain the exact term kimchy or elasticsearch in the index of the user field. You can read more about this here https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html
In your case you need to replace
the user with posterId.keyword
kimchy and elasticsearch with all your posterIds
Keep in mind that a terms query is case sensitive and the keyword field does not use a lowercase analyzer (which means it'll save/index the value in the same case it was received)

Resources