ElasticSearch Multi-Match in Nest - elasticsearch

I have this DSL query which works. It returns the result as expected.
GET /filedocuments/_search
{
"query": {
"multi_match": {
"query": "abc",
"fields": ["fileName", "metadata"]
}
}
}
But when I run the equivalent query through the NEST library as shown below, it returns no results. What have I missed?
var response = await _elasticClient.SearchAsync<FileDocument>(s => s
.Query(q => q
.MultiMatch(c => c
.Fields(f => f.Field(p => p.FileName).Field(p => p.Metadata))
.Query("abc")
)
)
);
This is the mapping:
"fileName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
and
"metadata" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}

Solved after I converted it with .ToUpper().

Related

Elastic painless count unique occurrences

I'm using ELK stack version 7. What I need to do is count the unique occurrences of a value in my indices.
My indices are created by WSO2 Identity Server version 5.10 and are defined as follows:
{
"login.wso2.node.ip-2021.03.11" : {
"aliases" : {
"alias_my_login" : { }
},
"mappings" : {
"dynamic" : "true",
"_meta" : { },
"_source" : {
"includes" : [ ],
"excludes" : [ ]
},
"dynamic_date_formats" : [
"strict_date_optional_time",
"yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"
],
"dynamic_templates" : [ ],
"date_detection" : true,
"numeric_detection" : false,
"properties" : {
"#timestamp" : {
"type" : "date",
"format" : "strict_date_optional_time"
},
"#version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"host" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"instance_IP" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"instance_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"java_class" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"level" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"log_message" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"message" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"path" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"rr" : {
"type" : "text"
},
"tags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tenant_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"timestamp" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1615481578543",
"number_of_shards" : "1",
"number_of_replicas" : "1",
"uuid" : "9o-UQnn-SKaj7LbhO8GYxQ",
"version" : {
"created" : "7070199"
},
"provided_name" : "login.wso2.node.ip-2021.03.11"
}
}
}
}
What I need to do is check whether the message field contains a SAML2 Response XML and, if so, access one value of this XML and count its unique occurrences.
So far so good. The message field is multi mapping field. It is both text type and keyword type so I can use text type for full search and keyword type for aggregation, sorting and so on.
What I did is to write this painless script:
GET login.wso2.node.ip-2021.03.11/_search
{
"query": {
"bool": {
"filter": [
{
"script": {
"script": {
"source": "doc['message.keyword'].value.contains('SAML_MESSAGES_LOGFILE') && doc['message.keyword'].value.contains('TINIT-')"
}
}
}
]
}
},
"aggs": {
"distinct_cf_count": {
"scripted_metric": {
"params": {
"fieldName":"message"
},
"init_script": "state.list = []",
"map_script": """
//Controllo se c'è il campo message e se c'è fiscalnumber
//if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 ){
// def matcher = /<saml2:Attribute FriendlyName="Codice Fiscale" Name="fiscalNumber"><saml2:AttributeValue xmlns:xs="http:\/\/www.w3.org\/2001\/XMLSchema" xmlns:xsi="http:\/\/www.w3.org\/2001\/XMLSchema-instance" xsi:type="xs:string">(.*)<\/saml2:AttributeValue><\/saml2:Attribute>/.matcher(doc[params.fieldName].value);
//if (matcher.find()) {
// state.list.add(matcher.group(1));
//}
if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 && doc[params.fieldName].value.indexOf('TINIT-') > -1 ){
def valore = doc[params.fieldName].value;
def startIdx = valore.indexOf('TINIT-')+'TINIT-'.length();
state.list.add(valore.substring(startIdx, 16));
}
""",
"combine_script": "return state.list;",
"reduce_script": """
Map uniqueValueMap = new HashMap();
int count = 0;
for(shardList in states) {
if(shardList != null) {
for(key in shardList) {
if(!uniqueValueMap.containsKey(key)) {
count +=1;
uniqueValueMap.put(key, key);
}
}
}
}
return count;
"""
}
}
}
}
But I can't use regexes because they are disabled, and I would have to restart my ELK cluster in order to enable them. So I tried contains and indexOf, but I'm not able to count the unique occurrences of this field.
Do you have any suggestion?
Thank you
Angelo
EDIT MORE INFO
I took a look. This check always returns 0, so it's as if message.keyword is always missing:
"map_script": """
//Controllo se c'è il campo message e se c'è fiscalnumber
//if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 ){
// def matcher = /<saml2:Attribute FriendlyName="Codice Fiscale" Name="fiscalNumber"><saml2:AttributeValue xmlns:xs="http:\/\/www.w3.org\/2001\/XMLSchema" xmlns:xsi="http:\/\/www.w3.org\/2001\/XMLSchema-instance" xsi:type="xs:string">(.*)<\/saml2:AttributeValue><\/saml2:Attribute>/.matcher(doc[params.fieldName].value);
//if (matcher.find()) {
// state.list.add(matcher.group(1));
//}
**if( doc[params.fieldName].size()==0 ){**
** state.list.add(UUID.randomUUID().toString());**
** }**
//else{
// def valore = doc[params.fieldName].value;
// def cf = valore.splitOnToken('TINIT-')[1].substring(16);
// state.list.add(cf);
//}
""",
Do you have any suggestions? I'm really stuck here, one step away from the solution.
Thank you

Mapping array of long values from hive to elastic search index

I have data in Hive in the following format:
user_ids name city owner_ids
[1, 324, 456] some_name some_city [4567, 12345678]
I want to be able to search by user_ids = 324 as filter criteria or owner_ids = 12345678 and be able to get back above document as response. (Exact match on ids)
Currently I am using dynamic template for mapping which maps user_ids field to long and I am unable to get any results, what type should I force field mapping of user_ids and owner_ids to get this response?
Mapping configuration
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1
},
"mappings": {
"doc": {
"dynamic_templates": [
{
"strings_as_keywords": {
"match_mapping_type": "string",
"mapping": {
"type": "keyword"
}
}
}
]
}
}
}
Result mapping
{
"user_search" : {
"mappings" : {
"doc" : {
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"city" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ds" : {
"type" : "date"
},
"user_ids" : {
"type" : "long"
},
"owner_ids" : {
"type" : "long"
}
}
}
}
}
}

query to find all docs that match with exact terms with all the fields in the query

I have a simple doc structure as follows.
{
"did" : "1",
"uid" : "user1",
"mid" : "pc-linux1",
"path" : "/tmp/path1"
}
I need an Elasticsearch query that matches all fields exactly:
GET index2/_search
{
"query": {
"bool":{
"must": [
{
"term" : { "uid" : "user1"}
},
{
"term" : { "mid" : "pc-linux1"}
},
{
"term" : { "did" : "1"}
},
{
"term" : { "path" : "/tmp/path1"}
}
]
}
}
}
The matching should happen without any kind of elastic 'analysis' on keywords, so that "/tmp/path1" is matched as a full term.
I tried to use a custom mapping with
"index" : false
but it does not work.
PUT /index2?include_type_name=true
{
"mappings" : {
"_doc": {
"properties" : {
"did" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
},
"mid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
},
"path" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
},
"uid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"index" : false,
"ignore_above" : 256
}
}
}
}
}
}
}
I am using Elasticsearch 7.0, and the custom mapping that a few posts suggest, with
"index" : "not_analysed"
is not accepted as a valid mapping in Elasticsearch 7.0.
Any suggestions?
If you want to match exact terms, try this query:
GET index2/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"uid": "user1"
}
},
{
"match": {
"mid": "pc-linux1"
}
},
{
"match": {
"did": "1"
}
},
{
"match": {
"path": "/tmp/path1"
}
}
]
}
}
}

Is there a way to Search through Elastic Search to get all results that have an ID contained in an array of IDs?

Been trying to find a way to do this for a couple days now. I've looked through 'bool', 'constant_score', 'filtered' queries none of which seem to be able to come up with the result I want.
One that HAS come close is the 'ids' query (it does exactly what I described in the title of this question). The one problem is that the key I'm trying to search on is not the '_id' value of the Elasticsearch index. Instead it is 'posterId' in the index below:
"_index": "activity",
"_type": "activity",
"_id": "<unique string id>",
"_score": null,
"_source": {
...
misc keys
...
"posterId": "<QUERY BASED ON THIS VALUE>",
"time": 20171007173623
}
Query that returns based on the _id value:
ids : {
type : "activity",
values : ["<unique string id>", ...]
}
as seen here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html
How I want my query to work:
posterId : {
type : "activity",
values : [<list of posterIds>]
}
Returning all indexed documents whose posterId is contained in "<list of posterIds>".
< Edit > I'm trying to do this in one query as opposed to looping through each member of my list of posterIds, because I also need to sort based on the time key and be able to page the query.
So, does anyone know of a built in query that does this or a work around?
Side note: if you feel like you're about to downvote this please just comment why, I'm about to be banned and I've read through all the guidelines and I feel like I'm following them but my questions rarely perform well. :( It would be much appreciated
Edit:
{
"activity" : {
"aliases" : { },
"mappings" : {
"activity" : {
"properties" : {
"-Kvp7f3epvW_dXSONzKj" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"actionId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"actionType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"activityType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"attachedId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"attachedType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"cardType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"noteTitleDict" : {
"properties" : {
"noun" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"subject" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"verb" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"posterId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"segueType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"time" : {
"type" : "long"
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1507678305995",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "<id>",
"version" : {
"created" : "5010199"
},
"provided_name" : "activity"
}
}
}
}
I think what you are looking for is a Terms Query
{
"query": {
"constant_score" : {
"filter" : {
"terms" : { "user" : ["kimchy", "elasticsearch"]}
}
}
}
}
This finds documents which contain the exact term kimchy or elasticsearch in the index of the user field. You can read more about this here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html
In your case you need to replace:
- user with posterId.keyword
- kimchy and elasticsearch with all your posterIds
Keep in mind that a terms query is case sensitive and the keyword field does not use a lowercase analyzer (which means it'll save/index the value in the same case it was received)

elasticsearch: search within text

For example, if the name is P.Shanmukha Sharma and the user searches for Shanmukha, the document does not appear in the search results. It is returned only for P.Shanmukha and Sharma. Is there any way to make a search for Shanmukha return this result?
"user" : {
"properties" : {
"city" : {
"type" : "string",
"analyzer" : "autocomplete",
"search_analyzer" : "standard"
},
"created" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"id" : {
"type" : "long"
},
"latitude" : {
"type" : "double"
},
"longitude" : {
"type" : "double"
},
"profile_image" : {
"type" : "string"
},
"state" : {
"type" : "string",
"analyzer" : "autocomplete",
"search_analyzer" : "standard"
},
"super_verification" : {
"type" : "string"
},
"type" : {
"type" : "string"
},
"username" : {
"type" : "string",
"analyzer" : "autocomplete",
"search_analyzer" : "standard"
}
}
}
The username field is defined with the autocomplete analyzer and the standard search analyzer,
and the search query is:
def EsSearch(self, index, page, size, searchTerm):
    """Run a match query against ``index`` and return the hits' ``_source`` documents.

    Args:
        index: Name of the index passed to ``self.conn.search``.
        page: Accepted for interface compatibility; not used in the request body.
        size: Accepted for interface compatibility; not used in the request body.
        searchTerm: Value placed directly under the ``match`` clause
            (presumably a ``{field: text}`` mapping -- confirm against callers).

    Returns:
        A list with the ``_source`` of every entry in ``res['hits']['hits']``,
        in the order returned by the search call.
    """
    body = {
        'query': {
            'match': searchTerm
        },
        # Newest documents first.
        'sort': {
            'created': {
                'order': 'desc'
            }
        },
        # Keep only documents whose super_verification term is 'verified'.
        'filter': {
            'term': {
                'super_verification': 'verified'
            }
        }
    }
    res = self.conn.search(index=index, body=body)
    return [doc['_source'] for doc in res['hits']['hits']]
After doing a lot of research on ES, I got this solution using a wildcard query. Thanks, everyone.
{
"query": {
"wildcard": {
"username": {
"value": "*Shanmukha*"
}
}
}
}
Basically, there are two ways to do so.
By GET method and URL:
http://localhost:9200/your_index/your_type/_search?q=username:*Shanmukha*&pretty=true
By fuzzy query, as suggested by @krrish:

Resources