Elastic search query to find Total Login User count - elasticsearch

Total Distinct Login User Cumulative count - Column ( User ID)
Total Accounts searched User Cumulative count (Account ID)
I Tried Below queries but not getting the Count
GET /Logstach/_Search_Tracking/_count?q=user:userId
GET /Logstach/_Search_Tracking/_count?q=user:AccountId
Below output got
{
"count" : 0,
"_shards" : {
"total" : 3,
"successful" : 3,
"skipped" : 0,
"failed" : 0
}
}
Below is the Index Mapping details
"_index" : "ABC",
"_type" : "_doc",
"_id" : "08c28b9c-dd07-47c0-8243-3afc4fe89c08",
"_score" : 1.0,
"_source" : {
"user" : {
"userId" : "A123",
"userCategory" : "Outside",
"accountId" : "ABC58"
},

You need to query like this: ...?q=user.userId:A123 and ...?q=user.accountId:ABC58 because both fields are located within the user object
GET /Logstach/_Search_Tracking/_count?q=user.userId:A123
^
|
add this
GET /Logstach/_Search_Tracking/_count?q=user.accountId:ABC58
^
|
add this
Now if you want the distinct number of users that have logged in, yu need to use the cardinality aggregation
GET /Logstach/_Search_Tracking/_search
{
"size": 0,
"aggs": {
"distinct_loggedin": {
"cardinality": {
"field": "user.userId"
}
}
}
}
Same for the total number of accounts searched:
GET /Logstach/_Search_Tracking/_search
{
"size": 0,
"aggs": {
"total_accounts": {
"cardinality": {
"field": "user.accountId"
}
}
}
}

Related

Elasticsearch: how to find a document by number in logs

I have an error in kibana
"The length [2658823] of field [message] in doc[235892]/index[mylog-2023.02.10] exceeds the [index.highlight.max_analyzed_offset] limit [1000000]. To avoid this error, set the query parameter [max_analyzed_offset] to a value less than index setting [1000000] and this will tolerate long field values by truncating them."
I know how to deal with it (change "index.highlight.max_analyzed_offset" for an index, or set the query parameter), but I want to find the document with long field and examine it.
If i try to find it by id, i get this:
q:
GET mylog-2023.02.10/_search
{
"query": {
"terms": {
"_id": [ "235892" ]
}
}
}
a:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
q:
GET mylog-2023.02.10/_doc/235892
a:
{ "_index" : "mylog-2023.02.10", "_type" : "_doc", "_id" :
"235892", "found" : false }
Maybe this number (doc[235892]) is not id? How can i find this document?
try use Query IDs:
GET /_search
{
"query": {
"ids" : {
"values" : ["1", "4", "100"]
}
}
}

QuickSight or Elasticsearch - Column wise aggregration

Is this possible to do in QuickSight or Elasticsearch? I have tried calculated fields in QuickSight and runtime scripts in Elasticsearch, not sure how to do it? Also, is what I'm not what I'm expecting is even possible in this tool.
Trying out a simple date difference between columns based on their action, here... "Time taken for 'creating a post' after a user registered"
Data Input:
Data output
It is possible using scripted metric aggregation
Data
"hits" : [
{
"_index" : "index121",
"_type" : "_doc",
"_id" : "aqJ3HnoBF6_U07qsNY-s",
"_score" : 1.0,
"_source" : {
"user" : "Jen",
"activity" : "Logged In",
"activity_Time" : "2020-01-08"
}
},
{
"_index" : "index121",
"_type" : "_doc",
"_id" : "a6J3HnoBF6_U07qsXY_8",
"_score" : 1.0,
"_source" : {
"user" : "Jen",
"activity" : "Created a post",
"activity_Time" : "2020-05-08"
}
},
{
"_index" : "index121",
"_type" : "_doc",
"_id" : "bKJ3HnoBF6_U07qsk4-0",
"_score" : 1.0,
"_source" : {
"user" : "Mark",
"activity" : "Logged In",
"activity_Time" : "2020-01-03"
}
},
{
"_index" : "index121",
"_type" : "_doc",
"_id" : "baJ3HnoBF6_U07qsu48g",
"_score" : 1.0,
"_source" : {
"user" : "Mark",
"activity" : "Created a post",
"activity_Time" : "2020-01-08"
}
}
]
Query
{
"size": 0,
"aggs": {
"user": {
"terms": {
"field": "user.keyword",
"size": 10000
},
"aggs": {
"distinct_sum_feedback": {
"scripted_metric": {
"init_script": "state.docs = []",
"map_script": """ Map span = [
'timestamp':doc['activity_Time'],
'activity':doc['activity.keyword'].value
];
state.docs.add(span)
""",
"combine_script": "return state.docs;",
"reduce_script": """
def all_docs = [];
for (s in states)
{
for (span in s) {
all_docs.add(span);
}
}
all_docs.sort((HashMap o1, HashMap o2)->o1['timestamp'].getValue().toInstant().toEpochMilli().compareTo(o2['timestamp'].getValue().toInstant().toEpochMilli()));
Hashtable result= new Hashtable();
boolean found = false;
JodaCompatibleZonedDateTime loggedIn;
for (s in all_docs)
{
if(s.activity =='Logged In')
{
loggedIn=s.timestamp.getValue();
found= true;
}
if(s.activity =='Created a post' && found==true)
{
found=false;
def dt=loggedIn.getYear()+ '-' + loggedIn.getMonth() + '-' + loggedIn.getDayOfMonth();
def diff= s.timestamp.getValue().toInstant().toEpochMilli() - loggedIn.toInstant().toEpochMilli();
if(result.get(dt) == null)
{
result.put(dt, diff / 1000 / 60 / 60 / 24 )
}
}
}
return result;
"""
}
}
}
}
}
}
Result
"user" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Jen",
"doc_count" : 2,
"distinct_sum_feedback" : {
"value" : {
"2020-JANUARY-8" : 121
}
}
},
{
"key" : "Mark",
"doc_count" : 2,
"distinct_sum_feedback" : {
"value" : {
"2020-JANUARY-3" : 5
}
}
}
]
}
Explanation
"init_script":
Executed prior to any collection of documents. Allows the aggregation
to set up any initial state.
Have declared a Map"
"map_script"
Executed once per document collected
Loop through all document and add activity and timestamp to map
combine_script
Executed once on each shard after document collection is complete
Return collection of Map for all shards
reduce_script
Executed once on the coordinating node after all shards have returned their results
Once again go through through all Map and create a single collection and sort on timestamp. Then go through sorted Map and insert logged in and next "created post" time (diff of logged in and post created time)

SQL aggregation query corresponding in elasticsearch

I studied elasticsearch aggregation queries but couldn't find if it supports multiple aggregate function. In an other word, I wanna know if elasticsearch can generate the equivalent of this Sql aggregation query:
SELECT account_no, transaction_type, count(account_no), sum(amount), max(amount) FROM index_name GROUP BY account_no, transaction_type Having count(account_no) > 10
If yes, how?
Thank you.
There are two possible ways to do what you are looking for in ES and I've mentioned them both below.
I've also added sample mapping and sample documents for your reference.
Mapping:
PUT index_name
{
"mappings": {
"mydocs":{
"properties":{
"account_no":{
"type": "keyword"
},
"transaction_type":{
"type": "keyword"
},
"amount":{
"type":"double"
}
}
}
}
}
Sample Documents:
Notice carefully, I'm only creating list of 4 transactions for 1 customer.
POST index_name/mydocs/1
{
"account_no": "1011",
"transaction_type":"credit",
"amount": 200
}
POST index_name/mydocs/2
{
"account_no": "1011",
"transaction_type":"credit",
"amount": 400
}
POST index_name/mydocs/3
{
"account_no": "1011",
"transaction_type":"cheque",
"amount": 100
}
POST index_name/mydocs/4
{
"account_no": "1011",
"transaction_type":"cheque",
"amount": 100
}
There are two ways to get what you are looking for:
Solution 1: Using Elasticsearch Query DSL
Aggregation Query:
For Aggregation Query DSL, I've made use of the below aggregation queries to solve what you are looking for.
Terms Aggregation
Sum Aggregation Query (Metric Aggregation)
Max Aggregation Query (Metric Aggregation)
Below is how query is summarised version of the query so that you get the clarity on which queries are sibling and which are parents.
- Terms Aggregation (For Every Account)
- Terms Aggregation (For Every Transaction_type)
- Sum Amount
- Max Amount
Below is the actual query:
POST index_name/_search
{
"size": 0,
"aggs": {
"account_no_agg": {
"terms": {
"field": "account_no"
},
"aggs": {
"transaction_type_agg": {
"terms": {
"field": "transaction_type",
"min_doc_count": 2
},
"aggs": {
"sum_amount": {
"sum": {
"field": "amount"
}
},
"max_amount":{
"max": {
"field": "amount"
}
}
}
}
}
}
}
}
Important thing to mention is min_doc_count which is nothing but the having count(account_no)>10, which in my query I'm filtering only those transactions with having count(account_no) > 2
Query Response
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"account_no_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1011", <---- account_no
"doc_count" : 4, <---- count(account_no)
"transaction_type_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "cheque", <---- transaction_type
"doc_count" : 2,
"sum_amount" : { <---- sum(amount)
"value" : 200.0
},
"max_amount" : { <---- max(amount)
"value" : 100.0
}
},
{
"key" : "credit", <---- another transaction_type
"doc_count" : 2,
"sum_amount" : { <---- sum(amount)
"value" : 600.0
},
"max_amount" : { <---- max(amount)
"value" : 400.0
}
}
]
}
}
]
}
}
}
Notice the above result carefully, I've added comments wherever required so that it helps what part of sql query you are looking for.
Solution 2: Using Elasticsearch SQL(_xpack solution)
If you are making use of xpack feature of Elasticsearch's SQL Access, you can simply copy paste the SELECT Query as below for the mapping and document as mentioned above:
Elasticsearch SQL:
POST /_xpack/sql?format=txt
{
"query": "SELECT account_no, transaction_type, sum(amount), max(amount), count(account_no) FROM index_name GROUP BY account_no, transaction_type HAVING count(account_no) > 1"
}
Elasticsearch SQL Result:
account_no |transaction_type| SUM(amount) | MAX(amount) |COUNT(account_no)
---------------+----------------+---------------+---------------+-----------------
1011 |cheque |200.0 |100.0 |2
1011 |credit |600.0 |400.0 |2
Note that I've tested the query in ES 6.5.4.
Hope this helps!

Elasticsearch range with aggs

i want average rating of every user document but is not working according to me.please check the code given below.
curl -XGET 'localhost:9200/mentorz/users/_search?pretty' -H 'Content-Type: application/json' -d'
{"aggs" : {"avg_rating" : {"range" : {"field" : "rating","ranges" : [{ "from" : 3, "to" : 19 }]}}}}';
{ "_index" : "mentorz", "_type" : "users", "_id" : "555", "_source" : { "name" : "neeru", "user_id" : 555,"email_id" : "abc#gmail.com","followers" : 0,
"following" : 0, "mentors" : 0, "mentees" : 0, "basic_info" : "api test info",
"birth_date" : 1448451985397,"charge_price" : 0,"org" : "cz","located_in" : "noida", "position" : "sw developer", "exp" : 7, "video_bio_lres" : "test bio lres url normal signup","video_bio_hres" : "test bio hres url normal signup", "rating" : [ 5 ,4], "expertises" : [ 1, 4, 61, 62, 63 ] }
this is my user document,i want to filter only those users who have average rating range from 3 to 5.
Update Answer
I've made a query using script, hope the below query works for you.
GET mentorz/users/_search
{
"size": 0,
"aggs": {
"term": {
"terms": {
"field": "user.keyword",
"size": 100
},
"aggs": {
"NAME": {
"terms": {
"field": "rating",
"size": 10,
"script": {
"inline": "float var=0;float count=0;for(int i = 0; i < params['_source']['rating'].size();i++){var=var+params['_source']['rating'][i];count++;} float avg = var/count; if(avg>=4 && avg<=5) {avg}else{null}"
}
}
}
}
}
}
}
You can change the range of your desired rating range by changing the if condition "if(avg>=4 && avg<=5)".

Show all Elasticsearch aggregation results/buckets and not just 10

I'm trying to list all buckets on an aggregation, but it seems to be showing only the first 10.
My search:
curl -XPOST "http://localhost:9200/imoveis/_search?pretty=1" -d'
{
"size": 0,
"aggregations": {
"bairro_count": {
"terms": {
"field": "bairro.raw"
}
}
}
}'
Returns:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 16920,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"bairro_count" : {
"buckets" : [ {
"key" : "Barra da Tijuca",
"doc_count" : 5812
}, {
"key" : "Centro",
"doc_count" : 1757
}, {
"key" : "Recreio dos Bandeirantes",
"doc_count" : 1027
}, {
"key" : "Ipanema",
"doc_count" : 927
}, {
"key" : "Copacabana",
"doc_count" : 842
}, {
"key" : "Leblon",
"doc_count" : 833
}, {
"key" : "Botafogo",
"doc_count" : 594
}, {
"key" : "Campo Grande",
"doc_count" : 456
}, {
"key" : "Tijuca",
"doc_count" : 361
}, {
"key" : "Flamengo",
"doc_count" : 328
} ]
}
}
}
I have much more than 10 keys for this aggregation. In this example I'd have 145 keys, and I want the count for each of them. Is there some pagination on buckets? Can I get all of them?
I'm using Elasticsearch 1.1.0
The size param should be a param for the terms query example:
curl -XPOST "http://localhost:9200/imoveis/_search?pretty=1" -d'
{
"size": 0,
"aggregations": {
"bairro_count": {
"terms": {
"field": "bairro.raw",
"size": 10000
}
}
}
}'
Use size: 0 for ES version 2 and prior.
Setting size:0 is deprecated in 2.x onwards, due to memory issues inflicted on your cluster with high-cardinality field values. You can read more about it in the github issue here .
It is recommended to explicitly set reasonable value for size a number between 1 to 2147483647.
How to show all buckets?
{
"size": 0,
"aggs": {
"aggregation_name": {
"terms": {
"field": "your_field",
"size": 10000
}
}
}
}
Note
"size":10000 Get at most 10000 buckets. Default is 10.
"size":0 In result, "hits" contains 10 documents by default. We don't need them.
By default, the buckets are ordered by the doc_count in decreasing order.
Why do I get Fielddata is disabled on text fields by default error?
Because fielddata is disabled on text fields by default. If you have not wxplicitly chosen a field type mapping, it has the default dynamic mappings for string fields.
So, instead of writing "field": "your_field" you need to have "field": "your_field.keyword".
If you want to get all unique values without setting a magic number (size: 10000), then use COMPOSITE AGGREGATION (ES 6.5+).
From official documentation:
"If you want to retrieve all terms or all combinations of terms in a nested terms aggregation you should use the COMPOSITE AGGREGATION which allows to paginate over all possible terms rather than setting a size greater than the cardinality of the field in the terms aggregation. The terms aggregation is meant to return the top terms and does not allow pagination."
Implementation example in JavaScript:
const ITEMS_PER_PAGE = 1000;
const body = {
"size": 0, // Returning only aggregation results: https://www.elastic.co/guide/en/elasticsearch/reference/current/returning-only-agg-results.html
"aggs" : {
"langs": {
"composite" : {
"size": ITEMS_PER_PAGE,
"sources" : [
{ "language": { "terms" : { "field": "language" } } }
]
}
}
}
};
const uniqueLanguages = [];
while (true) {
const result = await es.search(body);
const currentUniqueLangs = result.aggregations.langs.buckets.map(bucket => bucket.key);
uniqueLanguages.push(...currentUniqueLangs);
const after = result.aggregations.langs.after_key;
if (after) {
// continue paginating unique items
body.aggs.langs.composite.after = after;
} else {
break;
}
}
console.log(uniqueLanguages);
Increase the size(2nd size) to 10000 in your term aggregations and you will get the bucket of size 10000. By default it is set to 10.
Also if you want to see the search results just make the 1st size to 1, you can see 1 document, since ES does support both searching and aggregation.
curl -XPOST "http://localhost:9200/imoveis/_search?pretty=1" -d'
{
"size": 1,
"aggregations": {
"bairro_count": {
"terms": {
"field": "bairro.raw",
"size": 10000
}
}
}
}'

Resources