Generating data tables in elastic search - elasticsearch

I'm trying to make a data table which consists of some calculations
******************************************************
** Bidder * Request * CPM * Revenue * Response Time **
******************************************************
I've created an index which holds all the data, so my data is stored in following format:
{
"data": {
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 78,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "nits_media_bid_won",
"_type": "nits_media_data_collection",
"_id": "MIyt6m8BWa2IbVphmPUh",
"_score": 1,
"_source": {
"bidderCode": "appnexus",
"width": 300,
"height": 600,
"statusMessage": "Bid available",
"adId": "43d59b34fd61b5",
"requestId": "2c6d19dcc536c3",
"mediaType": "banner",
"source": "client",
"cpm": 0.5,
"creativeId": 98493581,
"currency": "USD",
"netRevenue": true,
"ttl": 300,
"adUnitCode": "/19968336/header-bid-tag-0",
"appnexus": {
"buyerMemberId": 9325
},
"meta": {
"advertiserId": 2529885
},
"originalCpm": 0.5,
"originalCurrency": "USD",
"auctionId": "a628c0c0-bd4d-4f2a-9011-82fab780910e",
"responseTimestamp": 1580190231422,
"requestTimestamp": 1580190231022,
"bidder": "appnexus",
"timeToRespond": 400,
"pbLg": "0.50",
"pbMg": "0.50",
"pbHg": "0.50",
"pbAg": "0.50",
"pbDg": "0.50",
"pbCg": null,
"size": "300x600",
"adserverTargeting": {
"hb_bidder": "appnexus",
"hb_adid": "43d59b34fd61b5",
"hb_pb": "0.50",
"hb_size": "300x600",
"hb_source": "client",
"hb_format": "banner"
},
"status": "rendered",
"params": [
{
"placementId": 13144370
}
],
"nits_account": "asjdfagsd2384vasgd19",
"nits_url": "http://nitsmedia.local/run-ad",
"session_id": "YTGpETKSk2nHwLRB6GbP",
"timestamp": "2020-01-28T05:43:51.702Z",
"geo_data": {
"continent": "North America",
"address_format": "{{recipient}}\n{{street}}\n{{city}} {{region_short}} {{postalcode}}\n{{country}}",
"alpha2": "US",
"alpha3": "USA",
"country_code": "1",
"international_prefix": "011",
"ioc": "USA",
"gec": "US",
"name": "United States of America",
"national_destination_code_lengths": [
3
],
"national_number_lengths": [
10
],
"national_prefix": "1",
"number": "840",
"region": "Americas",
"subregion": "Northern America",
"world_region": "AMER",
"un_locode": "US",
"nationality": "American",
"postal_code": true,
"unofficial_names": [
"United States",
"Vereinigte Staaten von Amerika",
"États-Unis",
"Estados Unidos",
"アメリカ合衆国",
"Verenigde Staten"
],
"languages_official": [
"en"
],
"languages_spoken": [
"en"
],
"geo": {
"latitude": 37.09024000000000143018041853792965412139892578125,
"latitude_dec": "39.44325637817383",
"longitude": -95.7128909999999990532160154543817043304443359375,
"longitude_dec": "-98.95733642578125",
"max_latitude": 71.5388001000000031126546673476696014404296875,
"max_longitude": -66.8854170000000038953658076934516429901123046875,
"min_latitude": 18.77629999999999910187398199923336505889892578125,
"min_longitude": 170.595699999999993679011822678148746490478515625,
"bounds": {
"northeast": {
"lat": 71.5388001000000031126546673476696014404296875,
"lng": -66.8854170000000038953658076934516429901123046875
},
"southwest": {
"lat": 18.77629999999999910187398199923336505889892578125,
"lng": 170.595699999999993679011822678148746490478515625
}
}
},
"currency_code": "USD",
"start_of_week": "sunday"
}
}
},
//Remaining data set....
]
},
}
}
So as per my data set I want to fetch all unique bidderCode (which will be represented as Bidder in the table) and make the data with calculation respective to it. For example
Request - This will be total number of docs count in aggregation
CPM - CPM will be sum of all CPM divided by 1000
Revenue - Total CPM multiplied by 1000
Response time - Average of (responseTimestamp - requestTimestamp)
How can I achieve this? I'm a bit confused by it. I tried building the blocks with:
return $this->elasticsearch->search([
'index' => 'nits_media_bid_won',
'body' => [
'query' => $query,
'aggs' => [
'unique_bidders' => [
'terms' => ['field' => 'bidderCode.keyword']
],
'aggs' => [
'sum' => [
'cpm' => [
'field' => 'cpm',
'script' => '_value / 1000'
]
]
],
]
]
]);
But it is showing me error:
{
"error":{
"root_cause":[
{
"type":"x_content_parse_exception",
"reason":"[1:112] [sum] unknown field [cpm], parser not found"
}
],
"type":"x_content_parse_exception",
"reason":"[1:112] [sum] unknown field [cpm], parser not found"
},
"status":400
}
I'm new to this, so please help me out with it. Thanks.

ElasticSearch isn't wrong -- you've swapped the aggregation name with its type. It cannot parse the agg type cpm.
Here's the corrected query:
GET nits_media_bid_won/_search
{
"size": 0,
"aggs": {
"unique_bidders": {
"terms": {
"field": "bidderCode.keyword",
"size": 10
},
"aggs": {
"cpm": { <----------
"sum": { <----------
"field": "cpm",
"script": "_value / 1000"
}
}
}
}
}
}

Related

Logstash doesn't filter out JSON from the Twitter API

I want to remove unnecessary fields. There are many of them. I'm using JSON filter plugin for Logstash but it doesn't work properly. It doesn't want to filter the data or just doesn't send it to the output.
I've tried to use the mutate field but without success.
I want to remove for example the entities field which is a top-level field but none of my configs are working. I also want to remove some nested fields...
Here's my example JSON from the Twitter API:
{
"retweet_count": 0,
"created_at": "Mon Dec 14 18:43:09 +0000 2020",
"place": null,
"in_reply_to_user_id_str": null,
"lang": "pl",
"filter_level": "low",
"possibly_sensitive": false,
"id": 1338555139993591800,
"id_str": "1338555139993591814",
"quote_count": 0,
"is_quote_status": false,
"geo": null,
"entities": {
"symbols": [],
"user_mentions": [],
"urls": [
{
"indices": [
117,
140
],
"url": "xxx",
"expanded_url": "xxx"
}
],
"hashtags": [
{
"text": "koronawirus",
"indices": [
84,
96
]
},
{
"text": "COVID19",
"indices": [
97,
105
]
},
{
"text": "Lockdown",
"indices": [
106,
115
]
}
]
},
"timestamp_ms": "1607971389183",
"reply_count": 0,
"retweeted": false,
"text": "W Wielkiej Brytanii wykryto nowy wariant koronawirusa. Kolejne kraje z lockdownem👇\n\n#koronawirus #COVID19 #Lockdown\n\nxxx",
"contributors": null,
"truncated": false,
"in_reply_to_user_id": null,
"source": "Twitter Web App",
"#timestamp": "2020-12-14T18:43:09.000Z",
"in_reply_to_screen_name": null,
"favorited": false,
"in_reply_to_status_id": null,
"user": {
"created_at": "Tue May 12 09:11:01 +0000 2009",
"profile_use_background_image": false,
"lang": null,
"contributors_enabled": false,
"profile_text_color": "000000",
"id": 39464882,
"id_str": "39464882",
"following": null,
"geo_enabled": false,
"profile_sidebar_fill_color": "000000",
"is_translator": false,
"protected": false,
"profile_image_url": "xxx",
"profile_link_color": "3B94D9",
"name": "Salon24.pl",
"profile_sidebar_border_color": "000000",
"favourites_count": 309,
"profile_background_image_url": "xxx",
"followers_count": 17473,
"description": null,
"location": "Polska",
"url": "xxx",
"profile_background_color": "000000",
"utc_offset": null,
"profile_background_image_url_https": "xxx",
"default_profile": false,
"follow_request_sent": null,
"verified": false,
"translator_type": "none",
"friends_count": 1028,
"time_zone": null,
"default_profile_image": false,
"screen_name": "Salon24pl",
"profile_image_url_https": "xxx",
"statuses_count": 48490,
"notifications": null,
"listed_count": 203,
"profile_background_tile": false
},
"in_reply_to_status_id_str": null,
"favorite_count": 0,
"#version": "1",
"coordinates": null
}
And here's my actual config:
input {
twitter {
id => "logstash_to_kafka_plugin"
consumer_key => "xxx"
consumer_secret => "xxx"
oauth_token => "xxx"
oauth_token_secret => "xxx"
keywords => [ "koronawirus" ]
full_tweet => true
ignore_retweets => true
}
}
filter {
json {
source => "message"
remove_field => [ "[message][entities]"]
}
}
output {
kafka {
codec => json
topic_id => "twitter_tweets"
}
}
I've tried different ways to indicate that field like:
remove_field => [ "entities" ] or
remove_field => [ "[entities]" ]
but that didn't work either.
Try adding a mutate filter with remove_field after the json filter block, so the new mutate filter executes after the fields have been created at the root via the json filter.
Your filter could look something like
filter {
json {
source => "message"
}
mutate {
remove_field => ["entities", "[user][created_at]"] # works for nested fields as well
}
}

Validate 2 depth array on Laravel

Is there a way to validate this array(), Nothing works so far:
[
{
"transaction": {
"user_id": 6,
"month": 12,
"year": 2084
},
"entities": [
{
"name": "Allan Botsford",
"value": 3,
"is_total": false,
"type": "CASH"
},
{
"name": "Luisa Schiller Sr.",
"value": 6266,
"is_total": false,
"type": "CASH"
},
{
"name": "Susie Deckow MD",
"value": 506700,
"is_total": false,
"type": "CASH"
}
]
},
{
"transaction": {
"user_id": 7,
"month": 5,
"year": 2002
},
"entities": [
{
"name": "Raquel Jast",
"value": 7,
"is_total": false,
"type": "CASH"
},
{
"name": "Wendell Herman I",
"value": 4480,
"is_total": false,
"type": "CASH"
},
{
"name": "Oceane Greenfelder DDS",
"value": 46344,
"is_total": false,
"type": "CASH"
}
]
}
]
I can validate the transaction with the following rules:
[
'*.transaction.month' => 'required|numeric',
'*.transaction.year' => 'required|numeric',
'*.transaction.transaction_date' => 'sometimes|date_format:Y-m-d'
]
The problem is in the nested entities array because the following rules are ignored:
return [
'*.entities.*.is_total' => 'required|boolean',
'*.entities.*.name' => 'required|string',
'*.entities.*.value' => 'required|numeric',
'*.entities.*.type' => ['required', Rule::in(CashTemporaryInvestment::TYPES)]
]
I don't find any hint on laravel documentation. I will appreciate any help. I am using Laravel 7
Laravel Validator wildcard will work using the following preg_match() as seen in the Validator class
$pattern = str_replace('\*', '([^\.]+)', preg_quote($this->getPrimaryAttribute($attribute), '/'));
So your pattern will work if you can access the attributes using
[0]['entities'][0]['is_total'] for your rule '*.entities.*.is_total' => 'required|boolean',

How to save debug json to database in laravel?

I make a rest API with auth with api_token, and I use debugger by Lanin to show when the error happened.
If api_token is wrong the debug will show something like below
{
"error": "Unauthenticated.",
"debug": {
"database": {
"total": 1,
"items": [
{
"connection": "mysql",
"query": "select * from `users` where `api_token` = '630dcedd96aa370891b1649a972b577b2f262c90093b1903ae0a2ea63cf2c07cs' limit 1;",
"time": 2.89
}
]
},
"cache": {
"hit": {
"keys": [
"424f74a6a7ed4d4ed4761507ebcd209a6ef0937b"
],
"total": 1
},
"miss": {
"keys": [
"424f74a6a7ed4d4ed4761507ebcd209a6ef0937b",
"424f74a6a7ed4d4ed4761507ebcd209a6ef0937b:timer",
"424f74a6a7ed4d4ed4761507ebcd209a6ef0937b"
],
"total": 3
},
"write": {
"keys": [
"424f74a6a7ed4d4ed4761507ebcd209a6ef0937b:timer",
"424f74a6a7ed4d4ed4761507ebcd209a6ef0937b"
],
"total": 2
},
"forget": {
"keys": [],
"total": 0
}
},
"profiling": [
{
"event": "request-time",
"time": 0.2898838520050049
}
],
"memory": {
"usage": 11831808,
"peak": 11896088
}
}
}
and I want to save error log to the database. How to save and read data if I want to save this to the table database? I want to save the query
"debug": {
"database": {
"total": 1,
"items": [
{
"connection": "mysql",
**"query": "select * from `users` where `api_token` = '630dcedd96aa370891b1649a972b577b2f262c90093b1903ae0a2ea63cf2c07cs' limit 1;",
"time": 2.89**
}
]
},
With an appropriate model (Debug with "connection", "query" and "time" field), you could decode the json into an array and create the model from that.
Something like this :
$jsonArray = json_decode($rawJson, true);
$debug = Debug::create($jsonArray);

How Group By Queries in Elasticsearch?

I am working on a project where I would like to query the wikipedia database so I could bring some results ... I am using a Javascript application and Jquery with a self suggestion field as the user types a list of words should appear ....
For this to happen I need to create a query in elasticsearch by bringing all categories and only them into the results.
Type a SQL = SELECT Field FROM Table Group By Field ..
How would I do this?
My Test is basic and return all objects....
GET _search
{
"query": {
"query_string": {
"fields": ["category"],
"query": "*"
}
}
}
Part of result is:
{
"took": 34,
"timed_out": false,
"_shards": {
"total": 13,
"successful": 13,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2117924,
"max_score": 1,
"hits": [
{
"_index": "ptwikionary_content",
"_type": "page",
"_id": "41115",
"_score": 1,
"_source": {
"redirect": [],
"template": [
"Predefinição:-pt-",
"Predefinição:cabeçalho-idioma",
"Predefinição:nome categoria",
"Predefinição:pt",
"Predefinição:nome língua",
"Predefinição:flex.pt",
"Predefinição:link preto",
"Predefinição:paroxítona",
"Predefinição:gramática",
"Predefinição:gramática/core",
"Predefinição:etimologia",
"Predefinição:escopo",
"MediaWiki:Log"
],
"content_model": "wikitext",
"heading": [
"Português",
"Adjetivo",
"Antônimo",
"Etimologia"
],
"source_text": """
={{-pt-}}=
==Adjetivo==
{{flex.pt|s=desumilde|p=desumildes}}
{{paroxítona|de|su|mil|de}}, {{gramática|c2g}}
# que não é [[humilde]]
===Antônimo===
* [[humilde]]
=={{etimologia|pt}}==
: {{escopo|Morfologia}} [[des-]] + [[humilde]].
[[Categoria:Adjetivo (Português)]]
""",
"version_type": "external",
"wiki": "ptwiktionary",
"auxiliary_text": [
" Singular Plural Masculino desumilde desumildes Feminino"
],
"language": "pt",
"title": "desumilde",
"version": 2491983,
"external_link": [],
"namespace_text": "",
"namespace": 0,
"text_bytes": 274,
"incoming_links": 2,
"text": "de.su.mil.de, comum aos dois géneros que não é humilde humilde (Morfologia) des- + humilde.",
"category": [
"!Entrada (Português)",
"Polissílabo (Português)",
"Paroxítona (Português)",
"Entrada com etimologia (Português)",
"Adjetivo (Português)"
],
"defaultsort": false,
"outgoing_link": [
"desumilde",
"desumildes",
"des-",
"feminino",
"humilde",
"masculino",
"plural",
"português",
"singular",
"Categoria:Português"
],
"timestamp": "2018-03-29T21:53:29Z",
"popularity_score": 0.000065252908710355
}
},
{
How do I get the category list from Elasticsearch, like the SQL GROUP BY query above?
Use aggregation for this. Here's an example:
GET _search
{
"aggs" : {
"categories" : {
"terms" : { "field" : "category" }
}
}
}

Update single value in sub sub array in RethinkDB

We are trying to update a single answer in our sub sub array.
However our query is causing the following error:
{
"deleted": 0 ,
"errors": 1 ,
"first_error": "Inserted value must be an OBJECT (got ARRAY):
[
{
"answers": [
{
"answer": "wassup",
"owner": 12201836
}
],
"question": "Vraag 1?",
"questionId": 0,
"time": "10"
},
{
"answers": [],
"question": "Vraag 2?",
"questionId": 1,
"time": "15"
},
{
"answers": [],
"question": "Vraga 3?",
"questionId": 2,
"time": "20"
}
]" ,
"inserted": 0 ,
"replaced": 0 ,
"skipped": 0 ,
"unchanged": 0
}
Our table structure looks like the following:
Youtube
- Id
- Course
- Unit
- Session
- Number
- Group
- Questions (array)
- Question Id
- Time
- Answers (array)
- Id
- Answer
- Owner
Our query:
r.db('GitSmurf')
.table('youtube')
.update(function (row) {
return row('questions').merge(function (q) {
return r.branch(q('questionId').eq(0), { "answers": q('answers').merge(function(answer) {
return r.branch(answer('owner').eq(12201836), {"answer": "wassup"}, {})} )},{})
})
})
Test content:
{
"completed": [ ],
"course": "swd" ,
"group": "dwa-group-b" ,
"id": "44443377-ed15-4358-a005-f561e7b6a42d" ,
"number": 1 ,
"session": 1 ,
"unit": 1,
"questions": [
{
"answers": [
{
"answer": "hallo" ,
"owner": 12201836
}
] ,
"question": "Vraag 1?" ,
"questionId": 0 ,
"time": "10"
} ,
{
"answers": [ ],
"question": "Vraag 2?" ,
"questionId": 1 ,
"time": "15"
} ,
{
"answers": [ ],
"question": "Vraga 3?" ,
"questionId": 2 ,
"time": "20"
}
] ,
}
Any help is greatly appreciated!
We forgot to return a new object in the update query.
When we added that it worked.
r.db('GitSmurf')
.table('youtube')
.update(function (row) {
return { questions: row('questions').merge(function (q) {
return r.branch(q('questionId').eq(0), { "answers": q('answers').merge(function(answer) {
return r.branch(answer('owner').eq(12201836), {"answer": "tom"}, {})
})},{})
})}
})

Resources