I've searched for affirmative docs to no avail.
Is it possible to work with sincedb's inode number in logstash.conf?
something like:
input { ... file ... }
filter { ... blah blah ... }
mutate {
add_field => { "sincedb_inode_number" => $SINCEDB_INODE-NUMBER_FOR_CURRENT_MATCH }
}
output {
stdout { codec => rubydebug }
gelf {
id => "graylog"
host => "graylog"
protocol => "TCP"
port => 12201
}
}
I need some help in creating an AggregationDictionary from the following elasticsearch query
GET organisations/_search
{
"size": 0,
"aggs": {
"by_country": {
"nested": {
"path": "country"
},
"aggs": {
"by_country2": {
"filter": {
"bool": {
"must": [
{
"term": {
"country.isDisplayed": "true"
}
}
]
}
},
"aggs": {
"by_country3": {
"terms": {
"field": "country.displayName.keyword",
"size": 9999
}
}
}
}
}
}
}
}
I managed to write this horrible piece of code, which I am pretty sure is wrong; I am totally new to this.
// Object-initializer attempt at the query above: a nested aggregation on the
// "country" path, containing a filter aggregation, containing a terms aggregation.
AggregationDictionary aggs = new AggregationDictionary()
{
{
"countries_step1",
new NestedAggregation("countries_step1")
{
Path = "country",
Aggregations = new AggregationDictionary()
{
{
"countries_step2",
new FilterAggregation("countries_step2")
{
Filter = new BoolQuery
{
Must = new QueryContainer[] {
// NOTE(review): the JSON query above uses a plain term clause here, not a
// nested query, and this NestedQuery is given a Query but no Path.
new NestedQuery
{
Query = new TermQuery
{
Field = "country.isDisplayed",
Value = true
}
}
}
},
// Sub-aggregation of the filter: terms buckets on the display-name keyword field.
Aggregations = new AggregationDictionary
{
{
"countries_step3",
new TermsAggregation("countries_step3")
{
Field = "country.displayName.keyword",
Size = 9999
}
}
}
}
}
}
}
}
};
Can someone tell me if I am heading in the right direction? I am using NEST 6.6.0. Is there any tool that helps with these translations?
What you have so far is pretty solid, but when you try to execute this aggregation with the following call
var searchAsync = await client.SearchAsync<Document>(s => s.Size(0).Aggregations(aggs));
you will get this error
{
"error" : {
"root_cause" : [
{
"type" : "illegal_argument_exception",
"reason" : "query malformed, empty clause found at [14:22]"
}
],
"type" : "illegal_argument_exception",
"reason" : "query malformed, empty clause found at [14:22]"
},
"status" : 400
}
Checking the request that was sent to Elasticsearch tells us why this happened:
{
"aggs": {
"countries_step1": {
"aggs": {
"countries_step2": {
"aggs": {
"countries_step3": {
"terms": {
"field": "country.displayName.keyword",
"size": 9999
}
}
},
"filter": {}
}
},
"nested": {
"path": "country"
}
}
},
"size": 0
}
The filter clause is empty because you tried to use a nested query but didn't pass the path parameter. We don't need a nested query here (as your example query shows), so we can simplify the whole query to
// Innermost level: terms buckets on the display-name keyword sub-field.
var countriesByDisplayName = new TermsAggregation("countries_step3")
{
    Field = "country.displayName.keyword",
    Size = 9999
};

// Middle level: filter aggregation that keeps only entries whose
// country.isDisplayed term equals true.
var displayedCountries = new FilterAggregation("countries_step2")
{
    Filter = new BoolQuery
    {
        Must = new QueryContainer[]
        {
            new TermQuery
            {
                Field = "country.isDisplayed",
                Value = true
            }
        }
    },
    Aggregations = new AggregationDictionary
    {
        { "countries_step3", countriesByDisplayName }
    }
};

// Outermost level: nested aggregation on the "country" path.
var aggs = new AggregationDictionary
{
    {
        "countries_step1",
        new NestedAggregation("countries_step1")
        {
            Path = "country",
            Aggregations = new AggregationDictionary
            {
                { "countries_step2", displayedCountries }
            }
        }
    }
};
Now we have a valid request sent to elasticsearch.
There are a couple of things we can improve here:
1. Remove unnecessary bool query
Filter = new BoolQuery
{
Must = new QueryContainer[]
{
new TermQuery
{
Field = "country.isDisplayed",
Value = true
}
}
},
to
Filter =
new TermQuery
{
Field = "country.isDisplayed",
Value = true
},
2. Replace string field names
Usually, when making calls from .NET, there is some kind of POCO type that helps us write strongly-typed requests to Elasticsearch, which in turn helps keep the code clean and easy to refactor. With this, we can change the field definition from
"country.displayName.keyword"
to
Infer.Field<Document>(f => f.Country.FirstOrDefault().DisplayName.Suffix("keyword"))
My type definitions:
/// <summary>
/// POCO used as the document type for strongly-typed field expressions.
/// </summary>
public class Document
{
    /// <summary>Integer id of the document.</summary>
    public int Id { get; set; }

    /// <summary>
    /// Country entries of the document; carries the <c>[Nested]</c> mapping attribute.
    /// </summary>
    [Nested]
    public List<Country> Country { get; set; }
}
/// <summary>
/// A single country entry: a display flag and a display name.
/// </summary>
public class Country
{
    /// <summary>Boolean flag; the aggregation filters on this being <c>true</c>.</summary>
    public bool IsDisplayed { get; set; }

    /// <summary>Display name of the country.</summary>
    public string DisplayName { get; set; }
}
3. Consider using a fluent syntax
With NEST you can write queries in two ways: using object initializer syntax (which you did) or with the help of the fluent syntax. Have a look. Writing the above query with the fluent syntax, you will get something like
// Fluent form of the request:
// nested("country") -> filter(term isDisplayed == true) -> terms(displayName.keyword).
var searchResponse = await client.SearchAsync<Document>(search => search
    .Size(0)
    .Aggregations(root => root
        .Nested("by_country", nested => nested
            .Path(doc => doc.Country)
            .Aggregations(nestedAggs => nestedAggs
                .Filter("by_country2", filter => filter
                    .Filter(query => query
                        .Term(term => term
                            .Field(doc => doc.Country.FirstOrDefault().IsDisplayed)
                            .Value(true)
                        )
                    )
                    .Aggregations(filterAggs => filterAggs
                        .Terms("by_country3", terms => terms
                            .Field(doc => doc.Country.FirstOrDefault().DisplayName.Suffix("keyword"))
                            .Size(9999)
                        )
                    )
                )
            )
        )
    )
);
which I find a little bit easier to follow and write, maybe it will be better for you as well.
As a final note, have a look into docs and check how you can debug your queries.
Hope that helps.
How can I do multiple nested aggregation?
I have tried something like this:
Aggregations(x => x
.Nested("Facets", y => y.Path("categories")
.Aggregations(r => r.Terms("categories", w => w.Field(q => q.Categories.FirstOrDefault().Id))
)).Nested("Facets2", s => s.Path("brand")
.Aggregations(e => e.Terms("brand", w => w.Field(q => q.Brand.Id))
)));
But it returns Facets2 as a child of Facets
Can anyone help?
The aggregations that you have work as expected with NEST client version 1.7.1 as demonstrated with this example
// Builds a search with two sibling nested aggregations ("Facets" and "Facets2")
// and writes the serialized request to the console.
void Main()
{
    var connectionSettings = new ConnectionSettings();
    var inMemoryConnection = new InMemoryConnection(connectionSettings);
    var client = new ElasticClient(connection: inMemoryConnection);

    var response = client.Search<Example>(search => search
        .Aggregations(root => root
            // First nested aggregation: terms on the category ids.
            .Nested("Facets", categoriesFacet => categoriesFacet
                .Path(example => example.Categories)
                .Aggregations(inner => inner
                    .Terms("categories", terms => terms
                        .Field(example => example.Categories.FirstOrDefault().Id))))
            // Second nested aggregation, chained on the same root so it is a sibling.
            .Nested("Facets2", brandFacet => brandFacet
                .Path(example => example.Brand)
                .Aggregations(inner => inner
                    .Terms("brand", terms => terms
                        .Field(example => example.Brand.Id))))));

    // Decode the request bytes recorded on the response and print them.
    var requestBytes = response.RequestInformation.Request;
    var requestJson = Encoding.UTF8.GetString(requestBytes);
    Console.WriteLine(requestJson);
}
/// <summary>
/// Sample document type with a list of categories and a single brand.
/// </summary>
public class Example
{
    /// <summary>Categories of the document; used as the path of the "Facets" aggregation.</summary>
    public IList<Category> Categories { get; set; }

    /// <summary>Brand of the document; used as the path of the "Facets2" aggregation.</summary>
    public Brand Brand { get; set; }
}
/// <summary>
/// Brand with an integer id.
/// </summary>
public class Brand
{
    /// <summary>Integer id of the brand.</summary>
    public int Id { get; set; }
}
/// <summary>
/// Category with an integer id.
/// </summary>
public class Category
{
    /// <summary>Integer id of the category.</summary>
    public int Id { get; set; }
}
This outputs the following request query
{
"aggs": {
"Facets": {
"nested": {
"path": "categories"
},
"aggs": {
"categories": {
"terms": {
"field": "categories.id"
}
}
}
},
"Facets2": {
"nested": {
"path": "brand"
},
"aggs": {
"brand": {
"terms": {
"field": "brand.id"
}
}
}
}
}
}
I use the following query in Sense and I get some results back:
POST myindex/mytype/_search
{
"query": {
"fuzzy_like_this_field" : {
"BookLabel" : {
"like_text" : "myBook",
"max_query_terms" : 12
}
}
}
}
But with the following code using Nest I get nothing:
// Searches myindex/mytype with a Fuzzy query for "myBook" on the BookLabel field
// and materializes the returned documents into a list.
var docs = client.Search<dynamic>(b => b
.Index("myindex")
.Type("mytype")
.Query(q => q
// NOTE: Fuzzy(...) is a different query from the fuzzy_like_this_field
// query used in the Sense request above.
.Fuzzy(fz => fz
.OnField("BookLabel")
.Value("myBook")
)
)
).Documents.ToList();
I can't see the difference between them. What am I missing?
The NEST query you have above produces the following query DSL
{
"query": {
"fuzzy": {
"BookLabel": {
"value": "myBook"
}
}
}
}
To get the nearest equivalent to a fuzzy_like_this_field query (which is deprecated in Elasticsearch 1.6.0 and will be removed in 2.0), you can run a fuzzy_like_this query only on the field you're interested in
// Builds a fuzzy_like_this query limited to the BookLabel field and writes the
// serialized request to the console.
void Main()
{
    var connectionSettings = new ConnectionSettings(new Uri("http://localhost:9200"));
    var inMemoryConnection = new InMemoryConnection(connectionSettings);
    var client = new ElasticClient(connection: inMemoryConnection);

    var response = client.Search<dynamic>(search => search
        .Index("myindex")
        .Type("mytype")
        .Query(query => query
            .FuzzyLikeThis(flt => flt
                .LikeText("myBook")
                .MaxQueryTerms(12)
                .OnFields(new [] { "BookLabel" }))));

    // Decode the request bytes recorded on the response and print them.
    var requestBytes = response.RequestInformation.Request;
    var requestJson = Encoding.UTF8.GetString(requestBytes);
    Console.WriteLine(requestJson);
}
This outputs the following query DSL
{
"query": {
"flt": {
"fields": [
"BookLabel"
],
"like_text": "myBook",
"max_query_terms": 12
}
}
}
which should yield the same results as you see in Sense.
I have an event in logstash that looks like:
{
"terms" : { "A" : 1, "B" : 0.5, "c" : 1.6 }
}
I would like to change it to:
{
"terms" : [ "A", "B", "C" ]
}
I didn't find any documentation about a for loop or about getting the keys of a dictionary.
I would like to do something like:
filter {
for key in [terms]{
mutate {
merge => ["tmp_terms", key]
}
mutate {
remove_field => ["terms"]
rename => ["tmp_terms", "terms"]
}
}
Any suggestions?
Logstash doesn't have a loop construct but you can use the ruby plugin:
filter {
  # Replace the "terms" hash with the list of its keys,
  # e.g. { "A" => 1, "B" => 0.5 } becomes [ "A", "B" ].
  ruby {
    # Uses the Event get/set API: direct event['field'] access was removed in
    # Logstash 5.0. Events whose "terms" field is missing or is not a hash are
    # passed through unchanged instead of raising inside the filter.
    code => "
      terms = event.get('terms')
      event.set('terms', terms.keys) if terms.is_a?(Hash)
    "
  }
}