Convert a DSL query to NEST (.NET) - elasticsearch

I have the following mapping and I am trying to rewrite it using NEST, but I can't seem to get it quite right yet.
PUT company
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {},
        "analyzer": {
          "keyword_analyzer": {
            "filter": [
              "lowercase",
              "asciifolding",
              "trim"
            ],
            "char_filter": [],
            "type": "custom",
            "tokenizer": "keyword"
          },
          "edge_ngram_analyzer": {
            "filter": [
              "lowercase"
            ],
            "tokenizer": "edge_ngram_tokenizer"
          },
          "edge_ngram_search_analyzer": {
            "tokenizer": "lowercase"
          }
        },
        "tokenizer": {
          "edge_ngram_tokenizer": {
            "type": "edge_ngram",
            "min_gram": 2,
            "max_gram": 5,
            "token_chars": [
              "letter"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "properties": {
        "companyId": {
          "type": "keyword"
        },
        "companyName": {
          "type": "text",
          "fields": {
            "keywordstring": {
              "type": "text",
              "analyzer": "keyword_analyzer"
            },
            "edgengram": {
              "type": "text",
              "analyzer": "edge_ngram_analyzer",
              "search_analyzer": "edge_ngram_search_analyzer"
            },
            "completion": {
              "type": "completion",
              "contexts": [
                {
                  "name": "companyGroupId",
                  "type": "category",
                  "path": "companyGroupId"
                }
              ]
            }
          },
          "analyzer": "standard"
        },
        "companyTradingName": {
          "type": "text",
          "index": false
        },
        "companyGroupId": {
          "type": "keyword"
        },
        "agencyId": {
          "type": "keyword"
        }
      }
    }
  }
}
This is where I got to. I still need to set the keywordstring and edgengram sub-fields on the CompanyName field, but I'm not sure how to add those analyzers.
MODEL
[ElasticsearchType(Name = "company")]
public class CompanyDocument : IDocument
{
public long CompanyId { get; set; }
public CompletionField CompanyName { get; set; } // <-- I think this needs to be set as an CompletionField
public string CompanyTradingName { get; set; }
public long CompanyGroupId { get; set; }
public long AgencyId { get; set; }
}
QUERY
var response1 = this.Client.CreateIndex(
    "testing_testing",
    index => index
        .Mappings(ms => ms
            .Map<CompanyDocument>(m => m
                .Properties(p => p
                    .Keyword(t => t.Name(n => n.AgencyId))
                    .Keyword(t => t.Name(n => n.CompanyGroupId))
                    .Keyword(t => t.Name(n => n.CompanyId))
                    .Text(t => t
                        .Name(n => n.CompanyName)
                        .Fields(ff => ff
                            .Completion(descriptor => descriptor
                                .Contexts(contextsDescriptor => contextsDescriptor
                                    .Category(contextDescriptor => contextDescriptor
                                        .Name("CompanyGroupId")
                                        .Path("CompanyGroupId"))))))
                    .Text(t => t
                        .Name(n => n.CompanyTradingName)
                        .Index(false)))))
        .Settings(f => f
            .Analysis(analysis => analysis
                .Analyzers(analyzers => analyzers
                    .Custom("keyword_analyzer", _ => _
                        .Filters("lowercase", "asciifolding", "trim")
                        .CharFilters()
                        .Tokenizer("keyword"))
                    .Custom("edge_ngram_analyzer", _ => _
                        .Filters("lowercase")
                        .Tokenizer("edge_ngram_tokenizer"))
                    .Custom("edge_ngram_search_analyzer", _ => _
                        .Tokenizer("lowercase")))
                .Tokenizers(descriptor => descriptor
                    .EdgeNGram("edge_ngram_tokenizer", t => t
                        .MinGram(2)
                        .MaxGram(5)
                        .TokenChars(TokenChar.Letter))))));
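For anyone who wants to finish this in NEST itself: the keywordstring and edgengram sub-fields can, in principle, be declared inside the same .Fields(...) call on CompanyName, alongside the completion field. A rough, untested sketch of the CompanyName property (i.e. replacing the existing .Text(...) call in the Properties chain), assuming NEST 6.x and the analyzer names registered in the Settings(...) part of the CreateIndex call:
.Text(t => t
    .Name(n => n.CompanyName)
    .Analyzer("standard")
    .Fields(ff => ff
        .Text(tt => tt
            .Name("keywordstring")
            .Analyzer("keyword_analyzer"))            // whole-value matching, lowercased/folded/trimmed
        .Text(tt => tt
            .Name("edgengram")
            .Analyzer("edge_ngram_analyzer")          // index-time edge n-grams
            .SearchAnalyzer("edge_ngram_search_analyzer"))
        .Completion(c => c
            .Name("completion")
            .Contexts(ctx => ctx
                .Category(cat => cat
                    .Name("companyGroupId")
                    .Path("companyGroupId"))))))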
UPDATE
GET company/_search
{
  "suggest": {
    "company-suggest": {
      "prefix": "trans",
      "completion": {
        "field": "companyName.completion",
        "fuzzy": {
          "fuzziness": 1
        }
      }
    }
  }
}
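(For reference, a rough NEST equivalent of this suggest request could look like the sketch below; untested, assuming NEST 6.x and the CompanyDocument model above.)
var suggestResponse = this.Client.Search<CompanyDocument>(s => s
    .Index("company")
    .Suggest(su => su
        .Completion("company-suggest", c => c
            .Prefix("trans")
            .Field(f => f.CompanyName.Suffix("completion")) // the completion sub-field
            .Fuzzy(fz => fz
                .Fuzziness(Fuzziness.EditDistance(1))))));

// suggestions come back under the same key
var options = suggestResponse.Suggest["company-suggest"];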

Rather than trying to rewrite it in NEST, I decided to just use the low-level client.
So I created a JSON file, read its contents, and then just used the following:
var response = this.Client.LowLevel.IndicesCreate<CompanyResponse>("index-name", PostData.Bytes(bytes));
return response.ApiCall.Success;
This seems a lot easier than trying to recreate the DSL query.

Related

Elasticsearch equivalent to SelectMany

I have a nested type in the Elasticsearch index:
class Category
{
    public string Name;
    public Category Child;
    public Guid[] ProductIds;
}
For example:
Category1
|_Category2
|_Category3
|_Category4
Category3 and Category4 have 2 products each.
What is the right NEST query to get all 4 product ids by searching "Category3"?
I imagine something like this, but can't actually get it to work:
var response = await elasticClient.SearchAsync<Category>(s => s
    .StoredFields(sf => sf
        .Fields(f => f.ProductIds)
    )
    .Query(q => q
        .Nested(n => n
            .Path(p => p.Child)
            .Query(nq => nq
                .Bool(b => b
                    .Must(m => m
                        .Match(mt => mt
                            .Field(f => f.Child.Name) // match against the nested category's name
                            .Query("Category Name")
                        )
                    )
                )
            )
        )
    )
);
One option could be to create a sort of breadcrumb hierarchy field and use an analyzer & facet to search over it.
So the index can have settings similar to these for the field & analyzer:
"btb_breadcrumb": {
"type": "text",
"fields": {
"facet": {
"type": "text",
"analyzer": "custom_path_tree",
"fielddata": "true"
}
}
},
"settings" : {
"analysis": {
"analyzer": {
"english_exact": {
"tokenizer": "standard",
"filter": [
"lowercase"
]
},
"custom_path_tree": {
"tokenizer": "custom_hierarchy"
},
"custom_path_tree_reversed": {
"tokenizer": "custom_hierarchy_reversed"
}
},
"tokenizer": {
"custom_hierarchy": {
"type": "path_hierarchy",
"delimiter": ">"
},
"custom_hierarchy_reversed": {
"type": "path_hierarchy",
"delimiter": ">",
"reverse": "true"
}
}
}
}
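If the index is built through NEST rather than raw JSON, the path_hierarchy part of those settings could be registered roughly like this (an untested sketch, assuming NEST 6.x; the index name is made up and the english_exact analyzer is omitted):
client.CreateIndex("catalog", c => c
    .Settings(s => s
        .Analysis(a => a
            .Tokenizers(t => t
                .PathHierarchy("custom_hierarchy", ph => ph
                    .Delimiter('>'))
                .PathHierarchy("custom_hierarchy_reversed", ph => ph
                    .Delimiter('>')
                    .Reverse(true)))
            .Analyzers(an => an
                .Custom("custom_path_tree", ca => ca
                    .Tokenizer("custom_hierarchy"))
                .Custom("custom_path_tree_reversed", ca => ca
                    .Tokenizer("custom_hierarchy_reversed"))))));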
and then you can search on this field like this:
{
"bool" :
{
"must" :
[
{"match" : {"btb_breadcrumb.facet" : "catid1>catid2>"}}
]
}
}
Now this will give you all items under catid2 and all of its child categories. Similarly, if you want to get all items that fall under any child of catid1, you can use the following query:
{
"bool" :
{
"must" :
[
{"match" : {"btb_breadcrumb.facet" : "catid1>"}}
]
}
}
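In NEST, that breadcrumb match could be written roughly like this (a sketch, reusing the Category class from the question as the document type and referring to the breadcrumb field by its string name):
var response = await elasticClient.SearchAsync<Category>(s => s
    .Query(q => q
        .Bool(b => b
            .Must(m => m
                .Match(mt => mt
                    .Field("btb_breadcrumb.facet")   // the path-analyzed sub-field
                    .Query("catid1>catid2>"))))));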

elasticsearch 7 nest aggregation text keyword error

I have an index with the following mappings:
{
"winnings": {
"mappings": {
"properties": {
"handId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"playerId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "float"
}
}
}
}
}
generated from the class:
public class ElasticWinnings
{
    public Guid Id { get; set; }
    public Guid HandId { get; set; }
    public Guid PlayerId { get; set; }
    public decimal Value { get; set; }
}
I created that in NEST with the ConnectionSettings:
.DefaultMappingFor<ElasticWinnings>(u =>
u.IndexName("winnings")
.IdProperty(x => x.Id)
);
When I try to run the following query:
var result = _client.Search<ElasticWinnings>(s =>
s.Aggregations(a =>
a.Terms("term_Agg", t =>
t.Field(f => f.PlayerId)
.Aggregations(aa =>
aa.Sum("sum", sum =>
sum.Field(f => f.Value))
)
))
);
I get a 400 back, with the error:
type: illegal_argument_exception Reason: "Fielddata is disabled on text fields by default
It creates this query:
{
"aggs":{
"term_Agg":{
"aggs":{
"sum":{
"sum":{
"field":"value"
}
}
},
"terms":{
"field":"playerId"
}
}
}
}
If I changed that query to:
{
"aggs":{
"term_Agg":{
"aggs":{
"sum":{
"sum":{
"field":"value"
}
}
},
"terms":{
"field":"playerId.keyword"
}
}
}
}
and used that in Postman, it works.
I am not sure why it is not putting the .keyword into the query. Is it the way the NEST client is configured, the indices, or the query?
You need to change your query a little bit to tell NEST to use the keyword field instead of the text field; you can do this with the .Suffix extension method. Link to docs.
var result = _client.Search<ElasticWinnings>(s =>
s.Aggregations(a =>
a.Terms("term_Agg", t =>
t.Field(f => f.PlayerId.Suffix("keyword"))
.Aggregations(aa =>
aa.Sum("sum", sum =>
sum.Field(f => f.Value))
)
))
);
Hope that helps.
The solution I found was to add [Keyword] to the PlayerId property in ElasticWinnings class.
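For illustration, the annotated class looks roughly like this ([Keyword] is NEST's attribute mapping; the other properties are left to AutoMap):
public class ElasticWinnings
{
    public Guid Id { get; set; }
    public Guid HandId { get; set; }

    [Keyword] // map this property as a keyword field so it can be used in terms aggregations
    public Guid PlayerId { get; set; }

    public decimal Value { get; set; }
}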
I kept the .DefaultMappingFor<ElasticWinnings>(u => u.IndexName("winnings")) in the creation of the ConnectionSettings class, but added this before the Elastic client is returned:
var client = new ElasticClient(settings);
client.Indices.Create("winnings", c =>
c.Map<ElasticWinnings>(m => m.AutoMap())
);
Without adding the section above, it did not apply the attributes. This changed my mappings (http://localhost:9200/winnings/_mappings) to
{
"winnings": {
"mappings": {
"properties": {
"handId": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"playerId": {
"type": "keyword"
},
"value": {
"type": "double"
}
}
}
}
}
These are the docs about setting up the mappings: https://www.elastic.co/guide/en/elasticsearch/client/net-api/current/fluent-mapping.html

How to search aggregations in ES?

I have a books index which contains an array of tags (with both text/keyword types). I'd like to offer autocomplete for tags, so users type "ro" and it returns "romance" or "rock and roll".
Here's my mapping:
/books {
  ...
  tags: {
    type: 'text',
    fields: {
      keyword: { type: 'keyword' }
    }
  }
}
Example book
{ name: "foo", tags: ['romance', 'story', 'fiction'] }
My aggregation for tags:
{
  size: 0,
  aggregations: {
    options: {
      terms: {
        field: "tags.keyword",
        size: 20
      }
    }
  }
}
How can I get only the distinct tags that match "ro"?
Simply try:
GET book/_search
{
"query": {
"prefix": {
"tags.keyword": "ro"
}
}, "size": 0,
"aggs": {
"options": {
"terms": {
"field": "tags.keyword",
"size": 20
}
}
}
}
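In NEST, that request could be written roughly as follows (an untested sketch; the Book document class is assumed here and is not part of the original question):
public class Book
{
    public string Name { get; set; }
    public string[] Tags { get; set; }
}

var response = client.Search<Book>(s => s
    .Size(0)                          // we only care about the aggregation buckets
    .Query(q => q
        .Prefix(p => p
            .Field("tags.keyword")
            .Value("ro")))
    .Aggregations(a => a
        .Terms("options", t => t
            .Field("tags.keyword")
            .Size(20))));
Note that the terms buckets will contain every tag of the matching books, not only the ones starting with "ro"; the include clause in the EDIT below addresses that.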
But for your use case I suggest you build a custom analyzer with an ngram filter, like this:
"tags": {
"type": "text",
"analyzer": "english_custom",
"fields": {
"suggester": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
},
"keyword":{
"type": "keyword" }
}
The autocomplete analyzer should be something like this:
{"filter":{
....
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 8
}
},
"analyzer": {
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
}
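If the index is created from NEST, the filter and analyzer above could be registered roughly like this (an untested sketch, assuming NEST 6.x; the index name is illustrative):
client.CreateIndex("books", c => c
    .Settings(s => s
        .Analysis(a => a
            .TokenFilters(tf => tf
                .EdgeNGram("autocomplete_filter", e => e
                    .MinGram(2)
                    .MaxGram(8)))
            .Analyzers(an => an
                .Custom("autocomplete", ca => ca
                    .Tokenizer("standard")
                    .Filters("lowercase", "autocomplete_filter"))))));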
EDIT:
Could you play with the include clause in the terms aggregation?
GET /_search
{
"aggs" : {
"tags" : {
"terms" : {
"field" : "tags.keyword",
"include" : "ro.*"
}
}
}
}
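In NEST, the include pattern maps onto the terms aggregation like this (a sketch, using the same assumed Book class as above):
var response = client.Search<Book>(s => s
    .Size(0)
    .Aggregations(a => a
        .Terms("tags", t => t
            .Field("tags.keyword")
            .Include("ro.*")   // only keep buckets whose key matches the regex
            .Size(20))));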

ElasticSearch Nest: AutoMap with DynamicTemplates

I'm trying to use a dynamic template in ES such that all string fields are multi-fields. I also want to apply some specific mappings to certain fields.
Take the following example class:
[ElasticsearchType(Name = "sample1")]
public class Sample1
{
public string ID { get; set; }
[String(Index = FieldIndexOption.No)]
public string DoNotIndex { get; set; }
public string MultiField1 { get; set; }
public string MultiField2 { get; set; }
}
I want to then create the dynamic template and apply the mapping to DoNotIndex using the following command:
_client.Map<Sample1>(m => m
    .AutoMap()
    .DynamicTemplates(dt => dt
        .DynamicTemplate("all_strings_multifields", t => t
            .MatchMappingType("string")
            .Mapping(tm => tm
                .String(mf => mf
                    .Index(FieldIndexOption.Analyzed)
                    .Fields(mff => mff
                        .String(s => s
                            .Name("raw")
                            .Index(FieldIndexOption.NotAnalyzed)
                        )
                    )
                )
            )
        )
    )
)
.VerifySuccessfulResponse();
The result is:
{
"test1": {
"mappings": {
"sample1": {
"dynamic_templates": [
{
"all_strings_multifields": {
"match_mapping_type": "string",
"mapping": {
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"index": "analyzed",
"type": "string"
}
}
}
],
"properties": {
"doNotIndex": {
"type": "keyword",
"index": false
},
"iD": {
"type": "text"
},
"multiField1": {
"type": "text"
},
"multiField2": {
"type": "text"
}
}
}
}
}
}
Results
You'll see that the DoNotIndex property is indeed correct, but MultiField1 and MultiField2 are not (they are not multi-fields).
Workaround
I know I can "fix" this by NOT doing the AutoMap() and instead specifying each of the special indexes but there are a lot of fields and that isn't as clean of a solution.
Can I do AutoMap with DynamicTemplates?
Dynamic templates only apply to fields that are dynamically added to a mapping, so properties explicitly mapped with .AutoMap() will not be affected by dynamic mapping.
There is, however, a way to apply conventions to explicit mappings with NEST using the visitor pattern. It looks like you're using Elasticsearch 5.0, so you should use text and keyword mappings.
First, define a visitor (the Sample1 POCO is also shown again below, updated to use the 5.x attributes):
[ElasticsearchType(Name = "sample1")]
public class Sample1
{
public string ID { get; set; }
[Keyword(Index = false)]
public string DoNotIndex { get; set; }
public string MultiField1 { get; set; }
public string MultiField2 { get; set; }
}
public class AllStringsMultiFieldsVisitor : NoopPropertyVisitor
{
    public override void Visit(ITextProperty type, PropertyInfo propertyInfo, ElasticsearchPropertyAttributeBase attribute)
    {
        // if a custom attribute has been applied, let it take precedence
        if (propertyInfo.GetCustomAttribute<ElasticsearchPropertyAttributeBase>() == null)
        {
            type.Fields = new Properties
            {
                { "raw", new KeywordProperty() }
            };
        }

        base.Visit(type, propertyInfo, attribute);
    }
}
Then pass an instance of the visitor to .AutoMap()
client.Map<Sample1>(m => m
    .AutoMap(new AllStringsMultiFieldsVisitor())
    .DynamicTemplates(dt => dt
        .DynamicTemplate("all_strings_multifields", t => t
            .MatchMappingType("text")
            .Mapping(tm => tm
                .Text(mf => mf
                    .Index(true)
                    .Fields(mff => mff
                        .Keyword(s => s
                            .Name("raw")
                        )
                    )
                )
            )
        )
    )
);
produces
{
"dynamic_templates": [
{
"all_strings_multifields": {
"match_mapping_type": "text",
"mapping": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
},
"index": true
}
}
}
],
"properties": {
"iD": {
"fields": {
"raw": {
"type": "keyword"
}
},
"type": "text"
},
"doNotIndex": {
"type": "keyword",
"index": false
},
"multiField1": {
"fields": {
"raw": {
"type": "keyword"
}
},
"type": "text"
},
"multiField2": {
"fields": {
"raw": {
"type": "keyword"
}
},
"type": "text"
}
}
}
I should point out, however, that the default automapping for a C# string property in NEST 5.0 is to map it as a text field with a keyword sub-field with ignore_above: 256 (NEST 5.0 was released to NuGet earlier this week), so
client.Map<Sample1>(m => m
    .AutoMap()
    .DynamicTemplates(dt => dt
        .DynamicTemplate("all_strings_multifields", t => t
            .MatchMappingType("text")
            .Mapping(tm => tm
                .Text(mf => mf
                    .Index(true)
                    .Fields(mff => mff
                        .Keyword(s => s
                            .Name("raw")
                        )
                    )
                )
            )
        )
    )
);
produces
{
"dynamic_templates": [
{
"all_strings_multifields": {
"match_mapping_type": "text",
"mapping": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
},
"index": true
}
}
}
],
"properties": {
"iD": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
},
"doNotIndex": {
"type": "keyword",
"index": false
},
"multiField1": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
},
"multiField2": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
}
}
}

elasticsearch ngram analyzer/tokenizer not working?

It seems that the ngram tokenizer isn't working, or perhaps my understanding/use of it isn't correct.
My tokenizer uses a min_gram of 3 and a max_gram of 5. I'm looking for the term 'madonna', which is definitely in my documents under artists.name. I can find the term with other techniques (using the simple analyzer and related), but not using ngram.
What I'm trying to accomplish by using the ngram is to find names while accounting for misspellings.
Please see a shortened version of my mappings, my settings, and my query, and if you have any ideas, please let me know - it's driving me nuts!
settings...
{
"myindex": {
"settings": {
"index": {
"analysis": {
"analyzer": {
"ngramAnalyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "nGramTokenizer"
}
},
"tokenizer": {
"nGramTokenizer": {
"type": "nGram",
"min_gram": "3",
"max_gram": "5"
}
}
},
"number_of_shards": "5",
"number_of_replicas": "1",
"version": {
"created": "1020199"
},
"uuid": "60ggSr6TREaDTItkaNUagg"
}
}
}
}
mappings ...
{
"myindex": {
"mappings": {
"mytype": {
"properties": {
"artists.name": {
"type": "string",
"analyzer": "simple",
"fields": {
"ngram": {
"type": "string",
"analyzer": "ngramAnalyzer"
},
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
query ...
{"query": {"match": {"artists.name.ngram": "madonna"}}}
document ...
{
"_index": "myindex",
"_type": "mytype",
"_id": "602537592951",
"_version": 1,
"found": true,
"_source": {
"artists": [
{
"name": "Madonna",
"id": "P 64565"
}
]
}
}
EDIT
Incidentally, this query works (without ngram):
{"query": {"match": {"artists.name": "madonna"}}}
This obviously has something to do with the nested object here. I'm apparently not applying the ngram to the nested object properly.
Ideas?
OK - I figured it out. I really hope this helps someone because it drove me crazy.
Here's what my mapping turned out to look like:
{
"myindex": {
"mappings": {
"mytype": {
"properties": {
"artists": {
"properties": {
"id": {
"type": "string"
},
"name": {
"type": "string",
"analyzer": "ngramAnalyzer",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
}
And here's how I did it using NEST syntax.
First I had a sub-type (class) called Person, which has a Name and an Id and looks like this (POCO):
[Serializable]
public class Person
{
    public string Name { get; set; }

    [ElasticProperty(Analyzer = "fullTerm", Index = FieldIndexOption.not_analyzed)]
    public string Id { get; set; }
}
And then my mapping went something like this:
.AddMapping<MyIndex>(m => m
    .MapFromAttributes()
    .Properties(props => props
        .Object<Person>(x => x.Name("artists")
            .Properties(pp => pp
                .MultiField(mf => mf
                    .Name(s => s.Name)
                    .Fields(f => f
                        .String(s => s.Name(o => o.Name).Analyzer("ngramAnalyzer"))
                        .String(s => s.Name(o => o.Name.Suffix("raw")).Index(FieldIndexOption.not_analyzed))
                    )
                )
            )
        )
    )
)
Note the Object call here, which indicates it's another object beneath my type, 'artists'.
Thanks, me!!!
Edit:
The curl mapping might be something like this:
curl -XPOST "http://localhost:9200/yourindex/_mappings" -H 'Content-Type: application/json' -d '{"myindex":{"mappings":{"mytype":{"properties":{"artists":{"properties":{"id":{"type":"string"},"name":{"type":"string","analyzer":"ngramAnalyzer","fields":{"raw":{"type":"string","index":"not_analyzed"}}}}}}}}}}'
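When debugging this kind of problem it also helps to run the analyze API against the index and check which tokens the ngram analyzer actually produces; a hedged NEST sketch (client.Analyze in NEST 6.x and earlier, client.Indices.Analyze in 7.x):
var analyzeResponse = client.Analyze(a => a
    .Index("myindex")
    .Analyzer("ngramAnalyzer")
    .Text("madonna"));

// expect 3-5 character grams such as: mad, ado, don, onn, nna, mado, adon, ...
foreach (var token in analyzeResponse.Tokens)
    Console.WriteLine(token.Token);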
