How to search aggregations in ES? - elasticsearch

I have a books index which contains an array of tags (with both text/keyword types), i'd like to offer an autocomplete for tags so users type "ro" and it returns "romance" or "rock and roll".
Here's my mapping:
/books {
...
tags: {
type: 'text',
field: {
keyword: {type: 'keyword'}
}
}
}
Example book
{ name: "foo", tags: ['romance', 'story', 'fiction'] }
My aggregation for tags:
{
size: 0,
aggregations: {
options: {
terms: {
field: `tags.keyword`,
size: 20
}
}
}
How can I only get all distinct tags that match "ro"?

Simply try:
GET book/_search
{
"query": {
"prefix": {
"tags.keyword": "ro"
}
}, "size": 0,
"aggs": {
"options": {
"terms": {
"field": "tags.keyword",
"size": 20
}
}
}
}
But for your use case I suggest to you to build a custom analyzer with ngram filter, like this:
"tags": {
"type": "text",
"analyzer": "english_custom",
"fields": {
"suggester": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
},
"keyword":{
"type": "keyword" }
}
The autocomplete analyzer should be something like this:
{"filter":{
....
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 8
}
},
"analyzer": {
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
}
EDIT:
Could you play with the include clause in terms aggregation?
GET /_search
{
"aggs" : {
"tags" : {
"terms" : {
"field" : "tags.keyword",
"include" : "ro.*"
}
}
}
}

Related

ElasticSearch Search-as-you-type field type field with partial search

I recently updating my ngram implementation settings to use Search-as-you-type field type.
https://www.elastic.co/guide/en/elasticsearch/reference/7.x/search-as-you-type.html
This worked great but I noticed that partial searching does not work.
If I search for number 00060434 I get the desired result but I would also like to be able to search for 60434, then it should return document 3.
Is there a way todo it with the Search-as-you-type field type or can i only do this with ngrams?
PUT searchasyoutype_example
{
"settings": {
"analysis": {
"analyzer": {
"englishAnalyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"trim",
"ascii_folding"
]
}
},
"filter": {
"ascii_folding": {
"type": "asciifolding",
"preserve_original": true
}
}
}
},
"mappings": {
"properties": {
"number": {
"type": "search_as_you_type",
"analyzer": "englishAnalyzer"
},
"fullName": {
"type": "search_as_you_type",
"analyzer": "englishAnalyzer"
}
}
}
}
PUT searchasyoutype_example/_doc/1
{
"number" : "00069794",
"fullName": "Employee 1"
}
PUT searchasyoutype_example/_doc/2
{
"number" : "00059840",
"fullName": "Employee 2"
}
PUT searchasyoutype_example/_doc/3
{
"number" : "00060434",
"fullName": "Employee 3"
}
GET searchasyoutype_example/_search
{
"query": {
"multi_match": {
"query": "00060434",
"type": "bool_prefix",
"fields": [
"number",
"number._index_prefix",
"fullName",
"fullName._index_prefix"
]
}
}
}
I think you need to query on number,number._2gram & number._3gram like below:
GET searchasyoutype_example/_search
{
"query": {
"multi_match": {
"query": "00060434",
"type": "bool_prefix",
"fields": [
"number",
"number._2gram",
"number._3gram",
]
}
}
}
search_as_you_type creates the 3 sub fields. You can check more on this article how it works:
https://ashish.one/blogs/search-as-you-type/

ElasticSearch: How to use edge_ngram and have real relevant hits to display first

I'm new with elasticsearch and I'm trying to develop a search for an ecommerce to suggested 5~10 matching products to the user.
As it should work while the user is typing, we found in the official documentation the use of edge_ngram and it KIND OF worked. But as we searched to test, the results were not the expected. As shows the example below (in our test)
Searching example
As it is shown in the image, the result for the term "Furadeira" (Power Drill) returns accessories before the power drill itself. How can I enhance the results? Even the order where the match is found in the string would help me, I guess.
So, this is the code I have until now:
//PUT example
{
"settings": {
"number_of_shards": 1,
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 20
},
"portuguese_stop": {
"type": "stop",
"stopwords": "_portuguese_"
},
"portuguese_stemmer": {
"type": "stemmer",
"language": "light_portuguese"
}
},
"analyzer": {
"portuguese": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"portuguese_stop",
"portuguese_stemmer"
]
},
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
}
}
}
}
/* mapping */
//PUT /example/products/_mapping
{
"products": {
"properties": {
"name": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
}
}
}
}
/* Search */
//GET /example/products/_search
{
"query" : {
"query_string": {
"query" : "furadeira",
"type" : "most_fields", // Tried without this aswell
"fields" : [
"name^8",
"model^10",
"manufacturer^4",
"description"
]
}
}
}
/* Product example */
// PUT example/products/38313
{
"name": "FITA VEDA FRESTA (ESPUMA 4503) 12X5 M [ H0000164055 ]",
"description": "Caracteristicas do produto:Veā€¦Diminui ruidos indesejaveis.",
"price":21.90,
"product_id": 38313,
"image": "http://placehold.it/200x200",
"quantity": 92,
"width": 20.200,
"height": 1.500,
"length": 21.500,
"weight": 0.082,
"model": "167083",
"manufacturer": "3M DO BRASIL"
}
Thanks in advance.
you could enhance your query to be a so-called boolean query, which contains your existing query in a must clause, but have an additional query in a should clause, that matches exactly (not using the ngrammed field). If the query matches the should clause it will be scored higher.
See the bool query documentation.
let's assume you have a field that differentiates the Main product from Accessories. I call it level_field.
now you can have two approaches to go:
1) boost up The Main product _score by adding 'should' operation:
put your main query in the must operation and in should operation use level_field to boost the _score of documents which are the Main products.
{
"query": {
"bool": {
"must": {
"match": {
"name": {
"query": "furadeira"
}
}
},
"should": [
{ "match": {
"level_field": {
"query": "level1",
"boost": 3
}
}},
{ "match": {
"level_field": {
"query": "level2",
"boost": 2
}
}}
]
}
}
}
2) in second approach you can decrease _score for documents that they are not the Main products by using boosting query:
{
"query": {
"boosting": {
"positive": {
"query_string": {
"query" : "furadeira",
"type" : "most_fields",
"fields" : [
"name^8",
"model^10",
"manufacturer^4",
"description"
]
}
}
},
"negative": {
"term": {
"level_field": {
"value": "level2"
}
}
},
"negative_boost": 0.2
}
}
}
I hope it helps

Range Query on a score returned by match Query in Elastic Search

Suppose I have a set of documents like :-
{
"Name":"Random String 1"
"Type":"Keyword"
"City":"Lousiana"
"Quantity":"10"
}
Now I want to implement a full text search using an N-gram analyazer on the field Name and City.
After that , I want to filter only the results returned with
"_score" :<Query Score Returned by ES>
greater than 1.2 (Maybe By Range Query Aggregation Method)
And after that apply term aggregation method on the property: "Type" and then return the top results in each bucket by using "top_hits" aggregation method.
How can I do so ?
I've been able to implement everything apart from the Range Query on score returned by a search query.
if you want to score the documents organically then i you can use min_score in query to filter the matched documents for the score.
for ngram analyer i added whitespace tokenizer and a lowercase filter
Mappings
PUT index1
{
"settings": {
"analysis": {
"analyzer": {
"edge_n_gram_analyzer": {
"tokenizer": "whitespace",
"filter" : ["lowercase", "ednge_gram_filter"]
}
},
"filter": {
"ednge_gram_filter" : {
"type" : "NGram",
"min_gram" : 2,
"max_gram": 10
}
}
}
},
"mappings": {
"document_type" : {
"properties": {
"Name" : {
"type": "text",
"analyzer": "edge_n_gram_analyzer"
},
"City" : {
"type": "text",
"analyzer": "edge_n_gram_analyzer"
},
"Type" : {
"type": "keyword"
}
}
}
}
}
Index Document
POST index1/document_type
{
"Name":"Random String 1",
"Type":"Keyword",
"City":"Lousiana",
"Quantity":"10"
}
Query
POST index1/_search
{
"min_score": 1.2,
"size": 0,
"query": {
"bool": {
"should": [
{
"term": {
"Name": {
"value": "string"
}
}
},
{
"term": {
"City": {
"value": "string"
}
}
}
]
}
},
"aggs": {
"type_terms": {
"terms": {
"field": "Type",
"size": 10
},
"aggs": {
"type_term_top_hits": {
"top_hits": {
"size": 10
}
}
}
}
}
}
Hope this helps

Fuzzy contains query with elasticSearch

How can I perform a query that do fuzzy and contains on strings?
Let's say I have the following document:
{
...
"name":"william shakespeare"
...
}
I would like to receive the document for the following queries:
"William" (will return all the williams)
"Willeam" (same as 1)
"William Shake" (will return only the document that contains "William Shake"
"Wiliam sake" (same as 3)
"william shakespeare" / "William Shakespeare" / "William shakespeer" (will return only william shakespeare
I tried to use ngram analyzer and fuzziness queries with no success.
{
"settings": {
"analysis": {
"filter": {
"ngram_analyzer_filter": {
"type": "ngram",
"min_gram": 2,
"max_gram": 15
}
},
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"ngram_analyzer_filter"
]
}
}
}
},
"mappings": {
"my_type": {
"properties": {
"name": {
"type": "string",
"analyzer": "ngram_analyzer",
"search_analyzer": "standard",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
my query:
{
"query": {
"multi_match": {
"query": "william shake",
"fields": [
"name.raw"
],
"fuzziness": 2,
"minimum_should_match":2
}
}
}
It multi_match because I search more than one field.
Tried to use the analyzed field or not_analyzed field.
Tried to use "type":"phrase"
Elastic version 2.3.1
Try below query.
{
'query': {
'multi_match': {
'fields': [
"name.raw"
],
'query': $scope.search.queryTerm,
'fuzziness': 2,
'prefix_length': 1
}
}
}

Search results ordered by search-text-length/match length

I have this simple mapping:
PUT testindex
{
"settings": {
"analysis": {
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "edgeNGram"]
}
},
"filter" : {
"ngram" : {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 15
}
}
}
},
"mappings": {
"test": {
"properties": {
"name": {
"type": "string",
"analyzer" : "ngram_analyzer"
}
}
}
}
}
With these values:
PUT testindex/test/1
{"name" : "Power"}
PUT testindex/test/2
{"name" : "Pow"}
PUT testindex/test/3
{"name" : "PowerMax"}
PUT testindex/test/4
{"name" : "PowerRangers"}
And searched this:
GET testindex/test/_search
{
"query": {
"match": {
"name": "Po"
}
}
}
And got:
PowerRangers
Power
Pow
PowerMax
All with the same score of 0.2876821
Clearly, the closest result to "Po" is "Pow", and that I expect to receive first; but I don't.
How Should I modify my mapping to behave by this logic?
I think scripted sorting is the solution, but it comes with a performance decrease drawback. See here more about this. And the query you can use is this:
GET testindex/test/_search
{
"query": {
"match": {
"name": "Po"
}
},
"sort": {
"_script": {
"script": "_source['name'].value.length",
"type": "number",
"order": "asc"
}
}
}

Resources