Completion Suggester in Elasticsearch not working - elasticsearch

(Using ES 6.7)
I have an index and want to support search-as-you-type feature. For that, I want to try completion suggester but I'm having trouble in reindexing to change the mappings old index.
Here's the old index mappings
{
"old-index": {
"mappings": {
"doc": {
"properties": {
"content": {
"type": "text"
},
"project": {
"type": "keyword"
},
"title": {
"type": "text"
},
"version": {
"type": "keyword"
}
}
}
}
}
}
Here's the new test index mappings
PUT test-completion
{
"mappings": {
"doc": {
"properties": {
"content": {
"type": "text",
"fields": {
"autocomplete": {
"type": "completion",
"contexts": [
{
"name": "project",
"type": "category",
"path": "project"
},
{
"name": "version",
"type": "category",
"path": "version"
}
]
}
}
},
"title": {
"type": "text"
},
"project": {
"type": "keyword"
},
"version": {
"type": "keyword"
}
}
}
}
}
Here's the reindexing query
POST _reindex
{
"source": {
"index": "old-index"
},
"dest": {
"index": "test-completion"
}
}
And here's the query which returns no results
POST test-completion/_search
{
"suggest": {
"autocompletion_suggest": {
"prefix": "part of documentation",
"completion": {
"field": "content.autocomplete",
"fuzzy": {
"fuzziness": "AUTO"
},
"contexts": {
"project": "xyz-project",
"version": "abc-version"
}
}
}
}
}
If the prefix is set to a or b, it returns results outside of context.
Where I'm doing wrong?
https://discuss.elastic.co/t/problem-with-completion-suggester/181695

Related

Reindex parent/child relationship from 2.x to 6.x in elasticsearch

I need way to re-index parent/child data from 2.4 to 6.8 Join. The old index has multiple entities.
old-index mapping:
{
"mappings": {
"parentType": {
"properties": {
"field_1": {
"type": "text"
},
"field_2": {
"type": "text"
},
"field_3": {
"type": "keyword"
},
"filed_4": {
"type": "long"
}
}
},
"child_type_1": {
"_parent": {
"type": "case"
},
"_routing": {
" required": true
},
"child1_field_1": {
"type": "text"
},
"child1_field_2": {
"type": "text"
},
"child1_field_3": {
"type": "keyword"
}
},
"child_type_2": {
"_parent": {
"type": "parentType"
},
"_routing": {
"required": true
},
"child2_field_1": {
"type": "text"
},
"child2_field_2": {
"type": "text"
}
}
}
}
I want to transform it into to the following 6.8 mapping:
{
"mappings": {
"doc": {
"properties": {
"parentType": {
"properties": {
"field_1": {
"type": "text"
},
"field_2": {
"type": "text"
},
"field_3": {
"type": "keyword"
},
"filed_4": {
"type": "long"
}
}
},
"child_type_1": {
"child1_field_1": {
"type": "text"
},
"child1_field_2": {
"type": "text"
},
"child1_field_3": {
"type": "keyword"
}
},
"child_type_2": {
"child2_field_1": {
"type": "text"
},
"child2_field_2": {
"type": "text"
}
},
"join_field": {
"type": "join",
"relations": {
"parentType": [
"child_type_1",
"child_type_2"
]
}
}
}
}
}
}
I know I am supposed to use the re-index API but I am not sure how exactly the script has to be written. I want to re-index all the parent and children documents into the new index where the _type is "doc"

Easticsearch reindexing multi-type parent/child index(v5.0) to join type index(v6.2)

I am reindexing my index data from ES 5.0(parent-child) to ES 6.2(Join type)
Data in index ES 5.0 is stored as parent-child documents in separate types and for reindex i have created new index/mapping based on 6.2 in my new cluster.
The parent documents flawlessly reindex to new index but the child documents throwing error as below
{
"index": "index_two",
"type": "_doc",
"id": "AVpisCkMuwDYFnQZiFXl",
"cause": {
"type": "mapper_parsing_exception",
"reason": "failed to parse",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "[routing] is missing for join field [field_relationship]"
}
},
"status": 400
}
scripts i am using to reindex the data
{
"source": {
"remote": {
"host": "http://myescluster.com:9200",
"socket_timeout": "1m",
"connect_timeout": "20s"
},
"index": "index_two",
"type": ["actions"],
"size": 5000,
"query":{
"bool":{
"must":[
{"term": {"client_id.raw": "cl14ous0ydao"}}
]
}
}
},
"dest": {
"index": "index_two",
"type": "_doc"
},
"script": {
"params": {
"jdata": {
"name": "actions"
}
},
"source": "ctx._routing=ctx._routing;ctx.remove('_parent');params.jdata.parent=ctx._source.user_id;ctx._source.field_relationship=params.jdata"
}
}
I have passed the routing field in painless script as the documents are dynamic from source index.
Mapping of the destination index
{
"index_two": {
"mappings": {
"_doc": {
"dynamic_templates": [
{
"template_actions": {
"match_mapping_type": "string",
"mapping": {
"fields": {
"raw": {
"index": true,
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
}
}
}
],
"date_detection": false,
"properties": {
"attributes": {
"type": "nested"
}
},
"cl_other_params": {
"type": "nested"
},
"cl_triggered_ts": {
"type": "date"
},
"cl_utm_params": {
"type": "nested"
},
"end_ts": {
"type": "date"
},
"field_relationship": {
"type": "join",
"eager_global_ordinals": true,
"relations": {
"users": [
"actions",
"segments"
]
}
},
"ip_address": {
"type": "ip"
},
"location": {
"type": "geo_point"
},
"processed_ts": {
"type": "date"
},
"processing_time": {
"type": "date"
},
"products": {
"type": "nested",
"properties": {
"traits": {
"type": "nested"
}
}
},
"segment_id": {
"type": "integer"
},
"start_ts": {
"type": "date"
}
}
}
}
}
My sample source document
{
"_index": "index_two",
"_type": "actions",
"_id": "AVvKUYcceQCc2OyLKWZ9",
"_score": 7.4023576,
"_routing": "cl14ous0ydaob71ab2a1-837c-4904-a755-11e13410fb94",
"_parent": "cl14ous0ydaob71ab2a1-837c-4904-a755-11e13410fb94",
"_source": {
"user_id": "cl14ous0ydaob71ab2a1-837c-4904-a755-11e13410fb94",
"client_id": "cl14ous0ydao",
"session_id": "CL-e0ec3941-6dad-4d2d-bc9b",
"source": "betalist",
"action": "pageview",
"action_type": "pageview",
"device": "Desktop",
"ip_address": "49.35.14.224",
"location": "20.7333 , 77",
"attributes": [
{
"key": "url",
"value": "https://www.google.com/",
"type": "string"
}
],
"products": []
}
}
I had the same issue and searching in elasticsearch discussions I found this that works:
POST _reindex
{
"source": {
"index": "old_index",
"type": "actions"
},
"dest": {
"index": "index_two"
},
"script": {
"source": """
ctx._type = "_doc";
String routingCode = ctx._source.user_id;
Map join = new HashMap();
join.put('name', 'actions');
join.put('parent', routingCode);
ctx._source.put('field_relationship', join);
ctx._parent = null;
ctx._routing = new StringBuffer(routingCode)"""
}
}
Hope this helps :) .
I'd like to point out that routing is generally not required for a join field, however if you're creating the child before the parent is created, then you're going to face this problem.
It's advisable to re-index all the parents first then the children.

ElasticSearch Dynamic Template for nested

I try to create dynamic template for a nested object.
Here is a document to index :
{
"title": "My title",
"attributes": {
"color": {
"id": 42,
"label": "red"
},
"material": {
"id": 43,
"label": "textile"
}
}
}
This is the template i tried, without success
{
"dynamic": "false",
"dynamic_templates": [
{
"attributes": {
"path_match": "attributes",
"mapping": {
"type": "nested"
}
}
},
{
"attributes_nested": {
"path_match": "attributes.*",
"mapping": {
"properties": {
"id": {
"type": "integer"
},
"value": {
"type": "string"
}
}
}
}
}
],
"properties": {
"title": {
"type": "string"
}
}
}
I'd like to be able to make aggregations on attributes.color.id, and attributes.material.id
Nevermind, the problem was that i had
{ "dynamic": false}
The correct mapping is
{
"dynamic": "false",
"dynamic_templates": [
{
"attributes_nested": {
"path_match": "attributes.*",
"mapping": {
"properties": {
"id": {
"type": "integer"
},
"value": {
"type": "string"
}
}
}
}
}
],
"properties": {
"title": {
"type": "string"
},
"attributes": {
"type": "nested",
"dynamic": true
}
}
}

Encountering ElasticsearchIllegalArgumentException when using Phrase Suggester

I'm trying to use ElasticSearch 1.7.3 to implement a Did-you-mean function for my company's search engine. I've followed the documentation to set up a Phrase Suggester and created a customized mapping to support that.
However, when I do a _suggest query, I get ElasticsearchIllegalArgumentException[Suggester[simple_phrase] not supported]. What am I doing wrong?
This is my query:
POST knowledge_graph/entities/_suggest
{
"suggest": {
"text" : "apple in",
"simple_phrase": {
"phrase" : {
"field" : "canonical_name"
}
}
}
}
I get the following response:
{
"_shards": {
"total": 5,
"successful": 0,
"failed": 5,
"failures": [
{
"index": "knowledge_graph",
"shard": 0,
"status": 500,
"reason": "BroadcastShardOperationFailedException[[knowledge_graph][0] ]; nested: ElasticsearchException[failed to execute suggest]; nested: ElasticsearchIllegalArgumentException[Suggester[simple_phrase] not supported]; "
},
...
]
}
}
Here's my index's settings and mappings:
{
"knowledge_graph": {
"aliases": {},
"mappings": {
"entities": {
"properties": {
"autocomplete": {
"type": "completion",
"analyzer": "simple",
"payloads": true,
"preserve_separators": true,
"preserve_position_increments": true,
"max_input_length": 50
},
"canonical_name": {
"type": "string",
"analyzer": "simple",
"fields": {
"shingles": {
"type": "string",
"analyzer": "simple_shingle_analyzer"
}
}
},
"entity_query": {
"properties": {
"simple_phrase": {
"properties": {
"phrase": {
"properties": {
"field": {
"type": "string"
}
}
}
}
},
"text": {
"type": "string"
}
}
},
"suggest": {
"properties": {
"simple_phrase": {
"properties": {
"phrase": {
"properties": {
"field": {
"type": "string"
}
}
}
}
},
"text": {
"type": "string"
}
}
}
}
}
},
"settings": {
"index": {
"creation_date": "1449251691345",
"analysis": {
"filter": {
"shingles_1_6": {
"type": "shingle",
"max_shingle_size": "6",
"output_unigrams_if_no_shingles": "true"
}
},
"analyzer": {
"simple_shingle_analyzer": {
"type": "custom",
"filter": [
"lowercase",
"shingles_1_6"
],
"tokenizer": "standard"
}
}
},
"number_of_shards": "5",
"number_of_replicas": "0",
"version": {
"created": "1070399"
},
"uuid": "g_Yp7z6kQHCDRtd6TvVlzQ"
}
},
"warmers": {}
}
}
There are two ways to execute suggest requests, one with _search endpoint and another with _suggest endpoint.
From the Docs
Suggest requests executed against the _suggest endpoint should omit
the surrounding suggest element which is only used if the suggest
request is part of a search.
Your query will work if you execute it against _search api
POST knowledge_graph/entities/_search <---- here
{
"suggest": {
"text" : "apple in",
"simple_phrase": {
"phrase" : {
"field" : "canonical_name"
}
}
},
"size" : 0
}
If you want to query with _suggest endpoint try this
POST knowledge_graph/_suggest
{
"suggest": {
"text": "apple in",
"phrase": {
"field": "canonical_name"
}
}
}
Note - I think you should be executing phrase suggest against canonical_name.shingles

How can I retrieve matching children only?

Consider a very simple model where we have locations and each location can have zero or more events. A location would have properties such as name, description and geo point data (lon/lat). An event should be attached to one location (its parent) and should have a name and description.
{
"location" : {
"properties": {
"name": { "type": "string", "boost": 2.0, "analyzer": "snowball" },
"description": { "type": "string", "analyzer": "snowball" },
"geo": { "type": "geo_point" },
"exhibits": {
"type": "nested",
"properties": {
"name": { "type": "string", "boost": 2.0, "analyzer": "snowball" },
"description": { "type": "string", "analyzer": "snowball" }
}
}
}
}
}
What I want to be able to do, is to query for the child documents (events) performing a full text search on their names and descriptions. I would like to get the matching events back and be able to also get their parent location's name. I would also like to narrow down the result set by location's coordinates. I don't want to get any events that do not match the query. Is that possible in Elastic Search? What types of queries should I use?
I have tried putting events as an array property under location (see above) and using the nested query but it does not return the kind of results I want (I think it returns the whole location, including all events, even the ones that do not match my query). I have tried putting events into a separate index (mapping?) providing the _parent property and then performing the top_children query on locations, but I don't get any results.
{
"exhibit": {
"_parent": { "type": "locations" },
"properties": {
"name": { "type": "string", "boost": 2.0, "analyzer": "snowball" },
"description": { "type": "string", "analyzer": "snowball" }
}
}
}
Could anyone shed some light? I don't know where to begin...
Here's the working solution to my problem, perhaps it will be useful to somebody.
Location mapping:
{
"location" : {
"properties": {
"name": { "type": "string", "boost": 2.0, "analyzer": "snowball" },
"description": { "type": "string", "analyzer": "snowball" },
"geo": { "type": "geo_point" }
}
}
}
Exhibit mapping:
{
"exhibit": {
"_parent": { "type": "locations" },
"properties": {
"name": { "type": "string", "boost": 2.0, "analyzer": "snowball" },
"description": { "type": "string", "analyzer": "snowball" }
}
}
}
Query:
{
"fields": [ "_parent", "name", "_source" ],
"query": {
"bool": {
"should": [
{ "text": { "name": "candy" } },
{ "text": { "description": "candy" } }
]
}
},
"filter": {
"and": [
{
"terms" : {
"_parent": [ "4e7089a9b97d640b30695b7a", "4e7089eeb97d640b30695b7b" ]
}
},
{ "range": { "start": { "lte": "2011-09-22" } } },
{ "range": { "end": { "gte": "2011-09-22" } } }
]
}
}
You should query using the _parent field and passing it an array of IDs of locations to which you want to limit the exhibits.

Resources