Couchbase Full text search matching partial numbers

Couchbase Full text search matching partial numbers - full-text-search

Good day.
I am using Couchbase to store data of some entity which includes "firstName", "lastName", "dob" and "idNumber". (Various other dynamic data is also present, but it is not important now.)
I have created a full-text-search index to index specific types of documents and set it up to search on the above-mentioned fields only. The search works perfectly for "firstName" and "lastName", with partial matches as well.
However, for the idNumber, the exact number must be given. Partial searches on the idNumber do not work. For example, if '7605054321089' is the idNumber and I search for '760505432', nothing is returned. If I enter the idNumber completely, it is returned successfully. I have fiddled with the properties under the FTS section, to no avail.
My date-of-birth field does not work at all. I cannot search on that field, but I have parked that until I get the idNumber to work at least.
The JSON describing the filter is as follows (sorry for the size):
{
"type": "fulltext-index",
"name": "entities_fts",
"uuid": "5c5dc5e32083535f",
"sourceType": "couchbase",
"sourceName": "entities",
"sourceUUID": "04bb2840ed12d26003797737e5a19908",
"planParams": {
"maxPartitionsPerPIndex": 32,
"numReplicas": 0,
"hierarchyRules": null,
"nodePlanParams": null,
"pindexWeights": null,
"planFrozen": false
},
"params": {
"mapping": {
"byte_array_converter": "json",
"default_analyzer": "standard",
"default_datetime_parser": "dateTimeOptional",
"default_field": "_all",
"default_mapping": {
"display_order": "1",
"dynamic": true,
"enabled": false
},
"default_type": "_default",
"index_dynamic": true,
"store_dynamic": false,
"type_field": "header.type",
"types": {
"person": {
"display_order": "0",
"dynamic": false,
"enabled": true,
"properties": {
"basic": {
"display_order": "1",
"dynamic": false,
"enabled": true,
"properties": {
"dob": {
"dynamic": false,
"enabled": true,
"fields": [
{
"analyzer": "",
"display_order": "0",
"include_in_all": true,
"include_term_vectors": true,
"index": true,
"name": "",
"store": false,
"type": "datetime"
}
]
},
"firstNames": {
"dynamic": false,
"enabled": true,
"fields": [
{
"analyzer": "",
"display_order": "2",
"include_in_all": true,
"include_term_vectors": true,
"index": true,
"name": "",
"store": false,
"type": "text"
}
]
},
"lastNames": {
"dynamic": false,
"enabled": true,
"fields": [
{
"analyzer": "",
"display_order": "1",
"include_in_all": true,
"include_term_vectors": false,
"index": true,
"name": "",
"store": false,
"type": "text"
}
]
}
}
},
"extended": {
"display_order": "0",
"dynamic": false,
"enabled": true,
"properties": {
"idNumber": {
"dynamic": false,
"enabled": true,
"fields": [
{
"analyzer": "keyword",
"display_order": "0",
"include_in_all": true,
"include_term_vectors": false,
"index": true,
"name": "",
"store": false,
"type": "text"
}
]
}
}
}
}
}
}
},
"store": {
"kvStoreName": "forestdb"
}
},
"sourceParams": {
"clusterManagerBackoffFactor": 0,
"clusterManagerSleepInitMS": 0,
"clusterManagerSleepMaxMS": 2000,
"dataManagerBackoffFactor": 0,
"dataManagerSleepInitMS": 0,
"dataManagerSleepMaxMS": 2000,
"feedBufferAckThreshold": 0,
"feedBufferSizeBytes": 0
}
}
Any help will be greatly appreciated, thank you.

For the idNumber search in Couchbase FTS, it might be because you're attempting an exact-match search. You might want to try a prefix search:
{
"from": 0,
"size": 10,
"query": {
"field": "name",
"prefix": "bobble"
}
}
https://github.com/blevesearch/bleve/blob/master/test/tests/basic/searches.json#L91

Related

Elasticsearch stops refreshing

In Elasticsearch 6.6.2 I'm having this weird problem
where Elasticsearch stops refreshing all indexes. Any UPDATE/INDEX/DELETE operation
returns an OK result, but search does not show the changes. I see no errors in the logs.
If I try to manually force a refresh via the API, it runs forever with no response.
If I restart ES it resumes working correctly and all documents indexed during the blackout are there. The problem appears again after a few days. This is happening on only one single instance out of the many tens that we have in production with exactly the same configuration.
All indexes are open and green.
UPDATE:
this is the output of /someindex/_settings?include_defaults during problem:
{
"someindex": {
"settings": {
"index": {
"creation_date": "1627983403578",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "eWWHKAsHQYix3uZSTs6YXg",
"version": {
"created": "6060299"
},
"provided_name": "someindex"
}
},
"defaults": {
"index": {
"max_inner_result_window": "100",
"unassigned": {
"node_left": {
"delayed_timeout": "1m"
}
},
"max_terms_count": "65536",
"lifecycle": {
"name": "",
"rollover_alias": "",
"indexing_complete": "false"
},
"routing_partition_size": "1",
"max_docvalue_fields_search": "100",
"merge": {
"scheduler": {
"max_thread_count": "2",
"auto_throttle": "true",
"max_merge_count": "7"
},
"policy": {
"reclaim_deletes_weight": "2.0",
"floor_segment": "2mb",
"max_merge_at_once_explicit": "30",
"max_merge_at_once": "10",
"max_merged_segment": "5gb",
"expunge_deletes_allowed": "10.0",
"segments_per_tier": "10.0",
"deletes_pct_allowed": "33.0"
}
},
"max_refresh_listeners": "1000",
"max_regex_length": "1000",
"load_fixed_bitset_filters_eagerly": "true",
"number_of_routing_shards": "5",
"write": {
"wait_for_active_shards": "1"
},
"mapping": {
"coerce": "false",
"nested_fields": {
"limit": "50"
},
"depth": {
"limit": "20"
},
"ignore_malformed": "false",
"total_fields": {
"limit": "1000"
}
},
"source_only": "false",
"soft_deletes": {
"enabled": "false",
"retention": {
"operations": "0"
}
},
"max_script_fields": "32",
"query": {
"default_field": [
"*"
],
"parse": {
"allow_unmapped_fields": "true"
}
},
"format": "0",
"frozen": "false",
"sort": {
"missing": [],
"mode": [],
"field": [],
"order": []
},
"priority": "1",
"codec": "default",
"max_rescore_window": "10000",
"max_adjacency_matrix_filters": "100",
"gc_deletes": "60s",
"optimize_auto_generated_id": "true",
"max_ngram_diff": "1",
"translog": {
"generation_threshold_size": "64mb",
"flush_threshold_size": "512mb",
"sync_interval": "5s",
"retention": {
"size": "512mb",
"age": "12h"
},
"durability": "REQUEST"
},
"auto_expand_replicas": "false",
"mapper": {
"dynamic": "true"
},
"requests": {
"cache": {
"enable": "true"
}
},
"data_path": "",
"highlight": {
"max_analyzed_offset": "-1"
},
"routing": {
"rebalance": {
"enable": "all"
},
"allocation": {
"enable": "all",
"total_shards_per_node": "-1"
}
},
"search": {
"slowlog": {
"level": "TRACE",
"threshold": {
"fetch": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
},
"query": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
}
}
},
"throttled": "false"
},
"fielddata": {
"cache": "node"
},
"default_pipeline": "_none",
"max_slices_per_scroll": "1024",
"shard": {
"check_on_startup": "false"
},
"xpack": {
"watcher": {
"template": {
"version": ""
}
},
"version": "",
"ccr": {
"following_index": "false"
}
},
"percolator": {
"map_unmapped_fields_as_text": "false",
"map_unmapped_fields_as_string": "false"
},
"allocation": {
"max_retries": "5"
},
"refresh_interval": "1s",
"indexing": {
"slowlog": {
"reformat": "true",
"threshold": {
"index": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
}
},
"source": "1000",
"level": "TRACE"
}
},
"compound_format": "0.1",
"blocks": {
"metadata": "false",
"read": "false",
"read_only_allow_delete": "false",
"read_only": "false",
"write": "false"
},
"max_result_window": "10000",
"store": {
"stats_refresh_interval": "10s",
"type": "",
"fs": {
"fs_lock": "native"
},
"preload": []
},
"queries": {
"cache": {
"enabled": "true"
}
},
"ttl": {
"disable_purge": "false"
},
"warmer": {
"enabled": "true"
},
"max_shingle_diff": "3",
"query_string": {
"lenient": "false"
}
}
}
}
}
Any hint?
Thanks

Is there API based method to create an index pattern in Kibana if its index is present in ES

I have an index in ES. I need to create an index pattern for it in .kibana using an API call. When creating it, I also want to set the column which is going to be the timestamp column. Any help would be appreciated.

You can do it, but you'll need to construct the whole structure by yourself. An index pattern definition looks like this:
PUT .kibana/doc/index-pattern:<some-uuid>
{
"type": "index-pattern",
"updated_at": "2018-01-27T07:12:05.373Z",
"index-pattern": {
"title": "test*",
"timeFieldName": "@timestamp",
"fields": """ ... """
}
}
title is the name of your index pattern, the same one you'd input if you create the index pattern through the UI
timeFieldName is the name of the timestamp field
fields is a string containing a JSON array of all the field definitions in your index pattern (see below)
The fields definition looks like this:
[
{
"name": "@timestamp",
"type": "date",
"count": 0,
"scripted": false,
"searchable": true,
"aggregatable": true,
"readFromDocValues": true
},
{
"name": "_id",
"type": "string",
"count": 0,
"scripted": false,
"searchable": true,
"aggregatable": true,
"readFromDocValues": false
},
{
"name": "_index",
"type": "string",
"count": 0,
"scripted": false,
"searchable": true,
"aggregatable": true,
"readFromDocValues": false
},
{
"name": "_score",
"type": "number",
"count": 0,
"scripted": false,
"searchable": false,
"aggregatable": false,
"readFromDocValues": false
},
{
"name": "_source",
"type": "_source",
"count": 0,
"scripted": false,
"searchable": false,
"aggregatable": false,
"readFromDocValues": false
},
{
"name": "_type",
"type": "string",
"count": 0,
"scripted": false,
"searchable": true,
"aggregatable": true,
"readFromDocValues": false
},
{
"name": "referer",
"type": "string",
"count": 0,
"scripted": false,
"searchable": true,
"aggregatable": false,
"readFromDocValues": false
},
...
]
So you need to create this array for each of your fields, then stringify it and put the string inside the fields field.
Here is a sample document representing an index pattern:
{
"type": "index-pattern",
"updated_at": "2018-01-27T07:12:05.373Z",
"index-pattern": {
"title": "test*",
"timeFieldName": "@timestamp",
"fields": """[{"name":"@timestamp","type":"date","count":0,"scripted":false,"searchable":true,"aggregatable":true,"readFromDocValues":true},{"name":"_id","type":"string","count":0,"scripted":false,"searchable":true,"aggregatable":true,"readFromDocValues":false},{"name":"_index","type":"string","count":0,"scripted":false,"searchable":true,"aggregatable":true,"readFromDocValues":false},{"name":"_score","type":"number","count":0,"scripted":false,"searchable":false,"aggregatable":false,"readFromDocValues":false},{"name":"_source","type":"_source","count":0,"scripted":false,"searchable":false,"aggregatable":false,"readFromDocValues":false},{"name":"_type","type":"string","count":0,"scripted":false,"searchable":true,"aggregatable":true,"readFromDocValues":false},{"name":"referer","type":"string","count":0,"scripted":false,"searchable":true,"aggregatable":false,"readFromDocValues":false},{"name":"referer.keyword","type":"string","count":0,"scripted":false,"searchable":true,"aggregatable":true,"readFromDocValues":true},{"name":"status","type":"number","count":0,"scripted":false,"searchable":true,"aggregatable":true,"readFromDocValues":true},{"name":"url","type":"string","count":0,"scripted":false,"searchable":true,"aggregatable":false,"readFromDocValues":false},{"name":"url.keyword","type":"string","count":0,"scripted":false,"searchable":true,"aggregatable":true,"readFromDocValues":true}]"""
}
}

Elasticsearch completion suggester context for nested fields

I'm working on simple search app with completion feature.
I need to somehow secure those suggestions, so I figured that the simplest way to do so would be to add a context to the completion suggester. My problem is that I don't know how to use a suggester context in nested fields.
This is what my mapping looks like; it is very simple, just 3 fields, one of them nested.
curl -XPUT 'http://localhost:9200/cr/_mapping/agreement_index' -d '{
"agreement_index": {
"properties": {
"agreement_name": {
"type": "string",
"fields": {
"suggest": {
"type": "completion",
"analyzer": "simple",
"payloads": false,
"preserve_separators": true,
"preserve_position_increments": true,
"max_input_length": 50,
"context": {
"permitted": {
"type": "category",
"path": "permitted",
"default": []
}
}
}
}
},
"permitted": {
"type": "integer"
},
"team": {
"type": "nested",
"dynamic": "false",
"properties": {
"email": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
},
"suggest": {
"type": "completion",
"analyzer": "simple",
"payloads": false,
"preserve_separators": true,
"preserve_position_increments": true,
"max_input_length": 50,
"context": {
"permitted": {
"type": "category",
"path": "permitted",
"default": []
}
}
}
}
},
"name": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
},
"suggest": {
"type": "completion",
"analyzer": "simple",
"payloads": false,
"preserve_separators": true,
"preserve_position_increments": true,
"max_input_length": 50,
"context": {
"permitted": {
"type": "category",
"path": "permitted",
"default": []
}
}
}
}
},
"permitted": {
"type": "integer"
}
}
}
}
}
}'
During indexing documents like this:
curl -XPUT 'http://localhost:9200/cr/agreement_index/1' -d '{
"agreement_name": "QWERTY",
"team": [{
"name": "Tomasz Sobkowiak",
"permitted": ["2"],
"email": "tsobkowiak@fake.com"
}],
"permitted": ["2"]
}'
I got below error:
{"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"one or more prefixes needed"}],"type":"illegal_argument_exception","reason":"one or more prefixes needed"},"status":400}
After removing the context from the completion suggesters in the nested fields, everything works fine.
So my question is: how can I use context suggesters in nested fields with the path pointing to a field in the outer document? Is something like that even possible?

The problem is in your mapping. The default cannot be left empty. You need to assign at least one default value in the mapping for the context suggester.
"context": {
"permitted": {
"type": "category",
"path": "permitted",
"default": [] // <-- defaults can not be empty, provide at least one default integer value
}
}
The value of the default field is used, when ever no specific is
provided for the certain context. Note that a context is defined by at
least one value.
Also, in the document you are trying to index, you are using a string in permitted, whereas it is mapped as an integer.
"permitted": ["2"] // <-- change this to "permitted":[2]

ElasticSearch term query vs query_string?

When I query my index with query_string, I get results.
But when I query using a term query, I don't get any results.
{
"query": {
"bool": {
"must": [],
"must_not": [],
"should": [
{
"query_string": {
"default_field": "Printer.Name",
"query": "HL-2230"
}
}
]
}
},
"from": 0,
"size": 10,
"sort": [],
"aggs": {}
}
I know that term is not analyzed and query_string is analyzed, but Name is already stored as "HL-2230", so why doesn't it match with the term query? I also tried searching with "hl-2230", and I still didn't get any result.
EDIT: the mapping looks as below. Printer is the child of Product. Not sure if this makes a difference.
{
"state": "open",
"settings": {
"index": {
"creation_date": "1453816191454",
"number_of_shards": "5",
"number_of_replicas": "1",
"version": {
"created": "1070199"
},
"uuid": "TfMJ4M0wQDedYSQuBz5BjQ"
}
},
"mappings": {
"Product": {
"properties": {
"index": "not_analyzed",
"store": true,
"type": "string"
},
"ProductName": {
"type": "nested",
"properties": {
"Name": {
"store": true,
"type": "string"
}
}
},
"ProductCode": {
"type": "string"
},
"Number": {
"index": "not_analyzed",
"store": true,
"type": "string"
},
"id": {
"index": "no",
"store": true,
"type": "integer"
},
"ShortDescription": {
"store": true,
"type": "string"
},
"Printer": {
"_routing": {
"required": true
},
"_parent": {
"type": "Product"
},
"properties": {
"properties": {
"RelativeUrl": {
"index": "no",
"store": true,
"type": "string"
}
}
},
"PrinterId": {
"index": "no",
"store": true,
"type": "integer"
},
"Name": {
"store": true,
"type": "string"
}
}
},
"aliases": []
}
}

As per mapping provided by you above
"Name": {
"store": true,
"type": "string"
}
Name is analysed. So HL-2230 will be split into two tokens, HL and 2230 (which the default standard analyzer also lowercases, to hl and 2230). That's why the term query is not working and query_string is working. When you use a term query it searches for the exact term HL-2230 (or hl-2230), which is not in the index.

Showing Different Document Types in Kibana from ElasticSearch

I'm in the process of trying to set up a Kibana dashboard. This dashboard is hitting an Elasticsearch index. My index has the following mappings:
"myindex": {
"mappings": {
"animals": {
"properties": {
"@timestamp": {
"type": "date",
"format": "dateOptionalTime"
},
"@version": {
"type": "string"
},
"Class": {
"type": "string"
},
"Order": {
"type": "string"
},
"Family": {
"type": "string"
},
"Genus": {
"type": "string"
},
"Species": {
"type": "string"
}
}
},
"elements" : {
"properties": {
"@timestamp": {
"type": "date",
"format": "dateOptionalTime"
},
"@version": {
"type": "string"
},
"Symbol": {
"type": "string"
},
"Name": {
"type": "string"
},
"Group": {
"type": "string"
},
"Period": {
"type": "string"
}
}
}
}
}
As the mappings show, my index has two different types of information. My challenge is, I don't know how to set up my Kibana dashboard to just list the information for each type. I've confirmed that the data in my Elasticsearch instance is the correct data.
In my dashboard, I'm trying to show two tables. One table will show all of the documents associated with "animals". The other table will show all of the documents associated with "elements". Unfortunately, I can't figure out how to focus the results of a table down to a specific type. I'm basically trying to figure out how to set up either a query or a filter (not sure of the difference between the two in the Kibana world) for a specific panel. Currently, my dashboard looks like this:
{
"title": "Research",
"services": {
"query": {
"list": {
"0": {
"query": "*",
"alias": "",
"color": "#7EB26D",
"id": 0,
"pin": false,
"type": "lucene"
}
},
"ids": [
0
]
},
"filter": {
"list": {
"0": {
"type": "time",
"field": "@timestamp",
"from": "now-{{ARGS.from || '24h'}}",
"to": "now",
"mandate": "must",
"active": true,
"alias": "",
"id": 0
}
},
"ids": [
0
]
}
},
"rows": [
{
"title": "Animals",
"height": "350px",
"editable": true,
"collapse": false,
"collapsable": true,
"panels": [
{
"title": "Animals",
"error": false,
"span": 12,
"editable": true,
"group": [
"default"
],
"type": "table",
"size": 100,
"pages": 5,
"offset": 0,
"sort": [
"@timestamp",
"desc"
],
"style": {
"font-size": "9pt"
},
"overflow": "min-height",
"fields": [
"Class",
"Order",
"Family",
"Genus",
"Species"
],
"localTime": true,
"timeField": "@timestamp",
"highlight": [],
"sortable": true,
"header": true,
"paging": true,
"spyable": true,
"queries": {
"mode": "all",
"ids": [
0
]
},
"field_list": true,
"status": "Stable",
"trimFactor": 300,
"normTimes": true
}
],
"notice": false
},
{
"title": "",
"height": "350px",
"editable": true,
"collapse": false,
"collapsable": true,
"panels": [
{
"title": "Elements",
"error": false,
"span": 12,
"editable": true,
"group": [
"default"
],
"type": "table",
"size": 100,
"pages": 5,
"offset": 0,
"sort": [
"@timestamp",
"desc"
],
"style": {
"font-size": "9pt"
},
"overflow": "min-height",
"fields": [
"Symbol",
"Name",
"Group",
"Period"
],
"localTime": true,
"timeField": "@timestamp",
"highlight": [],
"sortable": true,
"header": true,
"paging": true,
"spyable": true,
"queries": {
"mode": "all",
"ids": [
0
]
},
"field_list": true,
"trimFactor": 300,
"normTimes": true
}
],
"notice": false
}
],
"editable": true,
"failover": false,
"index": {
"interval": "none",
"default": "myindex"
},
"style": "dark",
"panel_hints": true,
"pulldowns": [
{
"type": "query",
"collapse": false,
"notice": false,
"query": "*",
"pinned": true,
"history": [],
"remember": 10
},
{
"type": "filtering",
"collapse": true,
"notice": false
}
],
"loader": {
"save_gist": false,
"save_elasticsearch": true,
"save_local": true,
"save_default": true,
"save_temp": true,
"save_temp_ttl_enable": true,
"save_temp_ttl": "30d",
"load_gist": true,
"load_elasticsearch": true,
"load_elasticsearch_size": 20,
"load_local": true,
"hide": false
},
"refresh": "30s"
}
Can someone tell me how to show two different types of documents in Kibana? I see a queries object on the table panel. Yet, I have no idea how to use it.
Thank you so much

You can use the _type field to narrow the result to a specific Elasticsearch type (e.g. animals).
So when you define the query (or filter) for your table, just make sure to specify the relevant _type (i.e. _type: animals)

You can use a scripted field to expose the value of the type as a separate field.
Alternatively, you can add the _type field to the search fields, and it will be available.
In the case of a scripted field, define it as doc['_type'].value and give it any name you want.
https://github.com/elastic/kibana/issues/5684

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Couchbase Full text search matching partial numbers - full-text-search

Related

Elasticsearch stops refreshing

Is there API based method to create an index pattern in Kibana if its index is present in ES

Elasticsearch completion suggester context for nested fields

ElasticSearch term query vs query_string?

Showing Different Document Types in Kibana from ElasticSearch

Categories

Resources