elasticsearch : object mapping and getting data from database - elasticsearch

I use elasticsearch and oracle for the database.
Database :
I created a view with all the data I need to index. I have 1-N relations between my "occurrences" table and "determinations" table, and between the "occurrences" table and the "multimedias" table, so one occurrence has multiple determinations and multimedias.
Elasticsearch :
I create a mapping and the river to get data from the database view.
The problem is that I need an array of objects for multimedias and determinations, instead of a separate array for each field, in the Elasticsearch result (example below).
mapping
curl -XPUT 'localhost:9200/botanic/' -d '{
"settings": {
"index": {
"analysis": {
"analyzer": {
"keylower": {
"tokenizer": "keyword",
"filter": "lowercase"
}
}
}
}
},
"mappings": {
"specimens": {
"_all": {
"enabled": true
},
"_index": {
"enabled": true
},
"_id": {
"index": "not_analyzed",
"store": false
},
"properties": {
"_id": {
"type": "string",
"store": "no",
"index": "not_analyzed"
},
...
"MULTIMEDIA": {
"_id": {
"path": "M_MULTIMEDIAID"
},
"type": "object",
"properties": {
"M_MULTIMEDIAID": {
"type": "string",
"store": "yes",
"index": "not_analyzed"
},
"M_CREATOR": {
"type": "string",
"store": "yes",
"index": "not_analyzed"
},
"M_DESCRIPTION": {
"type": "string",
"store": "yes",
"index": "analyzed"
}
...
}
},
"DETERMINATIONS": {
"_id": {
"path": "D_OCCURRENCEID"
},
"type": "object",
"properties": {
"D_OCCURRENCEID": {
"type": "string",
"store": "yes",
"index": "not_analyzed"
},
"D_DETERMINATIONID": {
"type": "string",
"store": "yes",
"index": "not_analyzed"
},
"D_DATEIDENTIFIED": {
"type": "string",
"store": "yes",
"index": "analyzed"
},
"D_TYPESTATUS": {
"type": "string",
"store": "yes",
"index": "analyzed"
},
"D_CREATED": {
"type": "date",
"store": "yes",
"index": "analyzed"
}
}
},
...
"I_INSTITUTIONID": {
"type": "string",
"store": "yes",
"index": "not_analyzed"
},
"I_INSTITUTIONCODE": {
"type": "string",
"store": "yes",
"index": "analyzed"
}
}
}
}
}'
the river
curl -XPUT 'localhost:9200/_river/botanic_river/_meta' -d '{
"type": "jdbc",
"jdbc": {
"index": "botanic",
"type": "specimens",
"url": "jdbc:oracle:thin:#localhost:1523:database",
"user": "user",
"password": "password",
"sql": "select * from elasticsearchview"
}
}'
the result I get ( multiple fields and for each an array ):
"hits": [
{
"_index": "botanic",
"_type": "specimens",
"_id": "345F5BEA7FDB4B17A7831514E25CD29B",
"_score": 0.4430604,
"_source": {
...
"M_MULTIMEDIAID": [
"0E91818D48DE40C785733F9F3A7932F1",
"833C6E79D7844D568B828DF2D8BA8AC7",
"F76F6766398042D38902DA9165D41514"
],
"M_CREATOR": [
"creator1",
"creator2",
"creator3"
],
"M_DESCRIPTION": [
"descr1",
"descr3",
"descr2"
],
...
}
}
]
but I need something like this ( array of object ) :
"hits": [
{
"_index": "botanic",
"_type": "specimens",
"_id": "345F5BEA7FDB4B17A7831514E25CD29B",
"_score": 0.4430604,
"_source": {
...
"MULTIMEDIA": [
{
"M_MULTIMEDIAID": "0E91818D48DE40C785733F9F3A7932F1",
"M_CREATOR": "creator1",
"M_DESCRIPTION": "descr1"
},
{
"M_MULTIMEDIAID": "833C6E79D7844D568B828DF2D8BA8AC7",
"M_CREATOR": "creator2",
"M_DESCRIPTION": "descr2"
},
{
"M_MULTIMEDIAID": "F76F6766398042D38902DA9165D41514",
"M_CREATOR": "creator3",
"M_DESCRIPTION": "descr3"
}
]
...
}
}
]
I tried "type" : "object" and "type" : "nested" in the mapping but same result.
how can do this ?

Related

elasticsearch putTemplate: [mapper_parsing_exception] Root mapping definition has unsupported parameters

I made a template based on https://github.com/vanthome/winston-elasticsearch/blob/master/index-template-mapping.json
{
"index_patterns": ["applogs-*"],
"settings": {
"number_of_shards": 1
},
"mappings": {
"_source": { "enabled": true },
"properties": {
"#timestamp": { "type": "date" },
"#version": { "type": "keyword" },
"message": { "type": "text", "index": true },
"severity": { "type": "keyword", "index": true },
"geohash":{ "type": "geo-point", "index": true},
"location":{ "type": "geo-point", "index": true},
}
}
}
but get an error
[mapper_parsing_exception] Root mapping definition has unsupported parameters: [severity : {index=true, type=keyword}] [@timestamp : {type=date}] [@version : {type=keyword}] [message : {index=true, type=text}] [fields : {dynamic=true, properties={}}]
Is this template probably written for some obsolete version? What should I update?
Based on the docs:
PUT _template/template_1
{
"index_patterns": [
"applogs-*"
],
"settings": {
"number_of_shards": 1
},
"mappings": {
"_source": {
"enabled": true
},
"properties": {
"#timestamp": {
"type": "date"
},
"#version": {
"type": "keyword"
},
"message": {
"type": "text",
"index": true
},
"severity": {
"type": "keyword",
"index": true
},
"geohash": {
"type": "geo_point",
"index": true
},
"location": {
"type": "geo_point",
"index": true
}
}
}
}
Your JSON was invalid (there was a trailing comma after the last property), and the type geo-point must be spelled geo_point.

Can't chain match_phrase_prefix in ElasticSearch

I am trying to chain more than one match_phrase_prefix query, but
for some reason it doesn't return any results (and no errors either). When I try just
one match_phrase_prefix, everything works just fine. I am using Elasticsearch version 2.3 and my query looks like this:
"query": {
"bool": {
"must": [
{
"match_phrase_prefix": {
"title": "נפ"
}
},
{
"match_phrase_prefix": {
"sub_title": "נפצ"
}
}
]
}
}
my mapping for the specific index is:
{
"wcm_articles": {
"mappings": {
"article": {
"properties": {
"approved_talkbacks_counter": {
"type": "integer"
},
"article_link": {
"type": "string",
"index": "not_analyzed"
},
"author": {
"type": "string",
"analyzer": "hebrew"
},
"category_id": {
"type": "string",
"index": "not_analyzed"
},
"date_updated": {
"type": "date",
"format": "date_hour_minute_second"
},
"is_old": {
"type": "boolean"
},
"is_promoted": {
"type": "boolean"
},
"is_video": {
"type": "boolean"
},
"last_update_user": {
"type": "string",
"index": "not_analyzed"
},
"launch_date": {
"type": "date",
"format": "date_hour_minute_second"
},
"plain_text": {
"type": "string",
"analyzer": "hebrew"
},
"promotion_notes": {
"type": "string",
"index": "not_analyzed"
},
"status": {
"type": "string",
"index": "not_analyzed"
},
"sub_title": {
"type": "string",
"analyzer": "hebrew"
},
"tags": {
"type": "string",
"index": "not_analyzed"
},
"title": {
"type": "string",
"analyzer": "hebrew"
},
"type": {
"type": "string",
"index": "not_analyzed"
},
"waiting_talkbacks_counter": {
"type": "integer"
}
}
}
}
}
}
And some example documents are:
"_index": "wcm_articles",
"_type": "article",
"_id": "2828280",
"_score": 2.5246792,
"_source": {
"tags": [],
"category_id": 3,
"sub_title": "",
"promotion_notes": "",
"is_old": false,
"last_update_user": "",
"status": 1,
"type": "article",
"plain_text": "",
"launch_date": null,
"waiting_talkbacks_counter": 0,
"approved_talkbacks_counter": 0,
"is_video": false,
"article_link": "",
"date_updated": "2016-12-05T15:12:10",
"title": "קשישה כבת 80 נפצעה קשה מפגיעת רכב בחולון ",
"author": "",
"is_promoted": false
"_index": "wcm_articles",
"_type": "article",
"_id": "2829275",
"_score": 2.1283152,
"_source": {
"tags": [],
"category_id": 3,
"sub_title": "",
"promotion_notes": "",
"is_old": false,
"last_update_user": "",
"status": 1,
"type": "article",
"plain_text": "",
"launch_date": null,
"waiting_talkbacks_counter": 0,
"approved_talkbacks_counter": 0,
"is_video": false,
"article_link": "",
"date_updated": "2016-12-05T15:12:16",
"title": "רוכבת אופנוע נפצעה קשה בתאונת דרכים בצומת סירקין בפ\"ת",
"author": "",
"is_promoted": false
OK, my issue is solved. There was no problem with chaining the queries; I was not using "match_phrase_prefix" the correct way. It won't work with a single word, only when you use it with more than one word. For a single word, the correct query for an autocomplete-style search (a search that matches part of a word rather than a whole word) is "prefix". When chaining any combination of these two, with each used correctly, there is no problem at all.
Hope this helps someone else with a similar problem.

Documents and timestamp exists, but Kibana doesn't show any results in "Discover" tab

I have a few documents in my elasticsearch engine:
In Kibana, I've successfully created an index pattern, including a suggested time field.
When searching for documents from the last 5 years, I can't find anything.
If I create the index pattern without a time field, I can see all the documents.
Each hit's _source contains the document.
I'm really desperate; can anyone assist?
EDITED
Mapping:
{ "settings" : { "number_of_shards" : 1 }, "mappings" : { "monitor" : { "properties" : { "ProcessName": { "type": "string", "index": "analyzed" }, "OpName": { "type": "string", "index": "analyzed" }, "Domain": { "type": "string", "index": "not_analyzed" }, "TraceType": { "type": "string", "index": "not_analyzed" }, "TraceDateTime": { "type": "date", "format": "yyyy-MM-dd HH:mm:ss" }, "ApplicationCode": { "type": "string", "index": "not_analyzed" }, "SrcMessageID": { "type": "string", "index": "not_analyzed" }, "ProcessID": { "type": "string", "index": "not_analyzed" }, "OpID": {"type": "string", "index": "not_analyzed" }, "OpParentID": { "type": "string", "index": "not_analyzed" }, "HostName": { "type": "string", "index": "not_analyzed"} } } } }
Document example:
{
"_index": "monitors",
"_type": "monitor",
"_id": "AVPkvD3YnwEx8EXuppEN",
"_score": 1,
"_source": {
"ProcessName": "myapp",
"OpName": "myop",
"Domain": "mydomain",
"TraceType": "Info",
"TraceDateTime": "2016-04-05 07:44:15",
"ApplicationCode": "71",
"SrcMessageID": "35e291e9-10ad-47a2-8ce8-935cebc62b26",
"ProcessID": "a017afa9-c541-4424-bc63-c83a3d8830e0",
"OpID": "84267eed-38db-4d84-88c4-f46d148e494a",
"HostName": "myHost"
}
EDITED 2
{"index":[".kibana-devnull"],"ignore_unavailable":true}
{"size":500,"sort":[{"TraceDateTime":{"order":"desc","unmapped_type":"boolean"}}],"highlight":{"pre_tags":["#kibana-highlighted-field#"],"post_tags":["#/kibana-highlighted-field#"],"fields":{"*":{}},"require_field_match":false,"fragment_size":2147483647},"query":{"filtered":{"query":{"query_string":{"query":"*","analyze_wildcard":true}},"filter":{"bool":{"must":[{"range":{"TraceDateTime":{"gte":1301984031628,"lte":1459836831628,"format":"epoch_millis"}}}],"must_not":[]}}}},"aggs":{"2":{"date_histogram":{"field":"TraceDateTime","interval":"1M","time_zone":"Asia/Jerusalem","min_doc_count":0,"extended_bounds":{"min":1301984031627,"max":1459836831627}}}},"fields":["*","_source"],"script_fields":{},"fielddata_fields":["TraceDateTime"]}
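The issue was that I was sending the date-time values in local time (GMT+03:00) without any time zone information, while Elasticsearch interprets and stores such dates as UTC. The documents therefore appeared to be up to three hours in the future and fell outside the time range Kibana was querying.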

Elasticsearch - Filters not working

I am trying to build a query to return all products of a genre:
curl -XPOST 'http://172.17.0.2:9200/products/_search?pretty' -d '
{
"query" : {
"filtered" : {
"query" : {
"match_all" : {}
},
"filter" : {
"term" : {
"genre" : "Feminino"
}
}
}
}
}'
but the result is zero products. The query doesn't return anything.
If I remove the filter assignment the query works:
curl -XPOST 'http://172.17.0.2:9200/products/_search?pretty' -d '
{
"query" : {
"filtered" : {
"query" : {
"match_all" : {}
}
}
}
}'
The settings of my index are:
curl -s http://172.17.0.2:9200/products/_settings |python -m json.tool
{
"products": {
"settings": {
"index": {
"creation_date": "1455635219340",
"number_of_replicas": "1",
"number_of_shards": "5",
"products": {
"mappings": {
"properties": {
"avaliability": {
"index": "not_analyzed",
"type": "string"
},
"best_seller": {
"index": "not_analyzed",
"type": "string"
},
"brand": {
"index": "not_analyzed",
"type": "double"
},
"brand_lenses": {
"index": "not_analyzed",
"type": "string"
},
"category": {
"index": "not_analyzed",
"type": "string"
},
"color": {
"index": "not_analyzed",
"type": "string"
},
"color_arm": {
"index": "not_analyzed",
"type": "string"
},
"color_lense": {
"index": "not_analyzed",
"type": "string"
},
"description": {
"type": "string"
},
"duration": {
"index": "not_analyzed",
"type": "string"
},
"final_price": {
"index": "not_analyzed",
"type": "double"
},
"format": {
"index": "not_analyzed",
"type": "string"
},
"front_image": {
"type": "string"
},
"genre": {
"index": "not_analyzed",
"type": "string"
},
"installment_amount": {
"index": "not_analyzed",
"type": "double"
},
"installment_times": {
"index": "not_analyzed",
"type": "integer"
},
"lenses_type": {
"index": "not_analyzed",
"type": "string"
},
"link": {
"type": "string"
},
"manufacturer": {
"index": "not_analyzed",
"type": "string"
},
"material": {
"index": "not_analyzed",
"type": "string"
},
"model": {
"index": "not_analyzed",
"type": "string"
},
"name": {
"type": "string"
},
"new": {
"index": "not_analyzed",
"type": "string"
},
"pathology": {
"index": "not_analyzed",
"type": "string"
},
"price": {
"index": "not_analyzed",
"type": "double"
},
"price_in_cash": {
"index": "not_analyzed",
"type": "double"
},
"qty": {
"index": "not_analyzed",
"type": "integer"
},
"side_image": {
"type": "string"
},
"sku": {
"type": "string"
},
"toast": {
"index": "not_analyzed",
"type": "string"
},
"type": {
"index": "not_analyzed",
"type": "string"
}
}
}
},
"uuid": "jxYCUwUGSHW3Rj-A5Q0Tkg",
"version": {
"created": "2020099"
}
}
}
}
Is there something wrong in my query or in my index?
EDIT: The output of search without the filter:
curl -s -XPOST 'http://172.17.0.2:9200/products/_search?pretty&size=1' -d '
{
"query" : {
"filtered" : {
"query" : {
"match_all" : {}
}
}
}
}' | python -m json.tool
{
"_shards": {
"failed": 0,
"successful": 5,
"total": 5
},
"hits": {
"hits": [
{
"_id": "30-2024-MMBQ1090_C4",
"_index": "products",
"_score": 1.0,
"_source": {
"avaliability": "[out of stock]",
"best_seller": "",
"brand": "1.8.1",
"category": "\u00d3culos de Grau",
"color": "Marrom",
"color_arm": "Marrom",
"color_lense": "",
"description": "Esse charmoso \u00f3culos possui a super tend\u00eancia cor marsala, um vinho mais fechado pro marrom, que transmite eleg\u00e2ncia imediata. Al\u00e9m disso, na lateral da sua haste ele mostra um detalhe met\u00e1lico incr\u00edvel, que enche esse modelo de personalidade. Seu formato retangular d\u00e1 o toque final de estilo contempor\u00e2neo. Muito belo! preencher",
"final_price": 197.0,
"format": "Retangular",
"front_image": "https://media.eotica.com.br/catalog/product/cache/1/small_image/266x120/9df78eab33525d08d6e5fb8d27136e95/o/c/oculos-181-mmbq1090p-c4-1-rn.jpg",
"genre": [
"Feminino",
"Masculino"
],
"id": "30-2024-MMBQ1090_C4",
"installment_amount": 65.67,
"installment_times": "3",
"lenses_type": "",
"link": "https://www.eotica.com.br/oculos-de-grau-181-jacob-mmbq-1090-marrom-c4.html",
"material": "N\u00e3o Metal",
"model": "MMBQ1090 ",
"name": "181 Jacob MMBQ1090 - Marrom - C4 - \u00d3culos de Grau",
"new": "",
"price": 197.0,
"price_in_cash": 187.15,
"qty": 0,
"side_image": "https://media.eotica.com.br/catalog/product/o/c/oculos-181-mmbq1090p-c4-1-rn.jpg",
"sku": "30-2024-MMBQ1090_C4",
"toast": "0"
},
"_type": "product"
}
],
"max_score": 1.0,
"total": 10416
},
"timed_out": false,
"took": 1
}
Resolved.
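The problem is the Standard Analyzer: it lowercases the terms when indexing, while the value of a term filter is not analyzed, so the filter value must be given in lowercase.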
An analyzer of type standard is built using the Standard Tokenizer with the Standard Token Filter, Lower Case Token Filter, and Stop Token Filter.
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html
So, the query:
curl -XPOST 'http://172.17.0.2:9200/products/_search?pretty' -d '
{"query" : {
"filtered" : {
"query" : {
"match_all" : {}
},
"filter" : {
"term" : {
"genre" : "feminino"
}
}
}
}
}'
works very well.

no results elasticsearch when query by multi_field

I have the following mapping on my index in elasticsearch.
{
"mail": {
"properties": {
"project": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"mailbox": {
"type": "string",
"index": "not_analyzed",
"null_value": "#na",
"store" : "yes"
},
"path": {
"type": "string",
"index": "not_analyzed",
"null_value": "#na",
"store" : "yes"
},
"messageid": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"nodeid":
{
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false",
"store" : "yes"
},
"replyto": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"references": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"subject": {
"boost": "3.0",
"type": "string",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"from": {
"type": "nested",
"properties": {
"name": {
"type" : "multi_field",
"fields" : {
"name" : {"type" : "string", "analyzer" : "standard", "index" : "analyzed"},
"untouched" : {"type" : "string", "index" : "not_analyzed"}
}
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"to": {
"type": "nested",
"properties":{
"name": {
"type": "string",
"analyzer":"analyzer_keyword"
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"cc": {
"type": "nested",
"properties":{
"name": {
"type": "string",
"analyzer":"analyzer_keyword"
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"bcc": {
"type": "nested",
"properties":{
"name": {
"type": "string",
"analyzer":"analyzer_keyword"
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"message_snippet": {
"type": "string",
"index": "no",
"include_in_all": "false"
},
"text_messages": {
"type": "string",
"store": "yes",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"html_messages": {
"type": "string",
"store": "yes",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"message_attachments": {
"dynamic": "true",
"properties":{
"filename":{
"type": "string",
"store": "yes"
},
"content":{
"type": "string",
"store": "yes",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"hash":{
"type": "string",
"store": "yes",
"analyzer": "analyzer_keyword"
},
"nodeid":{
"type": "string"
}
}
},
"date": {
"type": "date"
},
"entities": {
"type": "nested",
"properties": {
"name": {
"type": "string",
"analyzer": "analyzer_keyword"
},
"type": {
"type": "string",
"analyzer": "analyzer_keyword"
},
"nodeid":{
"type": "string"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
}
}
}
}
And I try searching on the mail.from.name field with the following query, but it doesn't give me any results.
{
"query": {
"nested": {
"path": "from",
"query": {
"term": {
"name": "mark"
}
}
}
}
}
What is wrong about my mapping or query?
A sample document looks like this
{
"project": "test",
"mailbox": "test.pst",
"messageid": "5e667f7f-4421-4836-91f3-8b5216c04839",
"nodeid": "671",
"subject": "No Subject",
"from": [
{
"name": "Mike Johnson",
"address": "mike#gmail.com",
"nodeid": "3",
"facet": "Mike Johnson"
}
],
"to": [
{
"name": "John Doe",
"address": "JDoe#gmail.com",
"nodeid": "367",
"facet": "John Doe"
}
],
"cc": [],
"bcc": [],
"textbody": "this is a test email with no further lines of text",
"htmlbody": "",
"snippet": "",
"transmitted": "",
"replyto": "",
"references": "",
"attachments": [],
"entities": [
{
"name": "google",
"type": "organization",
"nodeid": "656",
"facet": "google"
}
],
"domains": [
"google.com"
],
"path": ""
}
Inside the nested query you need to reference the field by its full path, including the nested object's name (from.name rather than just name):
{
"query": {
"nested": {
"path": "from",
"query": {
"term": {
"from.name": "mike"
}
}
}
}
}

Resources