Jolt spec for nifi - removing attributes - apache-nifi

I am a newbie in NiFi. I am trying to remove some attributes (e.g. the name in Swedish and Chinese, the images from the description, etc.) with JoltTransformJSON, with 'limited' success. Could you please help me? Many thanks in advance!
Alicia
Details as below:
JoltTransformJSON properties set as:
Jolt Transformation DSL: Remove
Jolt Specification: {"name": {"sv": "", "zh": ""}, "source_type": "", "description": {"image": {"url": "", "copyright_holder":"", "license_type": ""}}}
All the attributes:
[
{
"id": "string",
"name": {
"fi": "string",
"en": "string",
"sv": "string",
"zh": "string"
},
"source_type": {},
"info_url": "string",
"modified_at": "2019-12-27T16:34:17.896Z",
"location": {
"lat": {},
"lon": {},
"address": {
"street_address": "string",
"postal_code": "string",
"locality": "string"
}
},
"description": {
"intro": "string",
"body": "string",
"images": [
{
"url": "string",
"copyright_holder": "string",
"license_type": {}
}
]
},
"tags": [
{
"id": "string",
"name": "string"
}
],
"where_when_duration": {
"where_and_when": "string",
"duration": "string"
}
}
]
The problem is that the transformation (removing the specified fields) is not performed. The expected result should be:
"id": "string",
"name": {
"fi": "string",
"en": "string"
},
"info_url": "string",
"modified_at": "2019-12-27T16:34:17.896Z",
"location": {
"lat": {},
"lon": {},
"address": {
"street_address": "string",
"postal_code": "string",
"locality": "string"
}
},
"description": {
"intro": "string",
"body": "string"
},
"tags": [
{
"id": "string",
"name": "string"
}
],
"where_when_duration": {
"where_and_when": "string",
"duration": "string"
}
}
How can I remove unwanted fields?
Any help is highly appreciated.

Based on your expected result, the Jolt specification you want is:
{
"name": {
"sv": "",
"zh": ""
},
"source_type": "",
"description": {
"images": ""
}
}

Related

How can I transform a field of a JSON file with NiFi?

Good morning
I am new to NiFi and I want to modify a field in a JSON file (I am using NiFi v.1.12.0) and save it to another path.
This is an example of my JSON file:
"Id": "2b2ef24a-f3ce-4249-ad92-db9a565b5b66",
"Tipo": "AuditEvent",
"SubTipo": "Plataforma",
"Accion": "Audit.Middleware.EventData.HttpResponseSentEvent",
"IDCorrelacion": "7af48a20-587d-4e60-9c3b-02cc6a074662",
"TiempoEvento": "2020-07-30 11:45:08.315",
"Resultado": "No informado",
"ResultadoDesc": "No informado",
"Origen": {
"IDOrigen": "132403308182038429",
"Tipo": "Backend",
"Aplicacion": "fabric:/Omnicanalidad.Canales.Muro_v1",
"Servicio": "fabric:/Omnicanalidad.Canales.Muro_v1/Muro",
"Maquina": "ibsfabbe02pru",
"IP": "ibsfabbe02pru"
},
"OrigenInterno": "Audit.Middleware.AuditMiddleware",
"Agente": {
"Rol": "Sin rol asignado",
"IDUsuario": "1428",
"AltIDUsuario": "20141115",
"Localizador": "197.183.27.17",
"PropositoUso": "No informado",
"IDSession": "",
"XForwardedPort": "443",
"XForwardedFor": "162.37.0.100:30279, 162.37.0.5:10158, 172.37.0.5",
"XForwardedHost": "ebeprate.es",
"XForwardedProto": "https",
"XOriginalURL": "/test/v1/Relation/ObtieneGestor?IdUser=4355625&NiciTitular=43485326",
"XOriginalHost": "ebeprate.es",
"Referer": null,
"AuthenticationType": "AuthenticationTypes.Federation",
"UserAgent": "HttpApplicationGateway",
"Claims": "Hello World",
"AcceptedLanguage": null
},
"DatosEvento": {
"Headers": ["Content-Length: 0", "Request-Context: appId=cid-v1:d8b40be1-4838-4a94-a4f8-3ec374989b27"],
"StatusCode": 204,
"Body": ""
}
}
I want to modify the field TiempoEvento from a date to a timestamp.
In this case, 2020-07-30 11:45:08.315 should be converted to 1596109508.
So I use this procedure:
1.- I used the GetFile processor to pick up the file. I configured its properties (without any problems) and everything is OK.
2.- I used the UpdateRecord processor to modify the field. (This is where the problems appear.)
In properties I have 3 properties:
I read that I need to configure a schema registry if I want to work with any data in NiFi (I don't know if that is totally true). Since I am working with a JSON file, I assumed I needed one, so I set it up.
In the controller services I configured a JsonPathReader, a JsonRecordSetWriter and an AvroSchemaRegistry.
I started with AvroSchemaRegistry.
SETTING
Name: Test
PROPERTIES
Validate Field Names -> true
test-schema ->
{
"name": "MyFirstNiFiTest",
"type": "record",
"namespace": "test.example",
"fields": [
{
"name": "Id",
"type": "string"
},
{
"name": "Tipo",
"type": "string"
},
{
"name": "SubTipo",
"type": "string"
},
{
"name": "Accion",
"type": "string"
},
{
"name": "IDCorrelacion",
"type": "string"
},
{
"name": "TiempoEvento",
"type": "string"
},
{
"name": "Resultado",
"type": "string"
},
{
"name": "ResultadoDesc",
"type": "string"
},
{
"name": "Origen",
"type": {
"name": "Origen",
"type": "record",
"fields": [
{
"name": "IDOrigen",
"type": "string"
},
{
"name": "Tipo",
"type": "string"
},
{
"name": "Aplicacion",
"type": "string"
},
{
"name": "Servicio",
"type": "string"
},
{
"name": "Maquina",
"type": "string"
},
{
"name": "IP",
"type": "string"
}
]
}
},
{
"name": "OrigenInterno",
"type": "string"
},
{
"name": "Agente",
"type": {
"name": "Agente",
"type": "record",
"fields": [
{
"name": "Rol",
"type": "string"
},
{
"name": "IDUsuario",
"type": "string"
},
{
"name": "AltIDUsuario",
"type": "string"
},
{
"name": "Localizador",
"type": "string"
},
{
"name": "PropositoUso",
"type": "string"
},
{
"name": "IDSession",
"type": "string"
},
{
"name": "XForwardedPort",
"type": "string"
},
{
"name": "XForwardedFor",
"type": "string"
},
{
"name": "XForwardedHost",
"type": "string"
},
{
"name": "XForwardedProto",
"type": "string"
},
{
"name": "XOriginalURL",
"type": "string"
},
{
"name": "XOriginalHost",
"type": "string"
},
{
"name": "Referer",
"type": [
"string",
"null"
]
},
{
"name": "AuthenticationType",
"type": [
"string",
"null"
]
},
{
"name": "UserAgent",
"type": "string"
},
{
"name": "Claims",
"type": "string"
},
{
"name": "AcceptedLanguage",
"type": [
"string",
"null"
]
}
]
}
},
{
"name": "DatosEvento",
"type": {
"name": "DatosEvento",
"type": "record",
"fields": [
{
"name": "Name",
"type": "string"
},
{
"name": "Category",
"type": "string"
},
{
"name": "EventType",
"type": "int"
},
{
"name": "Id",
"type": "int"
},
{
"name": "ApiName",
"type": "string"
},
{
"name": "Token",
"type": "string"
},
{
"name": "ApiScopes",
"type": {
"type": "array",
"items": "string"
}
},
{
"name": "TokenScopes",
"type": {
"type": "array",
"items": "string"
}
},
{
"name": "Message",
"type": "string"
},
{
"name": "ActivityId",
"type": "string"
},
{
"name": "TimeStamp",
"type": "int",
"logicalType": "date"
},
{
"name": "ProcessId",
"type": "int"
},
{
"name": "LocalIpAddress",
"type": "string"
},
{
"name": "RemoteIpAddress",
"type": "string"
}
]
}
}
]
}
I converted the JSON file to an Avro schema.
I enabled it and everything is OK.
Then I configured the JsonRecordSetWriter:
SETTING
Name: TestRecordSetWriter
PROPERTIES
I enabled it and everything is OK.
Then I configured the JsonPathReader:
SETTING
Name: TestPathReader
PROPERTIES
At this point I get an alert that says:
'JSON paths' is invalid because No JSON Paths were specified
and I can't enable this controller service. I don't know what I am missing.
I don't know if there is an easier way to do this, or whether I am totally wrong, so I need some help.
Thank you
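I found the answer. I had a bad configuration in the JsonPathReader: I had not added the schema's record fields to its properties (the JsonPathReader needs one user-defined property per field, whose value is the JSONPath expression for that field).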

Problem with schema validation using Postman

Body of my req:
[
{
"postId": 1,
"id": 1,
"name": "name abc",
"email": "Eliseo#gardner.biz",
"body": "something"
},
...
]
I am trying to validate it like below:
var schema = {
"type": "array",
"properties": {
"postId": {
"type": "integer"
},
"id": {
"type": "integer"
},
"name": {
"type": "string"
},
"email": {
"type": "string",
"pattern": "^[A-Z0-9._%+-]+#[A-Z0-9.-]+\.[A-Z]{2,}$"
},
"body": {
"type": "string"
}
},
"required": [
"postId",
"id",
"name",
"email",
"body"
]
};
pm.test('Schemat jest poprawny', function() {
pm.expect(tv4.validate(jsonData, schema)).to.be.true;
});
The test passes even if I change, for example, the id type to string or the email pattern to an invalid one.
What is wrong with that code?
I would recommend moving away from tv4 for schema validation and using the built-in jsonSchema assertion, as it uses Ajv.
Apart from that, your schema wasn't right: "properties" and "required" only apply to objects, so when they are placed directly under "type": "array" they are ignored and nothing inside the array is checked. The schema for each element has to go under "items".
This might help you out:
let schema = {
"type": "array",
"items": {
"type": "object",
"required": [
"postId",
"id",
"name",
"email",
"body"
],
"properties": {
"postId": {
"type": "integer"
},
"id": {
"type": "integer"
},
"name": {
"type": "string"
},
"email": {
"type": "string",
"pattern": "^[A-Z0-9._%+-]+#[A-Z0-9.-]+\.[A-Z]{2,}$"
},
"body": {
"type": "string"
}
}
}
}
pm.test("Schemat jest poprawny", () => {
pm.response.to.have.jsonSchema(schema)
})

combine filters using match_phrase - Elastic 1.7

I'm using ES 1.7 and trying to find documents with match_phrase so that the search string is matched as an exact phrase. Each filter works fine when used alone; however, when I combine the filters, I get an error.
example: people document
q=aaron&address=scarborough - searching a person by name and address, works fine.
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": {
"nested": {
"path": "addresses",
"query": {
"match_phrase": {
"address": "scarborough"
}
}
}
}
}
}
}
q=aaron&phone=813-689-6889 - searching a person by name and phone number works fine as well.
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": {
"query": {
"match_phrase": {
"phone": "813-689-6889"
}
}
}
}
}
However, when I try to use both filters, address and phone, I get a "No filter registered for [match_phrase]" error
for example: q=aaron&address=scarborough&phone=813-689-6889
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": {
"nested": {
"path": "addresses",
"query": {
"match_phrase": {
"address": "scarborough"
}
}
},
"query": {
"match_phrase": {
"phone": "813-689-6889"
}
}
}
}
}
the error, when using address and phone filters together:
nested: QueryParsingException[[pl_people] No filter registered for [match_phrase]]; }]","status":400}):
index mapping (person):
{
"pl_people": {
"mappings": {
"person": {
"properties": {
"ac_name": {
"type": "string",
"analyzer": "autocomplete"
},
"addresses": {
"type": "nested",
"properties": {
"address": {
"type": "string"
},
"city": {
"type": "string",
"index": "not_analyzed"
},
"city_id": {
"type": "long"
},
"country": {
"type": "string",
"index": "not_analyzed"
},
"county": {
"type": "string",
"index": "not_analyzed"
},
"county_id": {
"type": "long"
},
"id": {
"type": "long"
},
"location": {
"type": "geo_point"
},
"parameterized": {
"type": "string"
},
"state": {
"type": "string",
"index": "not_analyzed"
},
"state_id": {
"type": "long"
},
"zip": {
"type": "string",
"index": "not_analyzed"
}
}
},
"author": {
"type": "string",
"index": "not_analyzed"
},
"body": {
"type": "string",
"analyzer": "remove_html",
"fields": {
"ns_body": {
"type": "string",
"analyzer": "remove_html_stopwords"
}
}
},
"charities": {
"type": "nested",
"properties": {
"email": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "long"
}
}
},
"community": {
"properties": {
"name": {
"type": "string",
"index": "not_analyzed"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"slug": {
"type": "string",
"index": "not_analyzed"
}
}
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"date_of_birth": {
"type": "date",
"format": "dateOptionalTime"
},
"delimiters": {
"type": "nested",
"properties": {
"delimiter_type": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "long"
}
}
},
"description": {
"type": "string"
},
"employments": {
"type": "nested",
"properties": {
"email": {
"type": "string",
"index": "not_analyzed"
},
"employment_status": {
"type": "string",
"index": "not_analyzed"
},
"foia_contact": {
"type": "boolean"
},
"id": {
"type": "long"
},
"phone": {
"type": "string",
"index": "not_analyzed"
},
"phone_extension": {
"type": "string",
"index": "not_analyzed"
}
}
},
"first_name": {
"type": "string",
"fields": {
"na_first_name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"last_name": {
"type": "string",
"fields": {
"na_last_name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"market": {
"properties": {
"name": {
"type": "string",
"index": "not_analyzed"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"slug": {
"type": "string",
"index": "not_analyzed"
}
}
},
"middle_name": {
"type": "string",
"fields": {
"na_middle_name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"most_recent_organization": {
"properties": {
"description": {
"type": "string"
},
"id": {
"type": "long"
},
"name": {
"type": "string"
},
"parameterized": {
"type": "string"
},
"phone": {
"type": "string"
}
}
},
"name": {
"type": "string",
"fields": {
"na_name": {
"type": "string",
"index": "not_analyzed"
},
"ngram_name": {
"type": "string",
"analyzer": "my_start"
},
"ns_name": {
"type": "string",
"analyzer": "no_stopwords"
}
}
},
"organizations": {
"properties": {
"name": {
"type": "string"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
}
}
},
"package": {
"properties": {
"name": {
"type": "string",
"index": "not_analyzed"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"slug": {
"type": "string",
"index": "not_analyzed"
}
}
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"phone": {
"type": "string"
},
"photo": {
"properties": {
"large": {
"type": "string"
},
"medium": {
"type": "string"
},
"teaser": {
"type": "string"
},
"thumb": {
"type": "string"
},
"url": {
"type": "string"
}
}
},
"projects": {
"properties": {
"id": {
"type": "long"
},
"name": {
"type": "string",
"index": "not_analyzed"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"slug": {
"type": "string",
"index": "not_analyzed"
}
}
},
"public_offices": {
"type": "nested",
"properties": {
"email": {
"type": "string",
"index": "not_analyzed"
},
"employment_status": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "long"
}
}
},
"published": {
"type": "string",
"index": "not_analyzed"
},
"region": {
"properties": {
"name": {
"type": "string",
"index": "not_analyzed"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"slug": {
"type": "string",
"index": "not_analyzed"
}
}
},
"resource": {
"type": "string"
},
"short_description": {
"type": "string"
},
"show_path": {
"type": "string"
},
"time": {
"type": "date",
"format": "dateOptionalTime"
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
}
}
}
}
}
}
Document I am using to test
[
{
"_index": "pl_people",
"_type": "person",
"_id": "813106",
"_score": null,
"sort": [
-9223372036854775808
],
"resource": "Person",
"parameterized": "813106-aaron-mcguire",
"created_at": "2011-10-29T19:51:24.000-05:00",
"updated_at": "2014-12-11T07:21:08.000-06:00",
"name": "Aaron McGuire",
"title": null,
"photo": {
"url": "/assets/140x140.gif"
},
"description": null,
"short_description": null,
"time": "2014-12-11",
"show_path": "/people/813106-aaron-mcguire",
"published": true,
"aliases": [],
"phone": "813-689-6889",
"date_of_birth": "1991-03-01",
"first_name": "Aaron",
"middle_name": "",
"last_name": "McGuire",
"delimiters": [],
"market": null,
"region": null,
"most_recent_organization": null,
"households": [],
"court_cases": [],
"addresses": [
{
"id": 1,
"parameterized": "1",
"address": "123 Scarborough road",
"zip": "L5A2A9",
"city": "Ontario",
"country": "USA",
"state": "California",
"location": null,
"state_id": null,
"county_id": null,
"city_id": null
}
],
"projects": [],
"voter_ids": [],
"id": "813106"
}
]
Finally, I was able to reproduce the issue. It looks like "include_in_parent": true is missing from your mapping, which is why I was getting the error mentioned in my comment.
To fix the issue, I used the same mapping you provided but added "include_in_parent": true at the top level of the addresses nested field. For brevity, I am providing only the addresses part of the modified mapping.
"addresses": {
"type": "nested",
"include_in_parent": true, --> added only this in your mapping.
"properties": {
"address": {
"type": "string"
},
"city": {
"type": "string",
"index": "not_analyzed"
},
"city_id": {
"type": "long"
},
"country": {
"type": "string",
"index": "not_analyzed"
},
"county": {
"type": "string",
"index": "not_analyzed"
},
"county_id": {
"type": "long"
},
"id": {
"type": "long"
},
"location": {
"type": "geo_point"
},
"parameterized": {
"type": "string"
},
"state": {
"type": "string",
"index": "not_analyzed"
},
"state_id": {
"type": "long"
},
"zip": {
"type": "string",
"index": "not_analyzed"
}
}
}
After that I indexed some sample docs, which have different values for the address and name fields, to verify that my search query returns the right results.
Below is the final search query, which includes both filters. Please note that I removed the nested part on the address field: since it is now included in the parent document, you can query it directly. This query looks much simpler and now gives the expected result.
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": [{
"query": {
"match_phrase": {
"address": "scarborough"
}
}
},
{
"query": {
"match_phrase": {
"phone": "813-689-6889"
}
}
}]
}
}
}
Output on sample docs
"hits": [
{
"_index": "so-match-phrase",
"_type": "pl_people",
"_id": "4",
"_score": 0.19178301,
"_source": {
"resource": "Person",
"parameterized": "813106-aaron-mcguire",
"created_at": "2011-10-29T19:51:24.000-05:00",
"updated_at": "2014-12-11T07:21:08.000-06:00",
"name": "aaron McGuire",
"title": null,
"photo": {
"url": "/assets/140x140.gif"
},
"description": null,
"short_description": null,
"time": "2014-12-11",
"show_path": "/people/813106-aaron-mcguire",
"published": true,
"aliases": [],
"phone": "813-689-6889",
"date_of_birth": "1991-03-01",
"first_name": "Aaron",
"middle_name": "",
"last_name": "McGuire",
"delimiters": [],
"market": null,
"region": null,
"most_recent_organization": null,
"households": [],
"court_cases": [],
"addresses": [
{
"id": 1,
"parameterized": "1",
"address": "Scarborough road",
"zip": "L5A2A9",
"city": "Ontario",
"country": "USA",
"state": "California",
"location": null,
"state_id": null,
"county_id": null,
"city_id": null
}
],
"id": "813106"
}
},
{
"_index": "so-match-phrase",
"_type": "pl_people",
"_id": "1",
"_score": 0.19178301,
"_source": {
"resource": "Person",
"parameterized": "813106-aaron-mcguire",
"created_at": "2011-10-29T19:51:24.000-05:00",
"updated_at": "2014-12-11T07:21:08.000-06:00",
"name": "Aaron McGuire",
"title": null,
"photo": {
"url": "/assets/140x140.gif"
},
"description": null,
"short_description": null,
"time": "2014-12-11",
"show_path": "/people/813106-aaron-mcguire",
"published": true,
"aliases": [],
"phone": "813-689-6889",
"date_of_birth": "1991-03-01",
"first_name": "Aaron",
"middle_name": "",
"last_name": "McGuire",
"delimiters": [],
"market": null,
"region": null,
"most_recent_organization": null,
"households": [],
"court_cases": [],
"addresses": [
{
"id": 1,
"parameterized": "1",
"address": "123 Scarborough road",
"zip": "L5A2A9",
"city": "Ontario",
"country": "USA",
"state": "California",
"location": null,
"state_id": null,
"county_id": null,
"city_id": null
}
],
"id": "813106"
}
}
]

Elasticsearch MapperParsingException

I am trying to index the following data into Elasticsearch:
{
"_id": "5619578c1983757a72efef15",
"aseg": {},
"cs": {
"source": "None",
"ss": "In Transit",
"sr": "Weight Captured",
"act": "+B",
"pid": "BAG21678106",
"st": "UD",
"dest": "Bharatpur_DC (Rajasthan)",
"u": "J",
"sl": "Jaipur_Hub (Rajasthan)",
"ud": "2015-10-12T14:59:44.270000",
"sd": "2015-10-12T14:59:44.270000"
},
"nsl": [
{
"dt": [
2015,
10,
10
],
"code": "X-PPONM"
},
{
"dt": [
2015,
10,
11
],
"code": "X-UCI"
}
]
}
but in return I am getting this error:
MapperParsingException[failed to parse [cs.nsl]]; nested: ElasticsearchIllegalArgumentException[unknown property [dt]];
I checked the mapping and it is correct; nsl nested inside the cs dict has a different mapping than nsl at the root level.
"cs": {
"properties": {
"act": {
"type": "string"
},
"add": {
"type": "string"
},
"asr": {
"type": "string"
},
"bucket": {
"type": "string"
},
"dest": {
"type": "string",
"index": "not_analyzed"
},
"dwbn": {
"type": "string"
},
"lcld": {
"type": "string"
},
"lat": {
"type": "string"
},
"lon": {
"type": "string"
},
"loc": {
"type": "double"
},
"nsl": {
"type": "string",
"index": "not_analyzed"
},
"ntd": {
"type": "date",
"format": "dateOptionalTime"
},
"pbs": {
"type": "string"
},
"pid": {
"type": "string"
},
"pupid": {
"type": "string"
},
"sd": {
"type": "date",
"format": "dateOptionalTime"
},
"sl": {
"type": "string",
"index": "not_analyzed"
},
"source": {
"properties": {
"source": {
"type": "string"
},
"source_id": {
"type": "string"
},
"source_type": {
"type": "string"
}
}
},
"sr": {
"type": "string"
},
"ss": {
"type": "string",
"index": "not_analyzed"
},
"st": {
"type": "string"
},
"u": {
"type": "string",
"index": "not_analyzed"
},
"ud": {
"type": "date",
"format": "dateOptionalTime"
},
"vh": {
"type": "string"
}
}
},
and for nsl at root level mapping is as follow
"nsl": {
"properties" : {
"code" : {
"type" : "string",
"index": "not_analyzed"
},
"dt" : {
"type" : "string",
"index": "not_analyzed"
}
}
},
This is happening for only a few records; all the rest are syncing fine.
There aren't any changes in the payload.
Further, nsl is a sparse key inside cs.
In your mapping nsl is as follows -
"nsl": {
"type": "string",
"index": "not_analyzed"
},
As per the mapping, Elasticsearch expects a concrete string value for the nsl field, but it is an array of objects in the document you have provided.
Once Elasticsearch has a mapping for a field, it is fixed. You can't insert object data into a string field.
I tried your document without pre-setting any mapping as follows:
{
"aseg": {},
"cs": {
"source": "None",
"ss": "In Transit",
"sr": "Weight Captured",
"act": "+B",
"pid": "BAG21678106",
"st": "UD",
"dest": "Bharatpur_DC (Rajasthan)",
"u": "J",
"nsl":"foo",
"sl": "Jaipur_Hub (Rajasthan)",
"ud": "2015-10-12T14:59:44.270000",
"sd": "2015-10-12T14:59:44.270000"
},
"nsl": [
{
"dt": [
2015,
10,
10
],
"code": "X-PPONM"
},
{
"dt": [
2015,
10,
11
],
"code": "X-UCI"
}
]
}
And the ES created the mapping as follows:
"nsl": {
"properties": {
"dt": {
"type": "long"
},
"code": {
"type": "string"
}
}
}
As you can see, ES set the "dt" type to "long", which is the internal representation of a date type. So maybe you need to change that type?
Also, without seeing a successful document it is difficult to guess, but I believe those documents do not have a "dt" field value.
Of course, you are free to put "not_analyzed" as you see fit for any field.

Elastic Search queries not working with curl

Running the command:
curl -XGET http://127.0.0.1:9200/30556/_search -d '{
"query": {
"constant_score" : {
"filter" : {
"term" : { "portal_type" : "Folder"}
}
}
}
}'
yields 0 results. The output is:
{"took":1,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}
In fact, I can't get any queries to yield results.
However, when I run the same query using the head plugin, it works fine.
I'm on elasticsearch 0.20.2 on Mac OS X. I'm starting elastic search using the command:
bin/elasticsearch -f
Is there something obvious I'm missing? Seems like I have the correct syntax and I don't get any errors.
Mapping:
{
"30556": {
"portal_catalog": {
"properties": {
"CreationDate": {
"type": "date",
"format": "dateOptionalTime"
},
"Creator": {
"type": "string"
},
"Date": {
"type": "date",
"format": "dateOptionalTime"
},
"Description": {
"type": "string"
},
"ModificationDate": {
"type": "date",
"format": "dateOptionalTime"
},
"SearchableText": {
"type": "string"
},
"Title": {
"type": "string"
},
"Type": {
"type": "string"
},
"UID": {
"type": "string"
},
"allowedRolesAndUsers": {
"type": "string"
},
"created": {
"type": "date",
"format": "dateOptionalTime"
},
"effective": {
"type": "date",
"format": "dateOptionalTime"
},
"effectiveRange": {
"dynamic": "true",
"properties": {
"effectiveRange1": {
"type": "date",
"format": "dateOptionalTime"
},
"effectiveRange2": {
"type": "date",
"format": "dateOptionalTime"
}
}
},
"exclude_from_nav": {
"type": "boolean"
},
"expires": {
"type": "date",
"format": "dateOptionalTime"
},
"getId": {
"type": "string"
},
"getObjPositionInParent": {
"type": "long"
},
"getObjSize": {
"type": "string"
},
"id": {
"type": "string"
},
"is_default_page": {
"type": "boolean"
},
"is_folderish": {
"type": "boolean"
},
"listCreators": {
"type": "string"
},
"meta_type": {
"type": "string"
},
"modified": {
"type": "date",
"format": "dateOptionalTime"
},
"object_provides": {
"type": "string"
},
"path": {
"dynamic": "true",
"properties": {
"depth": {
"type": "long"
},
"path": {
"type": "string"
}
}
},
"portal_type": {
"type": "string"
},
"review_state": {
"type": "string"
},
"sortable_title": {
"type": "string"
},
"total_comments": {
"type": "long"
}
}
}
}
}
Example Indexed Document:
{
"_index": "30556",
"_type": "portal_catalog",
"_id": "30613",
"_score": 1,
"_source": {
"sortable_title": "news",
"exclude_from_nav": false,
"meta_type": "ATFolder",
"Date": "2013-01-14T09:24:56-06:00",
"CreationDate": "2013-01-14T09:24:56-06:00",
"path": {
"depth": 2,
"path": "/el/news"
},
"allowedRolesAndUsers": [
"Anonymous"
],
"portal_type": "Folder",
"id": "news",
"UID": "3116b6c7ec384a9393f238fdde778612",
"expires": "2499-12-31T00:00:00-06:00",
"Subject": [],
"is_folderish": true,
"is_default_page": false,
"effectiveRange": {
"effectiveRange1": "1000-01-01T00:00:00-06:00",
"effectiveRange2": "2499-12-31T00:00:00-06:00"
},
"commentators": [],
"created": "2013-01-14T09:24:56-06:00",
"getRawRelatedItems": [],
"cmf_uid": [],
"Creator": "admin",
"end": [],
"modified": "2013-01-14T09:24:56-06:00",
"Description": "Site News",
"ModificationDate": "2013-01-14T09:24:56-06:00",
"total_comments": 0,
"in_reply_to": [],
"getIcon": "",
"effective": "1000-01-01T00:00:00-06:00",
"SearchableText": "news News Site News ",
"getObjPositionInParent": 61,
"object_provides": [
"collective.syndication.interfaces.ISyndicatable",
"Products.ATContentTypes.interfaces.folder.IATFolder",
"Products.CMFCore.interfaces._content.IContentish",
"z3c.relationfield.interfaces.IHasIncomingRelations",
"webdav.interfaces.IWriteLock"
],
"last_comment_date": null,
"review_state": "published",
"start": [],
"Type": "Folder",
"listCreators": [
"admin"
],
"getId": "news",
"getObjSize": "1 kB",
"Title": "News"
}
Try to use lower case index names.
Does it work?
If not, can you provide your indexed document and mapping if any?
UPDATE:
You use the default analyzer, so your field is broken into tokens which are lowercased.
A TermFilter is not analyzed, so it does not match.
You can lowercase your TermFilter value, use a MatchQuery (which is analyzed), or change your mapping and set the field to not_analyzed.

Resources