How to transform a json to flat using JOLT? - apache-nifi

I need to move the fields inside the attributes element up to the same level as id.
I am having trouble copying the attributes so that they end up on that level.
This is my sample JSON
{
"data" : [ {
"type" : "types",
"id" : "CYY7",
"attributes" : {
"description" : null,
"color" : "#dfc12d",
"duration" : 15,
"created_at" : "2017-01-10T04:51:22Z",
"updated_at" : "2017-01-10T04:51:22Z",
"slug" : "15min",
"active" : false,
"location" : null
}
}, {
"type" : "types",
"id" : "BGER",
"attributes" : {
"description" : null,
"color" : "#8989fc",
"duration" : 30,
"created_at" : "2017-01-10T04:51:22Z",
"updated_at" : "2017-01-10T04:51:22Z",
"slug" : "30min",
"active" : true,
"location" : null
}
} ]
}
This is my sample transformation.
[{
"operation": "shift",
"spec": {
"data": {
"*": {
"id": "event_type[&1].id",
"type": "event_type[&1].type",
"attributes": "event_type[&1].attributes[&1].description"
}
}
}
}]
The desired output would be
{
"event_type" : [ {
"type" : "types",
"id" : "CYY7",
"description" : null,
"color" : "#dfc12d",
"duration" : 15
}, {
"type" : "types",
"id" : "BGER",
"description" : null,
"color" : "#8989fc",
"duration" : 30
}]
}

Try with this Jolt Spec:
[{
"operation": "shift",
"spec": {
"data": {
"*": {
"id": "event_type[&1].id",
"type": "event_type[&1].type",
"attributes": {
"description": "event_type[&2].description",
"color": "event_type[&2].color",
"duration": "event_type[&2].duration"
}
}
}
}
}]
Output:
{
"event_type" : [ {
"id" : "CYY7",
"type" : "types",
"description" : null,
"color" : "#dfc12d",
"duration" : 15
}, {
"id" : "BGER",
"type" : "types",
"description" : null,
"color" : "#8989fc",
"duration" : 30
} ]
}

Related

Elasticsearch aggregation based on nested terms

I have es documents as below
{
"_index" : "candidate",
"_type" : "_doc",
"_id" : "oNJLMHsBMf41SmWAkvYj",
"_score" : 1.0,
"_source" : {
"created_at" : "2021-07-15",
"updated_at" : "2021-08-05T22:33:30.923914+00:00",
"user_id" : 74780,
"user_name" : "Steven-c96b54",
"first_name" : "Steven",
"last_name" : "Lu",
"email" : "stevenlu5a#mougulan.com",
"contact_number" : "",
"country_code" : "cn",
"country" : "China",
"city" : "Shanghai",
"state" : "Shanghai",
"address" : "Shanghai, Shanghai, China",
"active_step" : 5,
"is_active" : true,
"is_deleted" : false,
"is_csv_user" : true,
"is_profile_completed" : true,
"profile_completion_percentage" : 0.0,
"gender" : "Female",
"title_rank" : "Director of Food & Beverage",
"race_ethnicity" : "I do not wish to provide this information",
"years_of_experience" : "12-15",
"current_organizational_level" : "property",
"base_salary" : 123,
"bonus" : "",
"benefits" : [
{
"benefit" : ""
}
],
"badge" : "Rising Mogul",
"languages" : [
{
"language" : "Chinese"
}
],
"geographies" : [
{
"geography" : "China"
}
],
"current_position" : "Assistant Director of F&B",
"seeking_position" : [
{
"seeking_position" : "Food & Beverage General Manager"
},
{
"seeking_position" : "Director of Food & Beverage"
},
{
"seeking_position" : "General Manager"
}
],
"current_location" : [
{
"city" : "Shanghai",
"country" : "China",
"country_code" : "cn",
"region" : "China"
}
],
"preferred_location" : [
{
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"compensation_seeking" : [
{
"compensation" : 123,
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"brand_experience" : [
{
"brand" : "Other",
"company" : "Other"
},
{
"brand" : "Grand Hyatt",
"company" : "Hyatt Hotel Corporation"
},
{
"brand" : "St Regis",
"company" : "Marriott"
},
{
"brand" : "Shangri-La",
"company" : "Other Lifestyle - Luxury"
}
],
"brand_segment_experience" : [
{
"brand_segment" : "Upper-Upscale"
},
{
"brand_segment" : "Luxury"
}
],
"real_estate_type_experience" : [
{
"real_estate" : "Government"
}
],
"property_type_experience" : [
{
"property_type" : "Full-Service Hotel"
}
]
}
},
{
"_index" : "candidate",
"_type" : "_doc",
"_id" : "odJLMHsBMf41SmWAkvYj",
"_score" : 1.0,
"_source" : {
"created_at" : "2021-07-15",
"updated_at" : "2021-08-05T21:32:43.813128+00:00",
"user_id" : 74779,
"user_name" : "Steven-8d832e",
"first_name" : "Steven",
"last_name" : "Liu",
"email" : "stevenliu5a#mougulan.com",
"contact_number" : "",
"country_code" : "cn",
"country" : "China",
"city" : "Shandong",
"state" : "Shandong",
"address" : "Qingdao, Shandong, China",
"active_step" : 5,
"is_active" : true,
"is_deleted" : false,
"is_csv_user" : true,
"is_profile_completed" : true,
"profile_completion_percentage" : 0.0,
"gender" : "Female",
"title_rank" : "Director of Rooms",
"race_ethnicity" : "I do not wish to provide this information",
"years_of_experience" : "9-12",
"current_organizational_level" : "corporate",
"base_salary" : 120,
"bonus" : "",
"benefits" : [
{
"benefit" : ""
}
],
"badge" : "",
"languages" : [
{
"language" : "Chinese"
}
],
"geographies" : [
{
"geography" : "China"
}
],
"current_position" : "",
"seeking_position" : [
{
"seeking_position" : "Director of Room Operations"
},
{
"seeking_position" : "General Manager"
},
{
"seeking_position" : "Director of Rooms"
}
],
"current_location" : [
{
"city" : "Shandong",
"country" : "China",
"country_code" : "cn",
"region" : "China"
}
],
"preferred_location" : [
{
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"compensation_seeking" : [
{
"compensation" : 120,
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"brand_experience" : [
{
"brand" : "Holiday Inn Hotels & Resorts",
"company" : "InterContinental Hotels Group"
},
{
"brand" : "Pullman",
"company" : "Accor"
},
{
"brand" : "Other",
"company" : "Other"
}
],
"brand_segment_experience" : [
{
"brand_segment" : "Midscale"
},
{
"brand_segment" : "Upper-Upscale"
},
{
"brand_segment" : "Upscale"
}
],
"real_estate_type_experience" : [
{
"real_estate" : "Government"
}
],
"property_type_experience" : [
{
"property_type" : "Full-Service Hotel"
}
]
}
},
{
"_index" : "candidate",
"_type" : "_doc",
"_id" : "otJLMHsBMf41SmWAkvYj",
"_score" : 1.0,
"_source" : {
"created_at" : "2021-07-15",
"updated_at" : "2021-08-05T22:33:25.563336+00:00",
"user_id" : 74778,
"user_name" : "Steven-706c40",
"first_name" : "Steven",
"last_name" : "Liu",
"email" : "stevenliu47#mougulan.com",
"contact_number" : "",
"country_code" : "cn",
"country" : "China",
"city" : "Shanghai",
"state" : "Shanghai",
"address" : "Shanghai, Shanghai, China",
"active_step" : 5,
"is_active" : true,
"is_deleted" : false,
"is_csv_user" : true,
"is_profile_completed" : true,
"profile_completion_percentage" : 0.0,
"gender" : "Female",
"title_rank" : "Director of Sales & Marketing",
"race_ethnicity" : "I do not wish to provide this information",
"years_of_experience" : "9-12",
"current_organizational_level" : "property",
"base_salary" : 130,
"bonus" : "",
"benefits" : [
{
"benefit" : ""
}
],
"badge" : "Rising Mogul",
"languages" : [
{
"language" : "English"
}
],
"geographies" : [
{
"geography" : "China"
}
],
"current_position" : "Account Director - MICE & Leisure",
"seeking_position" : [
{
"seeking_position" : "Director of Sales & Marketing"
},
{
"seeking_position" : "Regional Director of Sales"
},
{
"seeking_position" : "Director of Sales Strategy & Planning"
}
],
"current_location" : [
{
"city" : "Shanghai",
"country" : "China",
"country_code" : "cn",
"region" : "China"
}
],
"preferred_location" : [
{
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"compensation_seeking" : [
{
"compensation" : 130,
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"brand_experience" : [
{
"brand" : "Hilton Hotels & Resorts",
"company" : "Hilton"
},
{
"brand" : "Westin",
"company" : "Marriott"
},
{
"brand" : "Swissotel",
"company" : "Accor"
}
],
"brand_segment_experience" : [
{
"brand_segment" : "Upscale"
},
{
"brand_segment" : "Upper-Upscale"
}
],
"real_estate_type_experience" : [
{
"real_estate" : "Government"
}
],
"property_type_experience" : [
{
"property_type" : "Full-Service Hotel"
}
]
}
},
{
"_index" : "candidate",
"_type" : "_doc",
"_id" : "o9JLMHsBMf41SmWAkvYj",
"_score" : 1.0,
"_source" : {
"created_at" : "2021-07-15",
"updated_at" : "2021-08-05T22:46:07.919661+00:00",
"user_id" : 74777,
"user_name" : "Steven-513bc8",
"first_name" : "Steven",
"last_name" : "Li",
"email" : "stevenli77#mougulan.com",
"contact_number" : "",
"country_code" : "cn",
"country" : "China",
"city" : "Jiangsu",
"state" : "Jiangsu",
"address" : "Suzhou, Jiangsu, China",
"active_step" : 5,
"is_active" : true,
"is_deleted" : false,
"is_csv_user" : true,
"is_profile_completed" : true,
"profile_completion_percentage" : 0.0,
"gender" : "Female",
"title_rank" : "Director of Rooms",
"race_ethnicity" : "I do not wish to provide this information",
"years_of_experience" : "6-9",
"current_organizational_level" : "property",
"base_salary" : 140,
"bonus" : "",
"benefits" : [
{
"benefit" : ""
}
],
"badge" : "",
"languages" : [
{
"language" : "Chinese"
}
],
"geographies" : [
{
"geography" : "China"
}
],
"current_position" : "Quality Manager",
"seeking_position" : [
{
"seeking_position" : "Director of Rooms"
},
{
"seeking_position" : "General Manager"
},
{
"seeking_position" : "Director of Room Operations"
}
],
"current_location" : [
{
"city" : "Jiangsu",
"country" : "China",
"country_code" : "cn",
"region" : "China"
}
],
"preferred_location" : [
{
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"compensation_seeking" : [
{
"compensation" : 140,
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"brand_experience" : [
{
"brand" : "InterContinental Hotels Group",
"company" : "InterContinental Hotels Group"
},
{
"brand" : "W Hotels",
"company" : "Marriott"
}
],
"brand_segment_experience" : [
{
"brand_segment" : "Luxury"
},
{
"brand_segment" : "Upper-Upscale"
},
{
"brand_segment" : "Upscale"
}
],
"real_estate_type_experience" : [
{
"real_estate" : "Government"
}
],
"property_type_experience" : [
{
"property_type" : "Full-Service Hotel"
}
]
}
},
{
"_index" : "candidate",
"_type" : "_doc",
"_id" : "pNJLMHsBMf41SmWAkvYj",
"_score" : 1.0,
"_source" : {
"created_at" : "2021-07-15",
"updated_at" : "2021-08-05T21:32:40.526078+00:00",
"user_id" : 74776,
"user_name" : "Steven-be8fa6",
"first_name" : "Steven",
"last_name" : "Li",
"email" : "stevenli2b#mougulan.com",
"contact_number" : "",
"country_code" : "cn",
"country" : "China",
"city" : "Beijing",
"state" : "Beijing",
"address" : "Beijing, Beijing, China",
"active_step" : 5,
"is_active" : true,
"is_deleted" : false,
"is_csv_user" : true,
"is_profile_completed" : true,
"profile_completion_percentage" : 0.0,
"gender" : "Female",
"title_rank" : "Director of Sales & Marketing",
"race_ethnicity" : "I do not wish to provide this information",
"years_of_experience" : "3-6",
"current_organizational_level" : "property",
"base_salary" : 150,
"bonus" : "",
"benefits" : [
{
"benefit" : ""
}
],
"badge" : "",
"languages" : [
{
"language" : "Chinese"
}
],
"geographies" : [
{
"geography" : "China"
}
],
"current_position" : "Asst. Director of Sales",
"seeking_position" : [
{
"seeking_position" : "Regional Director of Sales"
},
{
"seeking_position" : "Director of Sales & Marketing"
},
{
"seeking_position" : "Director of Sales Strategy & Planning"
}
],
"current_location" : [
{
"city" : "Beijing",
"country" : "China",
"country_code" : "cn",
"region" : "China"
}
],
"preferred_location" : [
{
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"compensation_seeking" : [
{
"compensation" : 150,
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"brand_experience" : [
{
"brand" : "Shangri-La",
"company" : "Other Lifestyle - Luxury"
}
],
"brand_segment_experience" : [
{
"brand_segment" : "Luxury"
}
],
"real_estate_type_experience" : [
{
"real_estate" : "Government"
}
],
"property_type_experience" : [
{
"property_type" : "Full-Service Hotel"
}
]
}
}
Previously I was averaging the nested compensation_seeking.compensation field inside a terms aggregation on title_rank.keyword, as below:
GET candidate/_search
{
"aggs": {
"comp": {
"terms": {
"field": "title_rank.keyword",
"size": 50000
},
"aggs": {
"level": {
"nested": {
"path": "compensation_seeking"
},
"aggs": {
"level": {
"avg": {
"field": "compensation_seeking.compensation"
}
}
}
}
}
}
}
}
But now I want to base this aggregation on seeking_position.seeking_position.keyword instead: rather than grouping by title rank, I want the average compensation that candidates are seeking, grouped by their seeking_position.
I tried replacing title_rank.keyword with seeking_position.seeking_position.keyword, but it returns empty buckets.
I also tried something like this, but it did not help:
GET candidate/_search
{
"aggs": {
"comp": {
"nested": {
"path": "seeking_position"
},
"aggs": {
"level": {
"terms": {
"field": "seeking_position.seeking_position.keyword"
}
},
"aggs": {
"level": {
"nested": {
"path": "compensation_seeking"
},
"aggs": {
"level": {
"avg": {
"field": "compensation_seeking.compensation"
}
}
}
}
}
}
}
}
}
Kindly suggest
{
"aggs": {
"comp": {
"nested": {
"path": "seeking_position"
},
"aggs": {
"seek_pos": {
"terms": {
"field": "seeking_position.seeking_position.keyword",
"exclude": "",
"size": 50000
},
"aggs": {
"level": {
"nested": {
"path": "compensation_seeking"
},
"aggs": {
"level": {
"avg": {
"field": "compensation_seeking.compensation"
}
}
}
}
}
}
}
}
}
}

Possibility to only get one nested object from another nested object

I have an index with shop orders. Each document represents one order. In short my mapping looks like this:
{
"properties": {
"id": {
"type": "keyword"
},
"shopid": {
"type": "keyword"
},
"orderarticles": {
"type": "nested",
"properties": {
"orderid": {"type": "keyword"},
"artid": {"type": "keyword"},
"categories": {
"type": "nested",
"properties": {
"catid": {"type": "keyword"},
"pos": {"type": "integer"}
}
}
}
}
}
I want to get only the category with the smallest pos value for each orderarticle.
Any idea how to get those?
I tried a construct of nested and reverse nested aggregations, but this did not bring me a solution.
Maybe I just need a hint :-)
EDIT:
Sample Doc:
"_source" : {
"id" : "0461a3310615643a1ffb4e3842c10c66",
"shopid" : "2",
"userid" : "ef7316c9f9cf3d12143aae63c43401c2",
"orderdate" : "2020-05-19T18:53:19+02:00",
"ordernr" : "999999",
"ramandantid" : "1",
"billemail" : "test#test.de",
"billcountryid" : "a7c40f631fc920687.20179984",
"delcountryid" : "",
"totalnetsum" : "132",
"delcost" : "11.9",
"discount" : "0",
"currency" : "EUR",
"currate" : "1",
"orderarticles" : [
{
"orderid" : "0461a3310615643a1ffb4e3842c10c66",
"artid" : "47eaed3db45b3685b58f70fdfc5d70dd",
"artnum" : "60.0605.8",
"title" : "Test Article",
"amount" : "3",
"pic1" : "r6006058-01.jpg",
"nprice" : "44",
"netprice" : "132",
"convertedtotalnetprice" : "132",
"convertednetprice" : "44",
"categories" : [
{
"catid" : "085328abacf9366f60728bff31a310df",
"pos" : "75"
},
{
"catid" : "8c7e45d5fc6bdde76deaeca4f4eabbb9",
"pos" : "14"
},
{
"catid" : "73afd2bd79791f9761731c2e157e06c1",
"pos" : "37"
}
]
}
],
"billcountryisoalpha2" : "DE",
"billcountrytitle" : "Germany",
"delcountryisoalpha2" : null,
"delcountrytitle" : null,
"exchangerate" : 1,
"convertedtotalnetsum" : "132"
}
}
"_source" : {
"id" : "0461a3310615643a1ffb4e3842c10c66",
"shopid" : "2",
"userid" : "ef7316c9f9cf3d12143aae63c43401c2",
"orderdate" : "2020-05-19T18:53:19+02:00",
"ordernr" : "999999",
"ramandantid" : "1",
"billemail" : "test#test.de",
"billcountryid" : "a7c40f631fc920687.20179984",
"delcountryid" : "",
"totalnetsum" : "132",
"delcost" : "11.9",
"discount" : "0",
"currency" : "EUR",
"currate" : "1",
"orderarticles" : [
{
"orderid" : "0461a3310615643a1ffb4e3842c10c66",
"artid" : "47eaed3db45b3685b58f70fdfc5d70dd",
"artnum" : "60.0605.8",
"title" : "Test Article",
"amount" : "3",
"pic1" : "r6006058-01.jpg",
"nprice" : "156",
"netprice" : "132",
"convertedtotalnetprice" : "132",
"convertednetprice" : "44",
"categories" : [
{
"catid" : "another category id",
"pos" : "75"
},
{
"catid" : "8c7e45d5fc6bdde76deaeca4f4eabbb9",
"pos" : "14"
},
{
"catid" : "another category id",
"pos" : "37"
}
]
},
{
"orderid" : "0461a3310615643a1ffb4e3842c10c66",
"artid" : "another article id",
"artnum" : "60.0605.7",
"title" : "Test Article 2",
"amount" : "3",
"pic1" : "r6006058-01.jpg",
"nprice" : "200",
"netprice" : "600",
"convertedtotalnetprice" : "600",
"convertednetprice" : "200",
"categories" : [
{
"catid" : "another category id",
"pos" : "10"
},
{
"catid" : "8c7e45d5fc6bdde76deaeca4f4eabbb9",
"pos" : "100"
},
{
"catid" : "another category id",
"pos" : "37"
}
]
}
],
"billcountryisoalpha2" : "DE",
"billcountrytitle" : "Germany",
"delcountryisoalpha2" : null,
"delcountrytitle" : null,
"exchangerate" : 1,
"convertedtotalnetsum" : "132"
}
}

Elasticsearch ngram tokenizer returns all results regardless of query input

I am trying to build a query to search for records in the following format: TR000002_1_2020.
Users should be able to search for results the following ways:
TR000002 or 2_1_2020 or TR000002_1_2020 or 2020. I figured an ngram tokenizer would be best suited for my needs. I am using Elasticsearch 6.8, so I cannot use the built-in Search-As-You-Type feature introduced in Elasticsearch 7.
Here's my implementation, which follows the docs here. The only thing I changed was EdgeNGram -> NGram, since the user can search from any point in the text.
My Analysis block looks like this:
.Analysis(a => a
.Analyzers(aa => aa
.Custom("autocomplete", ca => ca
.Tokenizer("autocomplete")
.Filters(new string[] {
"lowercase"
})
)
.Custom("autocomplete_search", ca => ca
.Tokenizer("lowercase")
)
)
.Tokenizers(t => t
.NGram("autocomplete", e => e
.MinGram(2)
.MaxGram(16)
.TokenChars(new TokenChar[] {
TokenChar.Letter,
TokenChar.Digit,
TokenChar.Punctuation,
TokenChar.Symbol
})
)
)
)
Then in my mapping I define:
.Text(t => t
.Name(tr => tr.TestRecordId)
.Analyzer("autocomplete")
.SearchAnalyzer("autocomplete_search")
)
When I search for TR000002, my query returns all results instead of just the records that contain those specific characters. What am I doing wrong? Is there a better tokenizer for this specific use case? Thanks!
EDIT: Here's a sample of what is returned:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 27,
"max_score" : 0.105360515,
"hits" : [
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "3",
"_score" : 0.105360515,
"_source" : {
"id" : 3,
"testRecordId" : "TR000002_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 12,
"testStatus" : {
"testStatusId" : 12,
"name" : "Complete: Postponed Until Further Notice"
},
"discriminator" : "SingleEventEffectsRecord",
"testRecordServiceOrders" : [
{
"testRecordId" : 3,
"serviceOrderId" : 9,
"serviceOrder" : {
"serviceOrderId" : 9,
"serviceOrderNumber" : "105702"
}
}
],
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
},
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false,
"sEETestRates" : [ ]
}
},
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "11",
"_score" : 0.105360515,
"_source" : {
"id" : 11,
"testRecordId" : "TR000011_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 1,
"testStatus" : {
"testStatusId" : 1,
"name" : "Active"
},
"discriminator" : "TotalIonizingDoseRecord",
"creatorUsername" : "[REDACTED]",
"creator" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"testRecordServiceOrders" : [ ],
"partLDC" : "12",
"waferLot" : "1",
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false,
"testStartDate" : "2020-07-30T00:00:00",
"actualCompletionDate" : "2020-07-31T00:00:00"
}
},
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "17",
"_score" : 0.105360515,
"_source" : {
"id" : 17,
"testRecordId" : "TR000017_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "lewallen",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 1,
"testStatus" : {
"testStatusId" : 1,
"name" : "Active"
},
"discriminator" : "TotalIonizingDoseRecord",
"creatorUsername" : "[REDACTED]",
"creator" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"testRecordServiceOrders" : [ ],
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false
}
},
Also here's what shows for mapping:
"testRecordId" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "autocomplete_search"
},
I guess I should also mention, I've been testing this query in the console like so:
GET test-records-development/_search
{
"query": {
"match": {
"testRecordId": {
"query": "TR000002_1_2020"
}
}
}
}
EDIT 2: Added API response from index _settings endpoint:
{
"test-records-development-09-09-2020-02-00-00" : {
"settings" : {
"index" : {
"number_of_shards" : "5",
"provided_name" : "test-records-development-09-09-2020-02-00-00",
"creation_date" : "1599617013874",
"analysis" : {
"analyzer" : {
"autocomplete" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "autocomplete"
},
"autocomplete_search" : {
"type" : "custom",
"tokenizer" : "lowercase"
}
},
"tokenizer" : {
"autocomplete" : {
"token_chars" : [
"letter",
"digit",
"punctuation",
"symbol"
],
"min_gram" : "2",
"type" : "ngram",
"max_gram" : "16"
}
}
},
"number_of_replicas" : "0",
"uuid" : "FSeCa0YwRCOJVbjfxYGkig",
"version" : {
"created" : "6080199"
}
}
}
}
}
As I don't have access to the analyzer settings in JSON format, I can't confirm it, but most probably the issue is with your search analyzer autocomplete_search, which creates search-time tokens that match the index-time tokens of other documents.
For example: you are searching for TR000002_1_2020; if that produces 2020 as a token, and the document containing TR000011_1_2020 also produces a 2020 token, then your query will match it.
You can use the analyze API to check the tokens generated by an analyzer; as mentioned earlier, most likely some of the tokens match, as shown above.

Import only arrays that match conditions in Elastic Search

I'm dealing with doubly nested data in Elasticsearch.
I want it to work like a SELECT * FROM ... WHERE ... query in an RDBMS.
Suppose you have the following data:
POST test-stack/test/1234_5678
{
"Id" : 1234,
"availables":
[
{
"Id" : 4444,
"date" : "2019-09-10",
"time" : [
{
"dateTime" : "2019-09-10T09:30:00+09:00",
"Count" : 50
},
{
"dateTime" : "2019-09-10T10:00:00+09:00",
"Count" : 50
},
{
"dateTime" : "2019-09-10T10:30:00+09:00",
"Count" : 50
}
]
},
{
"Id" : 5555,
"date" : "2019-09-11",
"time" : [
{
"dateTime" : "2019-09-11T09:30:00+09:00",
"Count" : 50
},
{
"dateTime" : "2019-09-11T10:00:00+09:00",
"Count" : 50
},
{
"dateTime" : "2019-09-11T10:30:00+09:00",
"Count" : 50
}
]
},
{
"Id" : 6666,
"date" : "2019-09-12",
"time" : [
{
"dateTime" : "2019-09-12T09:30:00+09:00",
"Count" : 50
},
{
"dateTime" : "2019-09-12T10:00:00+09:00",
"Count" : 50
},
{
"dateTime" : "2019-09-12T10:30:00+09:00",
"Count" : 50
}
]
}
]
}
If I run something like this,
Select * from test t where t.availables.date == '2019-09-10';
then I want to get this result:
"Id" : 4444,
"date" : "2019-09-10",
"time" : [
{
"dateTime" : "2019-09-10T09:30:00+09:00",
"Count" : 50
},
{
"dateTime" : "2019-09-10T10:00:00+09:00",
"Count" : 50
},
{
"dateTime" : "2019-09-10T10:30:00+09:00",
"Count" : 50
}
]
}
I'm a beginner in Elasticsearch and I wonder if this is possible in Elasticsearch.
I've studied Painless scripts, but I still don't know how to do it.
You need to use a nested query with inner hits.
The nested query filters on the nested field, and inner hits return the matching nested documents.
Mapping:
PUT testindex11/_mapping
{
"properties": {
"Id": {
"type": "text"
},
"availables": {
"type": "nested",
"properties": {
"Id": {
"type": "text"
},
"date": {
"type": "date",
"format": "yyyy-MM-dd"
},
"time":{
"type": "nested",
"properties": {
"dateTime" :{
"type":"date",
"format":"yyyy-MM-dd'T'HH:mm:ss"
},
"count":{
"type":"integer"
}
}
}
}
}
}
}
Query:
GET testindex11/_search
{
"query": {
"nested": {
"path": "availables",
"query": {
"term": {
"availables.date": {
"value": "2019-09-10"
}
}
},
"inner_hits": {}
}
}
}
Result:
[
{
"_index" : "testindex11",
"_type" : "_doc",
"_id" : "PXuHQm0B4boMRQnoJOpR",
"_score" : 1.0,
"_source" : {
"Id" : 1234,
"availables" : [
{
"Id" : 4444,
"date" : "2019-09-10",
"time" : [
{
"dateTime" : "2019-09-10T09:30:00",
"Count" : 50
},
{
"dateTime" : "2019-09-10T10:00:00",
"Count" : 50
},
{
"dateTime" : "2019-09-10T10:30:00",
"Count" : 50
}
]
},
{
"Id" : 5555,
"date" : "2019-09-11",
"time" : [
{
"dateTime" : "2019-09-11T09:30:00",
"Count" : 50
},
{
"dateTime" : "2019-09-11T10:00:00",
"Count" : 50
},
{
"dateTime" : "2019-09-11T10:30:00",
"Count" : 50
}
]
},
{
"Id" : 6666,
"date" : "2019-09-12",
"time" : [
{
"dateTime" : "2019-09-12T09:30:00",
"Count" : 50
},
{
"dateTime" : "2019-09-12T10:00:00",
"Count" : 50
},
{
"dateTime" : "2019-09-12T10:30:00",
"Count" : 50
}
]
}
]
},
"inner_hits" : {
"availables" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "testindex11",
"_type" : "_doc",
"_id" : "PXuHQm0B4boMRQnoJOpR",
"_nested" : {
"field" : "availables",
"offset" : 0
},
"_score" : 1.0,
"_source" : {
"Id" : 4444,
"date" : "2019-09-10",
"time" : [
{
"dateTime" : "2019-09-10T09:30:00",
"Count" : 50
},
{
"dateTime" : "2019-09-10T10:00:00",
"Count" : 50
},
{
"dateTime" : "2019-09-10T10:30:00",
"Count" : 50
}
]
}
}
]
}
}
}
}
]

Using mongodb $lookup on big documents is slow

I have a users_users collection with 966,628 documents and an orders_orders collection with 1,419,081 documents (with more than 14,000,000 entries inside the orders).
I need to retrieve users' data based on multiple filters (location, birthday, number of orders, number of products bought, etc.), but the query never finishes. I'm new to MongoDB, so I am probably doing something wrong and need to learn.
db.users_users.aggregate([{
$match: {
locale: {
$in: ["fr_FR", "fr_BE"]
},
"users_addresses.country_iso2": "FR",
mobile: {
$ne: null
}
}
}, {
$lookup: {
from: "orders_orders",
localField: "_id",
foreignField: "id_user",
as: "orders"
}
}, {
$unwind: "$orders"
}, {
$group: {
"_id": "$_id",
"lastname": {
$first: "$lastname"
},
"firstname": {
$first: "$firstname"
},
"email": {
$first: "$email"
},
"date_birth": {
$first: "$date_birth"
},
"locale": {
$first: "$locale"
},
"nb_orders": {
$sum: 1
},
"order_total": {
$sum: "$orders.tax_inclusive_amount"
},
"last_order": {
$max: "$orders.date_creation"
},
"entries": {
$push: "$orders.entries"
},
"countries": {
$addToSet: "$users_addresses.id_country"
},
}
}, {
$unwind: "$entries"
}, {
$unwind: "$entries"
}, {
$group: {
"_id": "$_id",
"lastname": {
$first: "$lastname"
},
"firstname": {
$first: "$firstname"
},
"email": {
$first: "$email"
},
"date_birth": {
$first: "$date_birth"
},
"locale": {
$first: "$locale"
},
"nb_orders": {
$first: "$nb_orders"
},
"order_total": {
$first: "$order_total"
},
"last_order": {
$first: "$last_order"
},
"countries": {
$first: "$countries"
},
"nb_entries": {
$sum: 1
}
}
}, {
$match: {
nb_orders: {
$gt: 1
},
nb_entries: {
$gt: 10
}
}
}])
EDIT:
Indexes, documents and output as requested
users_users indexes
> db.users_users.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"pre_mongified_id" : 1
},
"name" : "pre_mongified_id_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"email" : 1
},
"name" : "email_1",
"ns" : "elf.users_users",
"background" : true
},
{
"v" : 1,
"key" : {
"date_birth" : 1
},
"name" : "date_birth_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"mobile" : 1
},
"name" : "mobile_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"locale" : 1
},
"name" : "locale_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"users_addresses.postal_code" : 1
},
"name" : "users_addresses.postal_code_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"users_addresses.city" : 1
},
"name" : "users_addresses.city_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"users_addresses.country_iso2" : 1
},
"name" : "users_addresses.country_iso2_1",
"ns" : "elf.users_users"
}
]
orders_orders indexes
> db.orders_orders.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "elf.orders_orders"
},
{
"v" : 1,
"key" : {
"pre_mongified_id" : 1
},
"name" : "pre_mongified_id_1",
"ns" : "elf.orders_orders"
},
{
"v" : 1,
"key" : {
"id_user" : 1
},
"name" : "id_user_1",
"ns" : "elf.orders_orders"
},
{
"v" : 1,
"key" : {
"entries.id_target" : 1,
"entries.type" : 1
},
"name" : "entries.id_target_1_entries.type_1",
"ns" : "elf.orders_orders",
"background" : true
},
{
"v" : 1,
"key" : {
"number" : 1
},
"name" : "number_1",
"ns" : "elf.orders_orders"
}
]
users_users sample
> db.users_users.find().limit(2).pretty()
{
"_id" : ObjectId("56c46f6eae6f960fb6f59107"),
"id_civilitytitle" : 2,
"date_creation" : ISODate("2008-09-05T18:17:42Z"),
"date_update" : null,
"firstname" : "xxx",
"lastname" : "YYY",
"email" : "xxx#xxx.fr",
"phone" : "xxxxxxxxxx",
"mobile" : null,
"fax" : "",
"disabled" : false,
"confirmed" : true,
"date_birth" : null,
"locale" : "fr_FR",
"users_addresses" : [
{
"id_country" : ObjectId("56c43401ae6f960fb6000396"),
"name" : "Adresse",
"fullname" : "YYY xxx",
"address1" : "xxx",
"address2" : null,
"city" : "xxx",
"postal_code" : "11610",
"country_iso2" : "FR"
}
]
}
{
"_id" : ObjectId("56c46f6eae6f960fb6f59108"),
"id_civilitytitle" : 2,
"date_creation" : ISODate("2008-09-06T14:38:59Z"),
"date_update" : null,
"firstname" : "aaa",
"lastname" : "zzz",
"email" : "xxx@xxx.fr",
"phone" : "xx xx xx xx xx",
"mobile" : null,
"fax" : "",
"disabled" : false,
"confirmed" : true,
"date_birth" : null,
"locale" : "fr_FR",
"users_addresses" : [
{
"id_country" : ObjectId("56c43401ae6f960fb6000396"),
"name" : "Adresse",
"fullname" : "aaa zzz",
"address1" : "xxx",
"address2" : null,
"city" : "xxx",
"postal_code" : "59180",
"country_iso2" : "FR"
}
]
}
orders_orders sample
> db.orders_orders.find().skip(5).limit(2).pretty()
{
"_id" : ObjectId("56c46ccfae6f960fb6dfe9c3"),
"id_user" : ObjectId("56c46f6eae6f960fb6f59109"),
"date_creation" : ISODate("2008-09-09T08:21:56Z"),
"number" : "c000026",
"tax_inclusive_amount" : 10,
"shipping_fees" : 5.95,
"paid" : null,
"cancelled" : "cancelled",
"locale" : null,
"from_mobile" : false,
"entries" : [
{
"_id" : ObjectId("56c4340dae6f960fb60008b5"),
"id_order" : ObjectId("56c46ccfae6f960fb6dfe9c3"),
"id_target" : 58,
"type" : "reference",
"quantity" : 1,
"reference" : "#4203",
"name" : "XXX",
"tax_inclusive_price_unit" : 1,
"tax_inclusive_price_total" : 1,
"tax_rates" : "a:1:{i:0;O:38:\"Catalog_Model_References_Container_Tax\":5:{s:7:\"\u0000*\u0000rate\";d:0.196000000000000007549516567451064474880695343017578125;s:7:\"\u0000*\u0000name\";s:6:\"19.60%\";s:7:\"\u0000*\u0000type\";s:32:\"cbf1c9560e4d3dbae5d65339aefed7b0\";s:13:\"\u0000*\u0000proportion\";d:1;s:8:\"\u0000*\u0000value\";N;}}",
"weight" : null
},
{
"_id" : ObjectId("56c4340dae6f960fb60008be"),
"id_order" : ObjectId("56c46ccfae6f960fb6dfe9c3"),
"id_target" : 247,
"type" : "reference",
"quantity" : 1,
"reference" : "#1711",
"name" : "XXX",
"tax_inclusive_price_unit" : 1,
"tax_inclusive_price_total" : 1,
"tax_rates" : "a:1:{i:0;O:38:\"Catalog_Model_References_Container_Tax\":5:{s:7:\"\u0000*\u0000rate\";d:0.196000000000000007549516567451064474880695343017578125;s:7:\"\u0000*\u0000name\";s:6:\"19.60%\";s:7:\"\u0000*\u0000type\";s:32:\"cbf1c9560e4d3dbae5d65339aefed7b0\";s:13:\"\u0000*\u0000proportion\";d:1;s:8:\"\u0000*\u0000value\";N;}}",
"weight" : null
}
]
}
{
"_id" : ObjectId("56c46ccfae6f960fb6dfe9c4"),
"id_user" : ObjectId("56c46f6eae6f960fb6f5911d"),
"date_creation" : ISODate("2008-09-09T12:32:40Z"),
"number" : "c000027",
"tax_inclusive_amount" : 15,
"shipping_fees" : 5.95,
"paid" : "paid",
"cancelled" : null,
"locale" : null,
"from_mobile" : false,
"entries" : [
{
"_id" : ObjectId("56c4340dae6f960fb60008bf"),
"id_order" : ObjectId("56c46ccfae6f960fb6dfe9c4"),
"id_target" : 105,
"type" : "reference",
"quantity" : 1,
"reference" : "#9011",
"name" : "XXX",
"tax_inclusive_price_unit" : 1,
"tax_inclusive_price_total" : 1,
"tax_rates" : "a:1:{i:0;O:38:\"Catalog_Model_References_Container_Tax\":5:{s:7:\"\u0000*\u0000rate\";d:0.196000000000000007549516567451064474880695343017578125;s:7:\"\u0000*\u0000name\";s:6:\"19.60%\";s:7:\"\u0000*\u0000type\";s:32:\"cbf1c9560e4d3dbae5d65339aefed7b0\";s:13:\"\u0000*\u0000proportion\";d:1;s:8:\"\u0000*\u0000value\";N;}}",
"weight" : null
},
{
"_id" : ObjectId("56c435b0ae6f960fb614c240"),
"id_order" : ObjectId("56c46ccfae6f960fb6dfe9c4"),
"id_target" : 364,
"type" : "reference",
"quantity" : 1,
"reference" : "#1710",
"name" : "xxx",
"tax_inclusive_price_unit" : 1,
"tax_inclusive_price_total" : 1,
"tax_rates" : "a:1:{i:0;O:38:\"Catalog_Model_References_Container_Tax\":5:{s:7:\"\u0000*\u0000rate\";d:0.196000000000000007549516567451064474880695343017578125;s:7:\"\u0000*\u0000name\";s:6:\"19.60%\";s:7:\"\u0000*\u0000type\";s:32:\"cbf1c9560e4d3dbae5d65339aefed7b0\";s:13:\"\u0000*\u0000proportion\";d:1;s:8:\"\u0000*\u0000value\";N;}}",
"weight" : null
}
]
}
expected output
Several fields from users_users on each line (firstname, lastname, email, date_birth, locale, ...)
The query is slow because the pipeline stages that operate on the documents retrieved by the $lookup operator cannot use indexes.
In particular, $max: "$orders.date_creation" is computed on the joined documents and cannot use an index, so it has to do a full scan of them to find the maximum.

Resources