Using mongodb $lookup on big documents is slow - performance

I have a users_users collection with 966,628 documents and an orders_orders collection with 1,419,081 documents (with more than 14,000,000 entries embedded inside the orders).
I need to retrieve users' data based on multiple filters (location, birthday, number of orders, number of products bought, etc.), but the query never finishes. I'm new to MongoDB, so I am probably doing something wrong and need to learn.
// Per-user order statistics: filter users, join each user's orders, then
// compute order and order-entry counts per user and filter on those counts.
db.users_users.aggregate([{
// Stage 1: keep users whose locale is fr_FR or fr_BE, who have an address
// with country_iso2 "FR", and whose mobile is not null.
$match: {
locale: {
$in: ["fr_FR", "fr_BE"]
},
"users_addresses.country_iso2": "FR",
mobile: {
$ne: null
}
}
}, {
// Stage 2: attach, as the array "orders", every orders_orders document whose
// id_user equals this user's _id.
$lookup: {
from: "orders_orders",
localField: "_id",
foreignField: "id_user",
as: "orders"
}
}, {
// Stage 3: emit one document per (user, order) pair. With the default
// $unwind options, users whose "orders" array is empty are dropped here.
$unwind: "$orders"
}, {
// Stage 4: regroup per user. User fields are carried over with $first;
// order statistics are accumulated across that user's orders.
$group: {
"_id": "$_id",
"lastname": {
$first: "$lastname"
},
"firstname": {
$first: "$firstname"
},
"email": {
$first: "$email"
},
"date_birth": {
$first: "$date_birth"
},
"locale": {
$first: "$locale"
},
// One input document per order, so summing 1 counts the user's orders.
"nb_orders": {
$sum: 1
},
"order_total": {
$sum: "$orders.tax_inclusive_amount"
},
"last_order": {
$max: "$orders.date_creation"
},
// Each order's "entries" is itself an array, so this builds an array of
// arrays (one inner array per order).
"entries": {
$push: "$orders.entries"
},
// NOTE(review): users_addresses is an array, so each value added here is
// presumably an array of id_country values rather than a single id - confirm.
"countries": {
$addToSet: "$users_addresses.id_country"
},
}
}, {
// Stage 5: first unwind - one document per order's entries array.
$unwind: "$entries"
}, {
// Stage 6: second unwind - one document per individual order entry.
$unwind: "$entries"
}, {
// Stage 7: regroup per user again, keeping the statistics computed in
// stage 4 and counting the individual entries.
$group: {
"_id": "$_id",
"lastname": {
$first: "$lastname"
},
"firstname": {
$first: "$firstname"
},
"email": {
$first: "$email"
},
"date_birth": {
$first: "$date_birth"
},
"locale": {
$first: "$locale"
},
"nb_orders": {
$first: "$nb_orders"
},
"order_total": {
$first: "$order_total"
},
"last_order": {
$first: "$last_order"
},
"countries": {
$first: "$countries"
},
// One input document per order entry, so summing 1 counts the entries.
"nb_entries": {
$sum: 1
}
}
}, {
// Stage 8: keep only users with more than 1 order and more than 10 entries.
$match: {
nb_orders: {
$gt: 1
},
nb_entries: {
$gt: 10
}
}
}])
EDIT:
Indexes, documents and output as requested
users_users indexes
> db.users_users.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"pre_mongified_id" : 1
},
"name" : "pre_mongified_id_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"email" : 1
},
"name" : "email_1",
"ns" : "elf.users_users",
"background" : true
},
{
"v" : 1,
"key" : {
"date_birth" : 1
},
"name" : "date_birth_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"mobile" : 1
},
"name" : "mobile_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"locale" : 1
},
"name" : "locale_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"users_addresses.postal_code" : 1
},
"name" : "users_addresses.postal_code_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"users_addresses.city" : 1
},
"name" : "users_addresses.city_1",
"ns" : "elf.users_users"
},
{
"v" : 1,
"key" : {
"users_addresses.country_iso2" : 1
},
"name" : "users_addresses.country_iso2_1",
"ns" : "elf.users_users"
}
]
orders_orders indexes
> db.orders_orders.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "elf.orders_orders"
},
{
"v" : 1,
"key" : {
"pre_mongified_id" : 1
},
"name" : "pre_mongified_id_1",
"ns" : "elf.orders_orders"
},
{
"v" : 1,
"key" : {
"id_user" : 1
},
"name" : "id_user_1",
"ns" : "elf.orders_orders"
},
{
"v" : 1,
"key" : {
"entries.id_target" : 1,
"entries.type" : 1
},
"name" : "entries.id_target_1_entries.type_1",
"ns" : "elf.orders_orders",
"background" : true
},
{
"v" : 1,
"key" : {
"number" : 1
},
"name" : "number_1",
"ns" : "elf.orders_orders"
}
]
users_users sample
> db.users_users.find().limit(2).pretty()
{
"_id" : ObjectId("56c46f6eae6f960fb6f59107"),
"id_civilitytitle" : 2,
"date_creation" : ISODate("2008-09-05T18:17:42Z"),
"date_update" : null,
"firstname" : "xxx",
"lastname" : "YYY",
"email" : "xxx#xxx.fr",
"phone" : "xxxxxxxxxx",
"mobile" : null,
"fax" : "",
"disabled" : false,
"confirmed" : true,
"date_birth" : null,
"locale" : "fr_FR",
"users_addresses" : [
{
"id_country" : ObjectId("56c43401ae6f960fb6000396"),
"name" : "Adresse",
"fullname" : "YYY xxx",
"address1" : "xxx",
"address2" : null,
"city" : "xxx",
"postal_code" : "11610",
"country_iso2" : "FR"
}
]
}
{
"_id" : ObjectId("56c46f6eae6f960fb6f59108"),
"id_civilitytitle" : 2,
"date_creation" : ISODate("2008-09-06T14:38:59Z"),
"date_update" : null,
"firstname" : "aaa",
"lastname" : "zzz",
"email" : "xxx#xxx.fr",
"phone" : "xx xx xx xx xx",
"mobile" : null,
"fax" : "",
"disabled" : false,
"confirmed" : true,
"date_birth" : null,
"locale" : "fr_FR",
"users_addresses" : [
{
"id_country" : ObjectId("56c43401ae6f960fb6000396"),
"name" : "Adresse",
"fullname" : "aaa zzz",
"address1" : "xxx",
"address2" : null,
"city" : "xxx",
"postal_code" : "59180",
"country_iso2" : "FR"
}
]
}
orders_orders sample
> db.orders_orders.find().skip(5).limit(2).pretty()
{
"_id" : ObjectId("56c46ccfae6f960fb6dfe9c3"),
"id_user" : ObjectId("56c46f6eae6f960fb6f59109"),
"date_creation" : ISODate("2008-09-09T08:21:56Z"),
"number" : "c000026",
"tax_inclusive_amount" : 10,
"shipping_fees" : 5.95,
"paid" : null,
"cancelled" : "cancelled",
"locale" : null,
"from_mobile" : false,
"entries" : [
{
"_id" : ObjectId("56c4340dae6f960fb60008b5"),
"id_order" : ObjectId("56c46ccfae6f960fb6dfe9c3"),
"id_target" : 58,
"type" : "reference",
"quantity" : 1,
"reference" : "#4203",
"name" : "XXX",
"tax_inclusive_price_unit" : 1,
"tax_inclusive_price_total" : 1,
"tax_rates" : "a:1:{i:0;O:38:\"Catalog_Model_References_Container_Tax\":5:{s:7:\"\u0000*\u0000rate\";d:0.196000000000000007549516567451064474880695343017578125;s:7:\"\u0000*\u0000name\";s:6:\"19.60%\";s:7:\"\u0000*\u0000type\";s:32:\"cbf1c9560e4d3dbae5d65339aefed7b0\";s:13:\"\u0000*\u0000proportion\";d:1;s:8:\"\u0000*\u0000value\";N;}}",
"weight" : null
},
{
"_id" : ObjectId("56c4340dae6f960fb60008be"),
"id_order" : ObjectId("56c46ccfae6f960fb6dfe9c3"),
"id_target" : 247,
"type" : "reference",
"quantity" : 1,
"reference" : "#1711",
"name" : "XXX",
"tax_inclusive_price_unit" : 1,
"tax_inclusive_price_total" : 1,
"tax_rates" : "a:1:{i:0;O:38:\"Catalog_Model_References_Container_Tax\":5:{s:7:\"\u0000*\u0000rate\";d:0.196000000000000007549516567451064474880695343017578125;s:7:\"\u0000*\u0000name\";s:6:\"19.60%\";s:7:\"\u0000*\u0000type\";s:32:\"cbf1c9560e4d3dbae5d65339aefed7b0\";s:13:\"\u0000*\u0000proportion\";d:1;s:8:\"\u0000*\u0000value\";N;}}",
"weight" : null
}
]
}
{
"_id" : ObjectId("56c46ccfae6f960fb6dfe9c4"),
"id_user" : ObjectId("56c46f6eae6f960fb6f5911d"),
"date_creation" : ISODate("2008-09-09T12:32:40Z"),
"number" : "c000027",
"tax_inclusive_amount" : 15,
"shipping_fees" : 5.95,
"paid" : "paid",
"cancelled" : null,
"locale" : null,
"from_mobile" : false,
"entries" : [
{
"_id" : ObjectId("56c4340dae6f960fb60008bf"),
"id_order" : ObjectId("56c46ccfae6f960fb6dfe9c4"),
"id_target" : 105,
"type" : "reference",
"quantity" : 1,
"reference" : "#9011",
"name" : "XXX",
"tax_inclusive_price_unit" : 1,
"tax_inclusive_price_total" : 1,
"tax_rates" : "a:1:{i:0;O:38:\"Catalog_Model_References_Container_Tax\":5:{s:7:\"\u0000*\u0000rate\";d:0.196000000000000007549516567451064474880695343017578125;s:7:\"\u0000*\u0000name\";s:6:\"19.60%\";s:7:\"\u0000*\u0000type\";s:32:\"cbf1c9560e4d3dbae5d65339aefed7b0\";s:13:\"\u0000*\u0000proportion\";d:1;s:8:\"\u0000*\u0000value\";N;}}",
"weight" : null
},
{
"_id" : ObjectId("56c435b0ae6f960fb614c240"),
"id_order" : ObjectId("56c46ccfae6f960fb6dfe9c4"),
"id_target" : 364,
"type" : "reference",
"quantity" : 1,
"reference" : "#1710",
"name" : "xxx",
"tax_inclusive_price_unit" : 1,
"tax_inclusive_price_total" : 1,
"tax_rates" : "a:1:{i:0;O:38:\"Catalog_Model_References_Container_Tax\":5:{s:7:\"\u0000*\u0000rate\";d:0.196000000000000007549516567451064474880695343017578125;s:7:\"\u0000*\u0000name\";s:6:\"19.60%\";s:7:\"\u0000*\u0000type\";s:32:\"cbf1c9560e4d3dbae5d65339aefed7b0\";s:13:\"\u0000*\u0000proportion\";d:1;s:8:\"\u0000*\u0000value\";N;}}",
"weight" : null
}
]
}
expected output
One line per user, each containing several fields from users_users (firstname, lastname, email, date_birth, locale, ...)

The query is slow because the stages that run on the documents retrieved with the $lookup operator do not use indexes.
In particular, $max: "$orders.date_creation" cannot be served from an index, so it has to do a full scan of the joined orders to compute this value.

Related

Elasticsearch aggregation based on nested terms

I have es documents as below
{
"_index" : "candidate",
"_type" : "_doc",
"_id" : "oNJLMHsBMf41SmWAkvYj",
"_score" : 1.0,
"_source" : {
"created_at" : "2021-07-15",
"updated_at" : "2021-08-05T22:33:30.923914+00:00",
"user_id" : 74780,
"user_name" : "Steven-c96b54",
"first_name" : "Steven",
"last_name" : "Lu",
"email" : "stevenlu5a#mougulan.com",
"contact_number" : "",
"country_code" : "cn",
"country" : "China",
"city" : "Shanghai",
"state" : "Shanghai",
"address" : "Shanghai, Shanghai, China",
"active_step" : 5,
"is_active" : true,
"is_deleted" : false,
"is_csv_user" : true,
"is_profile_completed" : true,
"profile_completion_percentage" : 0.0,
"gender" : "Female",
"title_rank" : "Director of Food & Beverage",
"race_ethnicity" : "I do not wish to provide this information",
"years_of_experience" : "12-15",
"current_organizational_level" : "property",
"base_salary" : 123,
"bonus" : "",
"benefits" : [
{
"benefit" : ""
}
],
"badge" : "Rising Mogul",
"languages" : [
{
"language" : "Chinese"
}
],
"geographies" : [
{
"geography" : "China"
}
],
"current_position" : "Assistant Director of F&B",
"seeking_position" : [
{
"seeking_position" : "Food & Beverage General Manager"
},
{
"seeking_position" : "Director of Food & Beverage"
},
{
"seeking_position" : "General Manager"
}
],
"current_location" : [
{
"city" : "Shanghai",
"country" : "China",
"country_code" : "cn",
"region" : "China"
}
],
"preferred_location" : [
{
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"compensation_seeking" : [
{
"compensation" : 123,
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"brand_experience" : [
{
"brand" : "Other",
"company" : "Other"
},
{
"brand" : "Grand Hyatt",
"company" : "Hyatt Hotel Corporation"
},
{
"brand" : "St Regis",
"company" : "Marriott"
},
{
"brand" : "Shangri-La",
"company" : "Other Lifestyle - Luxury"
}
],
"brand_segment_experience" : [
{
"brand_segment" : "Upper-Upscale"
},
{
"brand_segment" : "Luxury"
}
],
"real_estate_type_experience" : [
{
"real_estate" : "Government"
}
],
"property_type_experience" : [
{
"property_type" : "Full-Service Hotel"
}
]
}
},
{
"_index" : "candidate",
"_type" : "_doc",
"_id" : "odJLMHsBMf41SmWAkvYj",
"_score" : 1.0,
"_source" : {
"created_at" : "2021-07-15",
"updated_at" : "2021-08-05T21:32:43.813128+00:00",
"user_id" : 74779,
"user_name" : "Steven-8d832e",
"first_name" : "Steven",
"last_name" : "Liu",
"email" : "stevenliu5a#mougulan.com",
"contact_number" : "",
"country_code" : "cn",
"country" : "China",
"city" : "Shandong",
"state" : "Shandong",
"address" : "Qingdao, Shandong, China",
"active_step" : 5,
"is_active" : true,
"is_deleted" : false,
"is_csv_user" : true,
"is_profile_completed" : true,
"profile_completion_percentage" : 0.0,
"gender" : "Female",
"title_rank" : "Director of Rooms",
"race_ethnicity" : "I do not wish to provide this information",
"years_of_experience" : "9-12",
"current_organizational_level" : "corporate",
"base_salary" : 120,
"bonus" : "",
"benefits" : [
{
"benefit" : ""
}
],
"badge" : "",
"languages" : [
{
"language" : "Chinese"
}
],
"geographies" : [
{
"geography" : "China"
}
],
"current_position" : "",
"seeking_position" : [
{
"seeking_position" : "Director of Room Operations"
},
{
"seeking_position" : "General Manager"
},
{
"seeking_position" : "Director of Rooms"
}
],
"current_location" : [
{
"city" : "Shandong",
"country" : "China",
"country_code" : "cn",
"region" : "China"
}
],
"preferred_location" : [
{
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"compensation_seeking" : [
{
"compensation" : 120,
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"brand_experience" : [
{
"brand" : "Holiday Inn Hotels & Resorts",
"company" : "InterContinental Hotels Group"
},
{
"brand" : "Pullman",
"company" : "Accor"
},
{
"brand" : "Other",
"company" : "Other"
}
],
"brand_segment_experience" : [
{
"brand_segment" : "Midscale"
},
{
"brand_segment" : "Upper-Upscale"
},
{
"brand_segment" : "Upscale"
}
],
"real_estate_type_experience" : [
{
"real_estate" : "Government"
}
],
"property_type_experience" : [
{
"property_type" : "Full-Service Hotel"
}
]
}
},
{
"_index" : "candidate",
"_type" : "_doc",
"_id" : "otJLMHsBMf41SmWAkvYj",
"_score" : 1.0,
"_source" : {
"created_at" : "2021-07-15",
"updated_at" : "2021-08-05T22:33:25.563336+00:00",
"user_id" : 74778,
"user_name" : "Steven-706c40",
"first_name" : "Steven",
"last_name" : "Liu",
"email" : "stevenliu47#mougulan.com",
"contact_number" : "",
"country_code" : "cn",
"country" : "China",
"city" : "Shanghai",
"state" : "Shanghai",
"address" : "Shanghai, Shanghai, China",
"active_step" : 5,
"is_active" : true,
"is_deleted" : false,
"is_csv_user" : true,
"is_profile_completed" : true,
"profile_completion_percentage" : 0.0,
"gender" : "Female",
"title_rank" : "Director of Sales & Marketing",
"race_ethnicity" : "I do not wish to provide this information",
"years_of_experience" : "9-12",
"current_organizational_level" : "property",
"base_salary" : 130,
"bonus" : "",
"benefits" : [
{
"benefit" : ""
}
],
"badge" : "Rising Mogul",
"languages" : [
{
"language" : "English"
}
],
"geographies" : [
{
"geography" : "China"
}
],
"current_position" : "Account Director - MICE & Leisure",
"seeking_position" : [
{
"seeking_position" : "Director of Sales & Marketing"
},
{
"seeking_position" : "Regional Director of Sales"
},
{
"seeking_position" : "Director of Sales Strategy & Planning"
}
],
"current_location" : [
{
"city" : "Shanghai",
"country" : "China",
"country_code" : "cn",
"region" : "China"
}
],
"preferred_location" : [
{
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"compensation_seeking" : [
{
"compensation" : 130,
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"brand_experience" : [
{
"brand" : "Hilton Hotels & Resorts",
"company" : "Hilton"
},
{
"brand" : "Westin",
"company" : "Marriott"
},
{
"brand" : "Swissotel",
"company" : "Accor"
}
],
"brand_segment_experience" : [
{
"brand_segment" : "Upscale"
},
{
"brand_segment" : "Upper-Upscale"
}
],
"real_estate_type_experience" : [
{
"real_estate" : "Government"
}
],
"property_type_experience" : [
{
"property_type" : "Full-Service Hotel"
}
]
}
},
{
"_index" : "candidate",
"_type" : "_doc",
"_id" : "o9JLMHsBMf41SmWAkvYj",
"_score" : 1.0,
"_source" : {
"created_at" : "2021-07-15",
"updated_at" : "2021-08-05T22:46:07.919661+00:00",
"user_id" : 74777,
"user_name" : "Steven-513bc8",
"first_name" : "Steven",
"last_name" : "Li",
"email" : "stevenli77#mougulan.com",
"contact_number" : "",
"country_code" : "cn",
"country" : "China",
"city" : "Jiangsu",
"state" : "Jiangsu",
"address" : "Suzhou, Jiangsu, China",
"active_step" : 5,
"is_active" : true,
"is_deleted" : false,
"is_csv_user" : true,
"is_profile_completed" : true,
"profile_completion_percentage" : 0.0,
"gender" : "Female",
"title_rank" : "Director of Rooms",
"race_ethnicity" : "I do not wish to provide this information",
"years_of_experience" : "6-9",
"current_organizational_level" : "property",
"base_salary" : 140,
"bonus" : "",
"benefits" : [
{
"benefit" : ""
}
],
"badge" : "",
"languages" : [
{
"language" : "Chinese"
}
],
"geographies" : [
{
"geography" : "China"
}
],
"current_position" : "Quality Manager",
"seeking_position" : [
{
"seeking_position" : "Director of Rooms"
},
{
"seeking_position" : "General Manager"
},
{
"seeking_position" : "Director of Room Operations"
}
],
"current_location" : [
{
"city" : "Jiangsu",
"country" : "China",
"country_code" : "cn",
"region" : "China"
}
],
"preferred_location" : [
{
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"compensation_seeking" : [
{
"compensation" : 140,
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"brand_experience" : [
{
"brand" : "InterContinental Hotels Group",
"company" : "InterContinental Hotels Group"
},
{
"brand" : "W Hotels",
"company" : "Marriott"
}
],
"brand_segment_experience" : [
{
"brand_segment" : "Luxury"
},
{
"brand_segment" : "Upper-Upscale"
},
{
"brand_segment" : "Upscale"
}
],
"real_estate_type_experience" : [
{
"real_estate" : "Government"
}
],
"property_type_experience" : [
{
"property_type" : "Full-Service Hotel"
}
]
}
},
{
"_index" : "candidate",
"_type" : "_doc",
"_id" : "pNJLMHsBMf41SmWAkvYj",
"_score" : 1.0,
"_source" : {
"created_at" : "2021-07-15",
"updated_at" : "2021-08-05T21:32:40.526078+00:00",
"user_id" : 74776,
"user_name" : "Steven-be8fa6",
"first_name" : "Steven",
"last_name" : "Li",
"email" : "stevenli2b#mougulan.com",
"contact_number" : "",
"country_code" : "cn",
"country" : "China",
"city" : "Beijing",
"state" : "Beijing",
"address" : "Beijing, Beijing, China",
"active_step" : 5,
"is_active" : true,
"is_deleted" : false,
"is_csv_user" : true,
"is_profile_completed" : true,
"profile_completion_percentage" : 0.0,
"gender" : "Female",
"title_rank" : "Director of Sales & Marketing",
"race_ethnicity" : "I do not wish to provide this information",
"years_of_experience" : "3-6",
"current_organizational_level" : "property",
"base_salary" : 150,
"bonus" : "",
"benefits" : [
{
"benefit" : ""
}
],
"badge" : "",
"languages" : [
{
"language" : "Chinese"
}
],
"geographies" : [
{
"geography" : "China"
}
],
"current_position" : "Asst. Director of Sales",
"seeking_position" : [
{
"seeking_position" : "Regional Director of Sales"
},
{
"seeking_position" : "Director of Sales & Marketing"
},
{
"seeking_position" : "Director of Sales Strategy & Planning"
}
],
"current_location" : [
{
"city" : "Beijing",
"country" : "China",
"country_code" : "cn",
"region" : "China"
}
],
"preferred_location" : [
{
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"compensation_seeking" : [
{
"compensation" : 150,
"city" : "",
"country" : "",
"country_code" : "",
"region" : ""
}
],
"brand_experience" : [
{
"brand" : "Shangri-La",
"company" : "Other Lifestyle - Luxury"
}
],
"brand_segment_experience" : [
{
"brand_segment" : "Luxury"
}
],
"real_estate_type_experience" : [
{
"real_estate" : "Government"
}
],
"property_type_experience" : [
{
"property_type" : "Full-Service Hotel"
}
]
}
}
Previously I was aggregating on nested compensation_seeking.compensation with term aggregation on title_rank.keyword as below
GET candidate/_search
{
"aggs": {
"comp": {
"terms": {
"field": "title_rank.keyword",
"size": 50000
},
"aggs": {
"level": {
"nested": {
"path": "compensation_seeking"
},
"aggs": {
"level": {
"avg": {
"field": "compensation_seeking.compensation"
}
}
}
}
}
}
}
}
Now I want to base this aggregation on seeking_position.seeking_position.keyword instead: rather than grouping by title rank, I want the average compensation candidates are seeking, grouped by their seeking_position.
I tried replacing title_rank.keyword with seeking_position.seeking_position.keyword, but it returns empty buckets.
I also tried something like the following, but it did not help:
GET candidate/_search
{
"aggs": {
"comp": {
"nested": {
"path": "seeking_position"
},
"aggs": {
"level": {
"terms": {
"field": "seeking_position.seeking_position.keyword"
}
},
"aggs": {
"level": {
"nested": {
"path": "compensation_seeking"
},
"aggs": {
"level": {
"avg": {
"field": "compensation_seeking.compensation"
}
}
}
}
}
}
}
}
}
Kindly suggest
{
"aggs": {
"comp": {
"nested": {
"path": "seeking_position"
},
"aggs": {
"seek_pos": {
"terms": {
"field": "seeking_position.seeking_position.keyword",
"exclude": "",
"size": 50000
},
"aggs": {
"level": {
"nested": {
"path": "compensation_seeking"
},
"aggs": {
"level": {
"avg": {
"field": "compensation_seeking.compensation"
}
}
}
}
}
}
}
}
}
}

Possibility to only get one nested object from another nested object

I have an index with shop orders. Each document represents one order. In short my mapping looks like this:
{
"properties": {
"id": {
"type": "keyword"
},
"shopid": {
"type": "keyword"
},
"orderarticles": {
"type": "nested",
"properties": {
"orderid": {"type": "keyword"},
"artid": {"type": "keyword"},
"categories": {
"type": "nested",
"properties": {
"catid": {"type": "keyword"},
"pos": {"type": "integer"}
}
}
}
}
}
I want to get only the category with the smallest pos value for each orderarticle.
Any idea how to get those?
I tried a combination of nested and reverse nested aggregations, but this did not bring me to a solution.
Maybe I just need a hint :-)
EDIT:
Sample Doc:
"_source" : {
"id" : "0461a3310615643a1ffb4e3842c10c66",
"shopid" : "2",
"userid" : "ef7316c9f9cf3d12143aae63c43401c2",
"orderdate" : "2020-05-19T18:53:19+02:00",
"ordernr" : "999999",
"ramandantid" : "1",
"billemail" : "test#test.de",
"billcountryid" : "a7c40f631fc920687.20179984",
"delcountryid" : "",
"totalnetsum" : "132",
"delcost" : "11.9",
"discount" : "0",
"currency" : "EUR",
"currate" : "1",
"orderarticles" : [
{
"orderid" : "0461a3310615643a1ffb4e3842c10c66",
"artid" : "47eaed3db45b3685b58f70fdfc5d70dd",
"artnum" : "60.0605.8",
"title" : "Test Article",
"amount" : "3",
"pic1" : "r6006058-01.jpg",
"nprice" : "44",
"netprice" : "132",
"convertedtotalnetprice" : "132",
"convertednetprice" : "44",
"categories" : [
{
"catid" : "085328abacf9366f60728bff31a310df",
"pos" : "75"
},
{
"catid" : "8c7e45d5fc6bdde76deaeca4f4eabbb9",
"pos" : "14"
},
{
"catid" : "73afd2bd79791f9761731c2e157e06c1",
"pos" : "37"
}
]
}
],
"billcountryisoalpha2" : "DE",
"billcountrytitle" : "Germany",
"delcountryisoalpha2" : null,
"delcountrytitle" : null,
"exchangerate" : 1,
"convertedtotalnetsum" : "132"
}
}
"_source" : {
"id" : "0461a3310615643a1ffb4e3842c10c66",
"shopid" : "2",
"userid" : "ef7316c9f9cf3d12143aae63c43401c2",
"orderdate" : "2020-05-19T18:53:19+02:00",
"ordernr" : "999999",
"ramandantid" : "1",
"billemail" : "test#test.de",
"billcountryid" : "a7c40f631fc920687.20179984",
"delcountryid" : "",
"totalnetsum" : "132",
"delcost" : "11.9",
"discount" : "0",
"currency" : "EUR",
"currate" : "1",
"orderarticles" : [
{
"orderid" : "0461a3310615643a1ffb4e3842c10c66",
"artid" : "47eaed3db45b3685b58f70fdfc5d70dd",
"artnum" : "60.0605.8",
"title" : "Test Article",
"amount" : "3",
"pic1" : "r6006058-01.jpg",
"nprice" : "156",
"netprice" : "132",
"convertedtotalnetprice" : "132",
"convertednetprice" : "44",
"categories" : [
{
"catid" : "another category id",
"pos" : "75"
},
{
"catid" : "8c7e45d5fc6bdde76deaeca4f4eabbb9",
"pos" : "14"
},
{
"catid" : "another category id",
"pos" : "37"
}
]
},
{
"orderid" : "0461a3310615643a1ffb4e3842c10c66",
"artid" : "another article id",
"artnum" : "60.0605.7",
"title" : "Test Article 2",
"amount" : "3",
"pic1" : "r6006058-01.jpg",
"nprice" : "200",
"netprice" : "600",
"convertedtotalnetprice" : "600",
"convertednetprice" : "200",
"categories" : [
{
"catid" : "another category id",
"pos" : "10"
},
{
"catid" : "8c7e45d5fc6bdde76deaeca4f4eabbb9",
"pos" : "100"
},
{
"catid" : "another category id",
"pos" : "37"
}
]
}
],
"billcountryisoalpha2" : "DE",
"billcountrytitle" : "Germany",
"delcountryisoalpha2" : null,
"delcountrytitle" : null,
"exchangerate" : 1,
"convertedtotalnetsum" : "132"
}
}

Elasticsearch ngram tokenizer returns all results regardless of query input

I am trying to build a query to search for records in the following format: TR000002_1_2020.
Users should be able to search for results the following ways:
TR000002 or 2_1_2020 or TR000002_1_2020 or 2020. I figured an ngram tokenizer would be best suited for my needs. I am using Elasticsearch 6.8, so I cannot use the built-in search-as-you-type field type introduced in Elasticsearch 7.
My implementation follows the example from the docs. The only thing I changed was EdgeNGram -> NGram, since the user can search from any point in the text.
My Analysis block looks like this:
.Analysis(a => a
.Analyzers(aa => aa
.Custom("autocomplete", ca => ca
.Tokenizer("autocomplete")
.Filters(new string[] {
"lowercase"
})
)
.Custom("autocomplete_search", ca => ca
.Tokenizer("lowercase")
)
)
.Tokenizers(t => t
.NGram("autocomplete", e => e
.MinGram(2)
.MaxGram(16)
.TokenChars(new TokenChar[] {
TokenChar.Letter,
TokenChar.Digit,
TokenChar.Punctuation,
TokenChar.Symbol
})
)
)
)
Then in my mapping I define:
.Text(t => t
.Name(tr => tr.TestRecordId)
.Analyzer("autocomplete")
.SearchAnalyzer("autocomplete_search")
)
When I search for TR000002, my query returns all results instead of just the records that contain those specific characters. What am I doing wrong? Is there a better tokenizer for this specific use case? Thanks!
EDIT: Here's a sample of what is returned:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 27,
"max_score" : 0.105360515,
"hits" : [
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "3",
"_score" : 0.105360515,
"_source" : {
"id" : 3,
"testRecordId" : "TR000002_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 12,
"testStatus" : {
"testStatusId" : 12,
"name" : "Complete: Postponed Until Further Notice"
},
"discriminator" : "SingleEventEffectsRecord",
"testRecordServiceOrders" : [
{
"testRecordId" : 3,
"serviceOrderId" : 9,
"serviceOrder" : {
"serviceOrderId" : 9,
"serviceOrderNumber" : "105702"
}
}
],
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
},
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false,
"sEETestRates" : [ ]
}
},
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "11",
"_score" : 0.105360515,
"_source" : {
"id" : 11,
"testRecordId" : "TR000011_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 1,
"testStatus" : {
"testStatusId" : 1,
"name" : "Active"
},
"discriminator" : "TotalIonizingDoseRecord",
"creatorUsername" : "[REDACTED]",
"creator" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"testRecordServiceOrders" : [ ],
"partLDC" : "12",
"waferLot" : "1",
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false,
"testStartDate" : "2020-07-30T00:00:00",
"actualCompletionDate" : "2020-07-31T00:00:00"
}
},
{
"_index" : "test-records-development-09-09-2020-02-00-00",
"_type" : "testrecorddto",
"_id" : "17",
"_score" : 0.105360515,
"_source" : {
"id" : 17,
"testRecordId" : "TR000017_1_2020",
"type" : 0,
"typeName" : "TIDCo60",
"missionId" : 1,
"mission" : {
"missionId" : 1,
"name" : "[REDACTED]",
"mRPLUsername" : "[REDACTED]",
"missionRadiationPartsLead" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"missionInstruments" : [
{
"missionId" : 1,
"instrumentId" : 1,
"cognizantEngineerUsername" : "[REDACTED]",
"instrument" : {
"intstrumentId" : 1,
"name" : "Instrument"
},
"cognizantEngineer" : {
"username" : "lewallen",
"displayName" : "[REDACTED]"
}
},
{
"missionId" : 1,
"instrumentId" : 2,
"instrument" : {
"intstrumentId" : 2,
"name" : "Instrument 2"
}
}
]
},
"procurementPartId" : 2,
"procurementPart" : {
"procurementPartId" : 2,
"partNumber" : "procurement part",
"part" : {
"partId" : 1,
"manufacturer" : "Texas Instruments",
"genericPartNumber" : "123",
"description" : "description",
"partTechnology" : "Part Tech"
}
},
"testStatusId" : 1,
"testStatus" : {
"testStatusId" : 1,
"name" : "Active"
},
"discriminator" : "TotalIonizingDoseRecord",
"creatorUsername" : "[REDACTED]",
"creator" : {
"username" : "[REDACTED]",
"displayName" : "[REDACTED]"
},
"testRecordServiceOrders" : [ ],
"rtdbFiles" : [ ],
"personnelGroups" : [
{
"personnelGroupUsers" : [ ]
}
],
"testRecordTestSubTypes" : [ ],
"testRecordTestFacilityConditions" : [ ],
"testRecordFollowers" : [ ],
"isDeleted" : false
}
},
Also here's what shows for mapping:
"testRecordId" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "autocomplete_search"
},
I guess I should also mention, I've been testing this query in the console like so:
GET test-records-development/_search
{
"query": {
"match": {
"testRecordId": {
"query": "TR000002_1_2020"
}
}
}
}
EDIT 2: Added API response from index _settings endpoint:
{
"test-records-development-09-09-2020-02-00-00" : {
"settings" : {
"index" : {
"number_of_shards" : "5",
"provided_name" : "test-records-development-09-09-2020-02-00-00",
"creation_date" : "1599617013874",
"analysis" : {
"analyzer" : {
"autocomplete" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "autocomplete"
},
"autocomplete_search" : {
"type" : "custom",
"tokenizer" : "lowercase"
}
},
"tokenizer" : {
"autocomplete" : {
"token_chars" : [
"letter",
"digit",
"punctuation",
"symbol"
],
"min_gram" : "2",
"type" : "ngram",
"max_gram" : "16"
}
}
},
"number_of_replicas" : "0",
"uuid" : "FSeCa0YwRCOJVbjfxYGkig",
"version" : {
"created" : "6080199"
}
}
}
}
}
Since I don't have access to your analyzer settings in JSON format, I can't confirm this, but the issue is most probably your search analyzer, autocomplete_search: it creates search-time tokens that match the index-time tokens of other documents.
For example: if searching for TR000002_1_2020 creates 2020 as a token, and the document containing TR000011_1_2020 was also indexed with a 2020 token, then your query will match that document.
You can use the _analyze API to check the tokens generated by an analyzer; as mentioned above, most likely some of the generated tokens match in the way shown here.

How to transform a json to flat using JOLT?

I need to move the fields inside the attributes element up to the same level as the id.
My problem is copying the attributes' fields so that they end up on that level.
This is my sample JSON
{
"data" : [ {
"type" : "types",
"id" : "CYY7",
"attributes" : {
"description" : null,
"color" : "#dfc12d",
"duration" : 15,
"created_at" : "2017-01-10T04:51:22Z",
"updated_at" : "2017-01-10T04:51:22Z",
"slug" : "15min",
"active" : false,
"location" : null
}
}, {
"type" : "types",
"id" : "BGER",
"attributes" : {
"description" : null,
"color" : "#8989fc",
"duration" : 30,
"created_at" : "2017-01-10T04:51:22Z",
"updated_at" : "2017-01-10T04:51:22Z",
"slug" : "30min",
"active" : true,
"location" : null
}
} ]
}
This is my sample transformation.
[{
"operation": "shift",
"spec": {
"data": {
"*": {
"id": "event_type[&1].id",
"type": "event_type[&1].type",
"attributes": "event_type[&1].attributes[&1].description"
}
}
}
}]
The desired output would be
{
"event_type" : [ {
"type" : "types",
"id" : "CYY7",
"description" : null,
"color" : "#dfc12d",
"duration" : 15
}, {
"type" : "types",
"id" : "BGER",
"description" : null,
"color" : "#8989fc",
"duration" : 30
}]
}
Try with this Jolt Spec:
[{
"operation": "shift",
"spec": {
"data": {
"*": {
"id": "event_type[&1].id",
"type": "event_type[&1].type",
"attributes": {
"description": "event_type[&2].description",
"color": "event_type[&2].color",
"duration": "event_type[&2].duration"
}
}
}
}
}]
Output:
{
"event_type" : [ {
"id" : "CYY7",
"type" : "types",
"description" : null,
"color" : "#dfc12d",
"duration" : 15
}, {
"id" : "BGER",
"type" : "types",
"description" : null,
"color" : "#8989fc",
"duration" : 30
} ]
}

Elasticsearch: sum of total term frequency in ONE document

I need the sumttf (sum of total term frequencies) of a field for ONE document. However, I can only get the sumttf across all documents...
I need to be able to access the variable in script like _index['field'].sumttf() of that particular document. This is what I've got so far.
Mapping:
{"document2" : {
"mappings" : {
"document2" : {
"_all" : {
"enabled" : false
},
"properties" : {
"content" : {
"type" : "string",
"term_vector" : "yes",
"fields" : {
"with_shingles" : {
"type" : "string",
"analyzer" : "my_shingle_analyzer"
}
}
},
...
Term vector:
"_index" : "document2",
"_type" : "document2",
"_id" : "709718",
"_version" : 1,
"term_vectors" : {
"content" : {
"field_statistics" : {
"sum_doc_freq" : 60676474,
"doc_count" : 198373,
"sum_ttf" : 224960172
},
"terms" : {
"0" : {
"term_freq" : 8
},
"0.5" : {
"term_freq" : 1
},
"003a0e45ea07a" : {
"term_freq" : 1
},
"005" : {
"term_freq" : 1
},
"0081989" : {
"term_freq" : 1
},
"01" : {
"term_freq" : 1
},
"01.08.2002" : {
"term_freq" : 1
},
...

Resources