Mongodb aggregation remove null values from object with nested properties - spring

Is there a way to remove literally all null or empty string values from an object? We have an aggregation which creates an object with empty fields and empty objects should the value be null.
What we wish to do is remove all null properties and empty objects and recreate the object, in order to keep the data as small as possible.
e.g. in the following object, only 'test' and 'more-nested-data' should be taken into account, the rest can be removed
{
"test": "some",
"test2": {
},
"test3": {
"some-key": {
},
"some-other-key": {
"more-nested-data": true,
"more-nested-emtpy": null
}
}
}
which should become:
{
"test": "some",
"test3": {
"some-other-key": {
"more-nested-data": true
}
}
}
I tried a lot, but I think by using objectToArray that something could be done, but I have not found the solution yet. The required aggregation should need to recursively (or by defined levels) remove null properties and empty objects.

Use the $function operator available in 4.4 (Aug 2021) to do this recursively as you note. Given this input which is a slightly expanded version of that supplied in the question:
var dd = {
"test": "some",
"test2": { },
"test3": {
"some-key": { },
"some-other-key": {
"more-nested-data": true,
"more-nested-emtpy": null,
"emptyArr": [],
"notEmptyArr": [
"XXX",
null,
{"corn":"dog"},
{"bad":null},
{"other": {zip:null, empty:[], zap:"notNull"}}
]
}
}
}
db.foo.insert(dd);
then this pipeline:
db.foo.aggregate([
{$replaceRoot: {newRoot: {$function: {
body: function(obj) {
var process = function(holder, spot, value) {
var remove_it = false;
// test FIRST since [] instanceof Object is true!
if(Array.isArray(value)) {
// walk BACKWARDS due to potential splice() later
// that will change the length...
for(var jj = value.length - 1; jj >= 0; jj--) {
process(value, jj, value[jj]);
}
if(0 == value.length) {
remove_it = true;
}
} else if(value instanceof Object) {
walkObj(value);
if(0 == Object.keys(value).length) {
remove_it = true;
}
} else {
if(null == value) {
remove_it = true;
}
}
if(remove_it) {
if(Array.isArray(holder)) {
holder.splice(spot,1); // snip out the val
} else if(holder instanceof Object) {
delete holder[spot];
}
}
};
var walkObj = function(obj) {
Object.keys(obj).forEach(function(k) {
process(obj, k, obj[k]);
});
}
walkObj(obj); // entry point!
return obj;
},
args: [ "$$CURRENT" ],
lang: "js"
}}
}}
]);
produces this result:
{
"_id" : 0,
"test" : "some",
"test3" : {
"some-other-key" : {
"more-nested-data" : true,
"notEmptyArr" : [
"XXX",
{
"corn" : "dog"
},
{
"other" : {
"zap" : "notNull"
}
}
]
}
}
}
A convenient way to debug such complex functions is by declaring them as variables outside of the pipeline and running data through them to simulate the documents (objects) coming out the database, e.g.:
ff = function(obj) {
var process = function(holder, spot, value) {
var remove_it = false;
// test FIRST since [] instanceof Object is true!
if(Array.isArray(value)) {
...
printjson(ff(dd)); // use the same doc as above
You can put print and other debugging aids into the code and then when you are done, you can remove them and call the pipeline to process the real data as follows:
db.foo.aggregate([
{$replaceRoot: {newRoot: {$function: {
body: ff, // substitute here!
args: [ "$$CURRENT" ],
lang: "js"
}}
}}
]);

Sounds like the unwind operator would help. Checkout the unwind operator at https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/

Related

Spring Boot Mongo update nested array of documents

I'm trying to set an attribute of a document inside an array to uppercase.
This is a document example
{
"_id": ObjectId("5e786a078bc3b3333627341e"),
"test": [
{
"itemName": "alpha305102992",
"itemNumber": ""
},
{
"itemName": "beta305102630",
"itemNumber": "P5000"
},
{
"itemName": "gamma305102633 ",
"itemNumber": ""
}]
}
I already tried a lot of thing.
private void NameElementsToUpper() {
AggregationUpdate update = AggregationUpdate.update();
//This one does not work
update.set("test.itemName").toValue(StringOperators.valueOf(test.itemName).toUpper());
//This one also
update.set(SetOperation.set("test.$[].itemName").withValueOfExpression("test.#this.itemName"));
//And every variant in between these two.
// ...
Query query = new Query();
UpdateResult result = mongoTemplate.updateMulti(query, update, aClass.class);
log.info("updated {} records", result.getModifiedCount());
}
I see that Fields class in spring data is hooking into the "$" char and behaving special if you mention it. Do not seem to find the correct documentation.
EDIT: Following update seems to work but I do not seem to get it translated into spring-batch-mongo code
db.collection.update({},
[
{
$set: {
"test": {
$map: {
input: "$test",
in: {
$mergeObjects: [
"$$this",
{
itemName: {
$toUpper: "$$this.itemName"
}
}
]
}
}
}
}
}
])
Any solutions?
Thanks!
For now I'm using which does what i need. But a spring data way would be cleaner.
mongoTemplate.getDb().getCollection(mongoTemplate.getCollectionName(Application.class)).updateMany(
new BasicDBObject(),
Collections.singletonList(BasicDBObject.parse("""
{
$set: {
"test": {
$map: {
input: "$test",
in: {
$mergeObjects: [
"$$this",
{
itemName: { $toUpper: "$$this.itemName" }
}
]
}
}
}
}
}
"""))
);

Elasticsearch ingest pipeline: how to recursively modify values in a HashMap

Using an ingest pipeline, I want to iterate over a HashMap and remove underscores from all string values (where underscores exist), leaving underscores in the keys intact. Some values are arrays that must further be iterated over to do the same modification.
In the pipeline, I use a function to traverse and modify the values of a Collection view of the HashMap.
PUT /_ingest/pipeline/samples
{
"description": "preprocessing of samples.json",
"processors": [
{
"script": {
"tag": "remove underscore from sample_tags values",
"source": """
void findReplace(Collection collection) {
collection.forEach(element -> {
if (element instanceof String) {
element.replace('_',' ');
} else {
findReplace(element);
}
return true;
})
}
Collection samples = ctx.samples;
samples.forEach(sample -> { //sample.sample_tags is a HashMap
Collection sample_tags = sample.sample_tags.values();
findReplace(sample_tags);
return true;
})
"""
}
}
]
}
When I simulate the pipeline ingestion, I find the string values are not modified. Where am I going wrong?
POST /_ingest/pipeline/samples/_simulate
{
"docs": [
{
"_index": "samples",
"_id": "xUSU_3UB5CXFr25x7DcC",
"_source": {
"samples": [
{
"sample_tags": {
"Entry_A": [
"A_hyphentated-sample",
"sample1"
],
"Entry_B": "A_multiple_underscore_example",
"Entry_C": [
"sample2",
"another_example_with_underscores"
],
"Entry_E": "last_example"
}
}
]
}
}
]
}
\\Result
{
"docs" : [
{
"doc" : {
"_index" : "samples",
"_type" : "_doc",
"_id" : "xUSU_3UB5CXFr25x7DcC",
"_source" : {
"samples" : [
{
"sample_tags" : {
"Entry_E" : "last_example",
"Entry_C" : [
"sample2",
"another_example_with_underscores"
],
"Entry_B" : "A_multiple_underscore_example",
"Entry_A" : [
"A_hyphentated-sample",
"sample1"
]
}
}
]
},
"_ingest" : {
"timestamp" : "2020-12-01T17:29:52.3917165Z"
}
}
}
]
}
Here is a modified version of your script that will work on the data you provided:
PUT /_ingest/pipeline/samples
{
"description": "preprocessing of samples.json",
"processors": [
{
"script": {
"tag": "remove underscore from sample_tags values",
"source": """
String replaceString(String value) {
return value.replace('_',' ');
}
void findReplace(Map map) {
map.keySet().forEach(key -> {
if (map[key] instanceof String) {
map[key] = replaceString(map[key]);
} else {
map[key] = map[key].stream().map(this::replaceString).collect(Collectors.toList());
}
});
}
ctx.samples.forEach(sample -> {
findReplace(sample.sample_tags);
return true;
});
"""
}
}
]
}
The result looks like this:
{
"samples" : [
{
"sample_tags" : {
"Entry_E" : "last example",
"Entry_C" : [
"sample2",
"another example with underscores"
],
"Entry_B" : "A multiple underscore example",
"Entry_A" : [
"A hyphentated-sample",
"sample1"
]
}
}
]
}
You were on the right path but you were working on copies of values and weren't setting the modified values back onto the document context ctx which is eventually returned from the pipeline. This means you'll need to keep track of the current iteration indexes -- so for the array lists, as for the hash maps and everything in between -- so that you can then target the fields' positions in the deeply nested context.
Here's an example taking care of strings and (string-only) array lists. You'll need to extend it to handle hash maps (and other types) and then perhaps extract the whole process into a separate function. But AFAIK you cannot return multiple data types in Java so it may be challenging...
PUT /_ingest/pipeline/samples
{
"description": "preprocessing of samples.json",
"processors": [
{
"script": {
"tag": "remove underscore from sample_tags values",
"source": """
ArrayList samples = ctx.samples;
for (int i = 0; i < samples.size(); i++) {
def sample = samples.get(i).sample_tags;
for (def entry : sample.entrySet()) {
def key = entry.getKey();
def val = entry.getValue();
def replaced_val;
if (val instanceof String) {
replaced_val = val.replace('_',' ');
} else if (val instanceof ArrayList) {
replaced_val = new ArrayList();
for (int j = 0; j < val.length; j++) {
replaced_val.add(val[j].replace('_',' '));
}
}
// else if (val instanceof HashMap) {
// do your thing
// }
// crucial part
ctx.samples[i][key] = replaced_val;
}
}
"""
}
}
]
}

Is there a way to filter json data format field in strapi?

Hi Guys I'm trying to filter post with data json format field?
"categoryList": ["cat", "cat1"]
For anyone still looking for a solution, this is what I have done for a json type field called tags of a collection type called Articles.
I have two articles in the database with one article having the following values set:
title: "lorem ipsum 1",
tags: [
"test",
"rest"
]
The other article has the following values set:
title: "lorem ipsum 2",
tags: [
"test",
"graphql"
]
My graphql query looks like this:
query {
articlesByTag(limit: 2, where: {tags_include: ["test", "rest"]}, start: 0, sort: "title:asc") {
title,
tags
}
}
While my rest query looks like this:
http://localhost:1337/articlesByTag?limit=2&tags_include[]=test&tags_include[]=rest
This is my articles.js service file:
const { convertRestQueryParams, buildQuery } = require('strapi-utils');
const _ = require('lodash');
const { convertToParams, convertToQuery } = require('../../../node_modules/strapi-plugin-graphql/services/utils');
module.exports = {
async findByTag(ctx) {
let tags_include;
if (ctx.where && ctx.where.tags_include && ctx.where.tags_include.length > 0) {
tags_include = ctx.where.tags_include;
delete ctx.where.tags_include;
} else if (ctx.query && ctx.query.tags_include && ctx.query.tags_include.length > 0) {
tags_include = ctx.query.tags_include;
delete ctx.query.tags_include;
}
if (!Array.isArray(tags_include)) {
tags_include = [tags_include];
}
let filters = null;
if (ctx.query) {
filters = convertRestQueryParams({
...convertToParams(ctx.query)
});
} else {
filters = convertRestQueryParams({
...convertToParams(_.pick(ctx, ['limit', 'start', 'sort'])),
...convertToQuery(ctx.where),
});
}
const entities = await strapi.query('articles').model.query(qb => {
buildQuery({ model: strapi.query('articles').model, filters: filters })(qb);
if (tags_include.length > 0) {
tags_include.forEach((tag) => {
if (tag && tag.length > 0) {
const likeStr = `%"${tag}"%`;
qb.andWhere('tags', 'like', likeStr);
}
});
}
}).fetchAll();
return entities;
},
};
This is the entry needed in routes.js
{
"method": "GET",
"path": "/articlesByTag",
"handler": "articles.findByTag",
"config": {
"policies": []
}
}
This is the controller articles.js
const { sanitizeEntity } = require('strapi-utils');
module.exports = {
async findByTag(ctx) {
const entities = await strapi.services.articles.findByTag(ctx);
return entities.map(entity => sanitizeEntity(entity, { model: strapi.models.articles }));
},
};
And finally this is the schema.graphql.js
module.exports = {
query: `
articlesByTag(sort: String, limit: Int, start: Int, where: JSON): [Articles]
`,
resolver: {
Query: {
articlesByTag: {
description: 'Return articles filtered by tag',
resolverOf: 'application::articles.articles.findByTag',
resolver: async (obj, options, ctx) => {
return await strapi.api.articles.controllers.articles.findByTag(options);
},
},
},
},
};
There is not currently a way to filter the JSON fields yet as of beta.17.8 (latest)
Probably something like that?
strapi.query('cool_model').find({ categoryList: { $all: [ "cat" , "cat1" ] } })

Apollo Server / GraphQL - Properties of Nested Array Returning Null

Bear with me, I will explain this the best I can. Please let me know if more information is needed, I am trying to keep this as brief as possible.
I am using Apollo Server and the 'apollo-datasource-rest' plugin to access a REST API. When attempting to get the property values from a nested array of objects I get a null response for each field/property. In addition, the array being queried is only showing a single iteration when multiple are available.
The field in question is the 'cores' field within the Rocket type, i.e., launch.rocket.firstStage.cores
I have attempted various ways of mapping through 'cores' (thinking this was what it wanted) with no success.
To keep things short and simple I'm only including the code for the specific issue. All other parts of the query are operating as expected.
You can view the API response I am hitting here: https://api.spacexdata.com/v3/launches/77
schema.js
const { gql } = require('apollo-server');
const typeDefs = gql`
type Query {
singleLaunch(flightNumber: Int!): Launch
}
type Launch {
flightNumber: Int!
rocket: Rocket
}
type Rocket {
firstStage: Cores
}
type Cores {
cores: [CoreFields]
}
type CoreFields {
flight: Int
gridfins: Boolean
legs: Boolean
reused: Boolean
landingType: String
landingVehicle: String
landingSuccess: Boolean
}
`;
module.exports = typeDefs;
Data Source - launch.js
const { RESTDataSource } = require('apollo-datasource-rest');
class LaunchAPI extends RESTDataSource {
constructor() {
super();
this.baseURL = 'https://api.spacexdata.com/v3/';
}
async getLaunchById({ launchId }) {
const res = await this.get('launches', {
flight_number: launchId,
});
return this.launchReducer(res[0]);
}
launchReducer(launch) {
return {
flightNumber: launch.flight_number || 0,
rocket: {
firstStage: {
cores: [
{
flight: launch.rocket.first_stage.cores.flight,
gridfins: launch.rocket.first_stage.cores.gridfins,
legs: launch.rocket.first_stage.cores.legs,
landingType: launch.rocket.first_stage.cores.landing_type,
landingVehicle: launch.rocket.first_stage.cores.landing_vehicle,
landingSuccess: launch.rocket.first_stage.cores.landing_success,
},
],
},
};
}
}
module.exports = LaunchAPI;
resolvers.js
module.exports = {
Query: {
singleLaunch: (_, { flightNumber }, { dataSources }) =>
dataSources.launchAPI.getLaunchById({ launchId: flightNumber }),
},
};
Query
query GetLaunchById($flightNumber: Int!) {
singleLaunch(flightNumber: $flightNumber) {
flightNumber
rocket {
firstStage {
cores {
flight
gridfins
legs
reused
landingType
landingVehicle
landingSuccess
}
}
}
}
}
Expected Result
{
"data": {
"singleLaunch": {
"flightNumber": 77,
"rocket": {
"firstStage": {
"cores": [
{
"flight": 1,
"gridfins": true,
"legs": true,
"reused": true,
"landingType": "ASDS",
"landingVehicle": "OCISLY",
"landSuccess": true,
},
{
"flight": 1,
"gridfins": true,
"legs": true,
"reused": false,
"landingType": "RTLS",
"landingVehicle": "LZ-1",
"landSuccess": true
},
{
"flight": 1,
"gridfins": true,
"legs": true,
"reused": false,
"landingType": "RTLS",
"landingVehicle": "LZ-2",
"landSuccess": true
},
]
}
},
}
}
}
Actual Result (Through GraphQL Playground)
{
"data": {
"singleLaunch": {
"flightNumber": 77,
"rocket": {
"firstStage": {
"cores": [
{
"flight": null,
"gridfins": null,
"legs": null,
"reused": null,
"landingType": null,
"landingVehicle": null,
"landingSuccess": null
}
]
}
},
}
}
}
Any suggestions as to what I am doing wrong here would be greatly appreciated. Again, let me know if more information is needed.
Thank you!
Missing base url
There should be
await this.get( this.baseURL + 'launches'
IMHO there should be a map used within launchReducer to return an array, sth like:
launchReducer(launch) {
return {
flightNumber: launch.flight_number || 0,
rocket: {
firstStage: {
cores: launch.rocket.first_stage.cores.map(core => ({
flight: core.flight,
gridfins: core.gridfins,
legs: core.legs,
landingType: core.landing_type,
landingVehicle: core.landing_vehicle,
landSuccess: core.land_success,
})),
},
},
};
}
.map(core => ({ is for returning object [literal], the same as/shorter version of .map(core => { return {

Issue in Sorting Integer values In AngularJs Custom Sort function

I am having problem in my custom sorting method. I have a json structure wherein integer values are being passed as String. While sorting I need to sort the list based on the integer value, but it is being sorted as String values.
I have created a JsFiddle for the same:
http://jsfiddle.net/ayan_2587/vjF2D/14/
The angular code is as follows:-
var app = angular.module('myModule', []);
app.controller('ContactListCtrl', function ($scope, $timeout, $filter) {
var sortingOrder = 'price';
$scope.sortingOrder = sortingOrder;
$scope.sortorder = '-sort';
$scope.contacts = [{
"name": "Richard",
"surname": "Stallman",
"price": "7200"
}, {
"name": "Donald",
"surname": "Knuth",
"price": "34565"
}, {
"name": "Linus",
"surname": "Torvalds",
"price": "23454"
}];
$scope.setLoading = function (loading) {
$scope.isLoading = loading;
}
$scope.layoutDone = function (value) {
console.log(value);
$scope.setLoading(true);
$timeout(function() {
// take care of the sorting order
if ($scope.sortingOrder !== '') {
if(value == 'sort'){
$scope.contacts = $filter('orderBy')($scope.contacts, $scope.sortingOrder, false);
}
else if(value == '-sort'){
$scope.contacts = $filter('orderBy')($scope.contacts, $scope.sortingOrder, true);
}
}
$scope.setLoading(false);
}, 1000);
}
$scope.loadFeed = function(url) {
$scope.setLoading(true);
}
$scope.loadFeed();
});
app.directive('repeatDone', function() {
return function(scope, element, attrs) {
if (scope.$last) { // all are rendered
scope.$eval(attrs.repeatDone);
}
}
})
Please help me out !!!
SInce you sort in JS, you can allways use Array.sort() - at least on modern browsers-, passing it a sort function like:
function sortAsc(a,b) {
a = parseInt(a.price);
b = parseInt(b.price);
return a > b ? 1 : (a === b ? 0 : -1);
}
And then execute the sorting like:
if(value == 'sort'){
$scope.contacts.sort(sortAsc);
}
See forked fiddle: http://jsfiddle.net/sRruf/ (the initial state is not sorted)
The sort property (here price) can be parameterized with a little extra work.
And here is a version using the orderBy filter and a custom predicate: http://jsfiddle.net/sRruf/1/

Resources