Is there any way to effectively delete rows in Parse that do something like this SQL statement?
DELETE FROM table WHERE delete_me = 1
I've tried this, but it's very slow:
var query = new Parse.Query('table');
query.equalTo('delete_me', 1);
query.each(function(obj) {
  return obj.destroy();
}).then(function() {
  // Done
}, function(error) {
  // Error
});
Almost there: find() will get the objects satisfying the delete criteria, then destroyAll() will destroy them all.
var query = new Parse.Query('table');
query.equalTo('delete_me', 1);
query.find().then(function(results) {
  return Parse.Object.destroyAll(results);
}).then(function() {
  // Done
}, function(error) {
  // Error
});
Edit - to delete a table with more than 1k rows, it takes a little extra work with promises. The idea is to cursor through the table, grouping finds in batches of 1k (or some smaller increment), execute those finds concurrently using Promise.when(), then destroy the results concurrently the same way. (All finds must complete before any destroys begin; otherwise the deletions would shift the skip offsets.)
var query = new Parse.Query('table');
query.equalTo('delete_me', 1);
query.count().then(function(count) {
  var finds = [];
  for (var i = 0; i < count; i += 1000) {
    finds.push(findSkip(i));
  }
  return Parse.Promise.when(finds);
}).then(function() {
  var destroys = [];
  _.each(arguments, function(results) {
    destroys.push(Parse.Object.destroyAll(results));
  });
  return Parse.Promise.when(destroys);
}).then(function() {
  // Done
}, function(error) {
  // Error
});
// return a promise to find 1k rows starting after the ith row
function findSkip(i) {
  var query = new Parse.Query('table');
  query.limit(1000);
  query.equalTo('delete_me', 1);
  query.skip(i);
  return query.find();
}
Edit 2 - This might be faster, but you'd need to discover empirically:
// return a promise to delete up to 1k rows from the table; the promise is
// fulfilled with the count deleted
function deleteABunch() {
  var query = new Parse.Query('table');
  query.limit(1000);
  query.equalTo('delete_me', 1);
  // note: the find's promise must be returned, or deleteAll() below breaks
  return query.find().then(function(results) {
    return Parse.Object.destroyAll(results).then(function() {
      return results.length;
    });
  });
}
function deleteAll() {
  return deleteABunch().then(function(count) {
    return (count) ? deleteAll() : Parse.Promise.as();
  });
}
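Kicking it off is then just:
deleteAll().then(function() {
  // all matching rows deleted
}, function(error) {
  // handle the error
});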
The 1802 request thing is the rate limit (30/sec). The next idea is to batch the work into smaller transaction-count promises and run them serially, keeping the rate low but stretching the work out over time. That's the gist of my suggestions above, in a couple of forms (written before I understood that you have ~500k rows).
Unfortunately, Parse enforces a 10-second timeout limit, too. I think deleting ~1k rows per second is achievable, but I fear your 500k-row table will not yield to any method on the free tier. I think you have only these alternatives:
(a) throttle on the client - use some form of setTimeout() to perform small enough, short enough batches (see the sketch after this list). This is how my app handles it, because the heavy work is done only by admins, and I can instruct them not to reload the page.
(b) deploy your own node server which basically implements idea (a), calling parse.com in small enough steps to keep it happy while placing no computational burden on the client.
(c) a parse.com background job that wakes periodically and nibbles away at it. You only get one of these on the free tier, and I imagine most of the time it will just wake frequently and waste electricity.
(d) pay.
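Here's a minimal sketch of option (a), reusing the deleteABunch() helper from Edit 2 above. The 2-second pause between batches is an arbitrary choice; tune it against the 30/sec rate limit:
// Delete in batches, pausing between batches to stay under the rate limit
function deleteAllThrottled() {
  var promise = new Parse.Promise();
  (function nextBatch() {
    deleteABunch().then(function(count) {
      if (count === 0) {
        promise.resolve(); // nothing left to delete
      } else {
        setTimeout(nextBatch, 2000); // breathe for 2s, then continue
      }
    }, function(error) {
      promise.reject(error);
    });
  })();
  return promise;
}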
I'll be able to do some actual code/test late today. If I learn anything new I'll post here. Best of luck.
When writing custom functions to be used in spreadsheet cells, the default behavior for a sheet is to recalculate on edits, i.e. adding columns or rows will cause a custom function to update.
This is a problem if the custom function calls a paid API and uses credits: the user will consume API credits automatically.
I couldn't figure out a way to prevent this, so I decided to use the UserCache to cache the results for an arbitrary 25 minutes, and serve them back to the user should they happen to repeat the same function call. It's definitely not bulletproof, but it's better than nothing, I suppose. Apparently the cache can hold 10 MB, but is this the right approach? Could I be doing something smarter?
var _ROOT = {
  cache: CacheService.getUserCache(),
  cacheDefaultTime: 1500, // expiration is in seconds: 1500s = 25 minutes

  // Step 1 -- Construct a unique name for function call storage using the
  // function name and arguments passed to the function
  // example: function getPaidApi(1,2,3) becomes "getPaidApi123"
  stringifyFunctionArguments: function(functionName, argumentsPassed) {
    var argstring = '';
    for (var i = 0; i < argumentsPassed.length; i++) {
      argstring += argumentsPassed[i];
    }
    return functionName + argstring;
  },

  // Step 2 -- when a user calls a function that uses a paid api, we want to
  // cache the results for 25 minutes
  addToCache: function(encoded, returnedValues) {
    var values = {
      returnValues: returnedValues
    };
    Logger.log(encoded);
    this.cache.put(encoded, JSON.stringify(values), this.cacheDefaultTime);
  }, // this comma was missing, which is a syntax error in an object literal

  // Step 3 -- if the user repeats the exact same function call with the same
  // arguments, we give them the cached result
  // this way, we don't consume API credits as easily.
  checkCache: function(encoded) {
    var cached = this.cache.get(encoded);
    try {
      cached = JSON.parse(cached);
      return cached.returnValues;
    } catch (e) {
      return false;
    }
  }
};
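For context, a hypothetical custom function wired through these helpers would look like the sketch below (fetchFromPaidApi is a placeholder for the real API call):
// Hypothetical usage: check the cache before spending API credits.
// fetchFromPaidApi() stands in for the real paid API call.
function getPaidApi(a, b, c) {
  var key = _ROOT.stringifyFunctionArguments('getPaidApi', arguments);
  var cached = _ROOT.checkCache(key);
  if (cached !== false) {
    return cached; // cache hit: no credits consumed
  }
  var result = fetchFromPaidApi(a, b, c);
  _ROOT.addToCache(key, result);
  return result;
}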
Google Sheets already caches the values of custom functions, and will only run them again when either a) the inputs to the function have changed or b) the spreadsheet is being opened after being closed for a long time. I'm not able to replicate the recalculation you mentioned when adding and removing columns. Here's a simple example function I used to test that:
function rng() {
  return Math.random();
}
Your approach of using an additional cache for expensive queries looks fine in general. I'd recommend using the DocumentCache instead of the UserCache, since all users of the document can and should see the same cell values.
I'd also recommend a more robust encoding of function signatures, since your current implementation is unable to distinguish between the arguments [1, 2] and [12]. You could stringify the inputs and then base64 encode the result for compactness:
function encode(functionName, argumentsPassed) {
  var data = [functionName].concat(argumentsPassed);
  var json = JSON.stringify(data);
  return Utilities.base64Encode(json);
}
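With this encoding, the two calls no longer collide:
// encode('getPaidApi', [1, 2]) -> base64 of '["getPaidApi",1,2]'
// encode('getPaidApi', [12])   -> base64 of '["getPaidApi",12]'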
I have created a background job like this:
Parse.Cloud.job("ResetLeaderboard", function(request, response) {
  Parse.Cloud.useMasterKey();
  var query = new Parse.Query("Leaderboard");
  query.find({
    success: function(results) {
      response.success("Success!");
    },
    error: function(error) {
      response.error(error);
    }
  }).then(function(results) {
    return Parse.Object.destroyAll(results);
  });
});
I want to run this job every 15 days. But there is no option available at www.parse.com to set a time interval longer than a day.
I think I need to use a timestamp and compare that value with the current time. Can somebody show me the standard way to do this?
You're right that the job scheduling UI is constrained to a single day. The way to solve the problem is to have the job run daily, but have it do nothing on 14 out of 15 runs. Those do-nothing runs will be wasteful, but microscopically so, and Parse is paying the bills anyway.
The specifics of the solution depend on the specific requirements. If you require maximum control, like exactly 15 days down to the millisecond, starting at a millisecond-specific time, you'd need to create some scratch space in the database where state (in particular, the date) from the prior run is kept.
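A minimal sketch of that stateful variant, assuming a hypothetical JobState class with name and lastRun columns (doTheReset() stands in for the real work):
// Sketch: only proceed if at least 15 days have passed since the stored date
var query = new Parse.Query("JobState");
query.equalTo("name", "ResetLeaderboard");
query.first().then(function(state) {
  var now = new Date();
  var fifteenDays = 15 * 24 * 60 * 60 * 1000;
  if (state && now - state.get("lastRun") < fifteenDays) {
    return; // too soon, do nothing
  }
  state = state || new Parse.Object("JobState");
  state.set("name", "ResetLeaderboard");
  state.set("lastRun", now);
  return state.save().then(doTheReset);
});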
But the job looks like a cleanup task, where the requirement of "very nearly 15 days, beginning within 15 days" is sufficient. With that simpler requirement, your intuition is correct that simple date arithmetic will work.
Also, importantly, it looks to me like your intention is to find several objects in need of deletion, then delete them. The posted logic doesn't quite do that. I've repaired the logic error and cleaned up the promise handling as well...
// Schedule this to run daily using the web UI
Parse.Cloud.job("ResetLeaderboard", function(request, response) {
  if (dayOfYear() % 15 === 0) {
    Parse.Cloud.useMasterKey(); // take master-key privileges before querying
    var query = new Parse.Query("Leaderboard");
    query.find().then(function(results) {
      return Parse.Object.destroyAll(results);
    }).then(function() {
      response.success("Success!");
    }, function(error) {
      response.error(error);
    });
  } else {
    response.success("Successfully did nothing");
  }
});
function dayOfYear() {
  var now = new Date();
  var start = new Date(now.getFullYear(), 0, 0);
  var diff = now - start;
  var oneDay = 1000 * 60 * 60 * 24;
  return Math.floor(diff / oneDay);
}
The dayOfYear function is thanks to Alex Turpin.
I have a cron job that scrapes a list of items on a website and then inserts or updates records in a database. When I scrape the page, I want to create records for new ones that haven't been created yet, otherwise update any existing ones. Currently I'm doing something like this:
// pretend there is a "Widget" model defined
function createOrUpdateWidget(widgetConfig) {
  return Widget.find(widgetConfig.id)
    .then(function(widget) {
      if (widget === null) {
        return Widget.create(widgetConfig);
      } else {
        // return the promise so the chain waits for the update to finish
        return widget.updateAttributes(widgetConfig);
      }
    });
}
function createOrUpdateWidgets(widgetConfigObjects) {
  var promises = [];
  widgetConfigObjects.forEach(function(widgetConfig) {
    promises.push(createOrUpdateWidget(widgetConfig));
  });
  return Sequelize.Promise.all(promises);
}

createOrUpdateWidgets([...])
  .done(function() {
    console.log('Done!');
  });
This seems to work fine, but I'm not sure if I'm doing this "correctly" or not. Do all promises that perform DB interactions need to run serially, or is how I have them defined ok? Is there a better way to do this kind of thing?
What you're doing is pretty idiomatic and perfectly fine. The only room for improvement is to utilize the fact that Sequelize uses Bluebird for promises, so you get .map for free, which lets you convert:
function createOrUpdateWidgets(widgetConfigObjects) {
  var promises = [];
  widgetConfigObjects.forEach(function(widgetConfig) {
    promises.push(createOrUpdateWidget(widgetConfig));
  });
  return Sequelize.Promise.all(promises);
}
Into:
function createOrUpdateWidgets(widgetConfigObjects) {
  return Sequelize.Promise.map(widgetConfigObjects, createOrUpdateWidget);
}
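As a side note, Bluebird's map also accepts a concurrency option, should you ever want to bound how many of these queries run at once:
function createOrUpdateWidgets(widgetConfigObjects) {
  // process at most 5 widgets at a time
  return Sequelize.Promise.map(widgetConfigObjects, createOrUpdateWidget,
    { concurrency: 5 });
}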
Other than that minor improvement - you're chaining promises correctly and seem to have the hang of it.
The following code causes the browser to stop responding. Using a value like 100 for the repeat count solves the problem, but in my case I don't have a specific value for it.
Would you please suggest a solution?
var observer = Rx.Observer.create(
  function (x) {
    console.log(x);
  },
  function (err) {
    console.log('Error: ' + err);
  },
  function () {
    console.log('Completed');
  });

var repeat = Rx.Observable.repeat(10, null); // repeat indefinitely
var interval = Rx.Observable.interval(1000);

var zip = Rx.Observable.zip(repeat, interval, function(rep, inter) {
  return rep + inter;
});

zip.subscribe(observer);
The browser freezes because .repeat simply yields 10 indefinitely.
Since Rx is push-based, we have no way of knowing when zip needs another item. Instead, we just push new values to zip as they become available. The static (class-method?) repeat says "hey, I have new items RIGHT NOW ALWAYS HERE THEY ARE" and never relinquishes control-flow back to zip. This means zip never actually ends up subscribing to the interval observable, so zip just starts buffering indefinitely.
If you're coming from a functional background, then it would seem that an "infinite" list of 10s would zip nicely with a finite list of anything. Which is absolutely true, assuming your infinite list is lazy. In this case, our "list" has a mind of its own, and definitely isn't lazy.
I'd be happy to suggest a solution, but it seems that the example is contrived. What exactly are you attempting to do?
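That said, for the example exactly as written, one way to sidestep the unbounded buffering is to drop zip and derive the values from the interval directly; a sketch:
// Map over the interval instead of zipping it with an eager repeat
var source = Rx.Observable.interval(1000).map(function (inter) {
  return 10 + inter; // same values the zip above was meant to produce
});
source.subscribe(observer);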
I was dealing with the same problem. It looks like delay can do the trick: delay(0) makes each repeated value arrive asynchronously, so control returns to the event loop between repeats instead of looping forever.
Here's the slightly modified version of your code:
var observer = Rx.Observer.create(
  function (x) {
    console.log(x);
  },
  function (err) {
    console.log('Error: ' + err);
  },
  function () {
    console.log('Completed');
  });

var repeat = Rx.Observable.of(10).delay(0).repeat(-1); // repeat indefinitely
var interval = Rx.Observable.interval(1000);

var zip = Rx.Observable.zip(repeat, interval, function(rep, inter) {
  return rep + inter;
});

zip.subscribe(observer);
I have a page that displays some data using d3.js. Due to the heavy processing load, when the page loads, it freezes the browser for a few seconds.
I have determined that this "browser locking" behavior is due mostly to a line of the form:
selection.attr('d', linefn);
...where selection contains around 10K items.
I would like to replace this line with something like
function process_rest() {
  if (selection.size() > 0) {
    var next_item = first(selection); // function first() is hypothetical!
    next_item.attr('d', linefn);
    selection = rest(selection);      // function rest() is hypothetical!
    setTimeout(process_rest, 100);
    return;
  }
  finish_up();
}

setTimeout(process_rest, 100);
I'm looking for an efficient way to implement first and rest. My very naive guess would be something like:
function first(selection) {
  return d3.select(selection[0][0]);
}

function rest(selection) {
  selection[0] = selection[0].slice(1);
  return selection;
}
...but, AFAIK, this is going "behind the API", or at least feels like it. Is there an "official" (i.e. documented) way to achieve the same result?
EDIT: deleted the shift variant (it's safer not to update selection until after the processing of the first element has been successfully completed).
You can simply use .each(), staggering each update with setTimeout(). Note that the node must be captured before the timeout fires, since the callback runs with a different this binding:
selection.each(function(d, i) {
  var node = this; // capture the DOM node; `this` is rebound inside setTimeout
  setTimeout(function() { d3.select(node).attr("d", linefn); }, i * 100);
});
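One caveat: with ~10K items, an i * 100 stagger schedules the last update roughly 1000 seconds out. A chunked variant finishes far sooner while still yielding to the browser; here's a sketch, where the batch size of 500 is arbitrary and selection[0] is the d3 v3 node array, as in your question:
// Process the selection in chunks, yielding to the event loop between chunks
function renderInChunks(selection, chunkSize) {
  var nodes = selection[0]; // d3 v3: the group's array of DOM nodes
  var index = 0;
  (function tick() {
    var stop = Math.min(index + chunkSize, nodes.length);
    for (; index < stop; index++) {
      if (nodes[index]) d3.select(nodes[index]).attr('d', linefn);
    }
    if (index < nodes.length) {
      setTimeout(tick, 0); // let the browser breathe, then continue
    }
  })();
}
renderInChunks(selection, 500);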