IndexedDB "updates" every browser restart and erases data - firefox

I wrote a Firefox WebExtension that downloads data files from a website and uses IndexedDB to store/update the data. The .SQLite file that is created is ~2GB in size. Whenever I restart Firefox, the extension executes the onupgradeneeded event, even though I always use version "1". I create the database object stores and indexes in that event, so all my data ends up getting deleted.
The only time this doesn't happen is when I close Firefox while the data is being downloaded or stored. The next time I start Firefox, it does not execute the event (as should be the case). It then continues to update the database as it was programmed to do.
I installed the SQLite Manager extension in hopes that I could identify something causing the issue to the database, but nothing was obvious to me.
Here is part of my background script:
init().then(fetchData).then(addData).catch(dberror);
function init() {
req = indexedDB.open("db", 1);
req.onupgradeneeded = e => {
var name;
var key;
console.log("Upgrading database...", e.oldVersion, e.newVersion);
db = e.currentTarget.result;
var store = db.createObjectStore("db", { keyPath: "KEY" });
db.createObjectStore("version", { keyPath: "version" });
for (name in indexes) {
key = ...
store.createIndex(name, key);
};
};
return new Promise( (resolve, reject) => {
req.onsuccess = e => {
db = e.currentTarget.result;
db.onerror = dberror;
var cursor = db.transaction("MECs").objectStore("MECs").index("STATUS_DATE").openCursor(null, 'prev');
cursor.onsuccess = e => {
if (e.target.result) {
lastMod = e.target.result.key;
fileYear = lastMod.getFullYear();
}
else lastMod = new Date(startingfileYear, 0);
resolve(lastMod);
}
cursor.onerror = reject;
};
req.onerror = e => {
dberror(e);
reject(e);
}
});
}
function fetchData(param) {
// Get data based on the param and return it
return fetchFile(filename);
}
function addData(data) {
var trans = db.transaction("db", "readwrite");
var store = trans.objectStore("db");
var req;
var n = 0;
var data2 = [];
var addPromise;
trans.onerror = event => console.log("Error! Error! ", event.target.error);
trans.onabort = event => console.log("Abort! Abort! ", event.target.error);
data.forEach((row, index) => {
//process data here
data2 = ...
});
(function storeRegData(n) {
var row = data[n];
if (!row) return;
req = store.put(row);
req.onsuccess = event => {
numUpdated++;
storeRegData(++n);
}
req.onabort = event => console.log("Abort! Abort! ", event.target.error);
req.onerror = event => console.log("Error! Error! ", event.target.error);
})(0); // I'm storing one row at a time because the transaction is failing when I queue too many rows.
addPromise = fetchData(data2).then(
response => {
var trans2 = db.transaction("db", "readwrite");
var store2 = trans2.objectStore("db");
var req2;
response.forEach(row => {
req2 = store2.put(row);
req2.onsuccess = event => numUpdated++;
req2.onerror = console.log;
});
return new Promise((resolve, reject) => trans2.oncomplete = e => resolve(response));
},
console.log)
);
return new Promise((resolve, reject) => trans.oncomplete = e => {
if (noMoreData)
resolve(addPromise);
else if (moreData)
resolve( addPromise.then(fetchData).then(addData) );
});
}
And here is my manifest
{
"author": "Name",
"manifest_version": 2,
"name": "Extension",
"description": "Extension",
"version": "3.0",
"applications": {
"gecko": {
"strict_min_version": "50.0",
"id": "myID",
"update_url": "https://update.me"
}
},
"background": {
"scripts": [
"js/background.js"
]
},
"content_scripts": [
{
"matches": [ "https://match.me/*" ],
"js": [
"script.js"
],
"css": [
"style.css"
]
}
],
"icons": {
"48": "icon.png"
},
"options_ui": {
"page": "options.html"
},
"page_action": {
"browser_style": true,
"default_icon": {
"19": "icon-19.png",
"38": "icon-38.png"
},
"default_title": "Extension",
"default_popup": "popup.html"
},
"permissions": [
"https://web.address/*",
"downloads",
"notifications",
"storage",
"tabs",
"webRequest",
"webNavigation"
],
"web_accessible_resources": [
"pictures.png"
]
}
Why does Firefox think the database is at version 0 when I restart the browser? I can use the stored data after I download it, so why does it overwrite it on every restart? I could possibly do a workaround where I only create the store and indexes on extension installation or update, but that's not a solution to the actual issue.
UPDATE: I tried the following to no avail -
Close the database and re-open after storing each data file
Create a new object store for each data file
UPDATE 2: It appears this is related to a storage issue. Apparently, 2GB is the storage limit for non-persistent storage. In Firefox you can by-pass this by making the storage persistent with the following command:
indexedDB.open("db", { version: 1, storage: "persistent" })
See the bugzilla report here.
Unfortunately, when run from a background page, the popup asking for confirmation is not handled, so you can never acknowledge it. Supposedly, when Firefox 56 comes out, you'll be able to use the "unlimitedStorage" permission, which will by-pass the confirmation popup, so it should work from the background page.
Update 3: So it looks like the limit is actually ~1.5 GB. I just spent over a week re-coding the extension to create and use a different database for each year of data, making each database no larger than 150 MB. And still onupgradeneeded executes when I restart the browser and wipes all my data. If, however, I limit the total amount of data in all the databases to the above limit, it works. Unfortunately, I'm still in the same boat.
Does no one have any ideas?

As I mentioned in the updates to my question, there appears to be a limit of ~1.5GB for the "default" storage of indexedDB. Changing the storage to "persistent" will remove that limit. Because persistent storage currently requires user input, however, the database has to be opened from a window that can handle a UI response.
This can be done from the background script by creating a new window with browser.window.create() and opening the database from there. There are security restrictions that prevent inline scripts from running in the new page, so I had to link to local javascript files for that (i.e. <script src="db.js"></script>. I think you can also change the content security policy with a manifest instruction, but I didn't do that.
Hopefully, the unlimitedStorage permission will be supported in Firefox 56, which will remove the popup, allowing a persistent database to be created/accessed directly from the background script.

Related

Group user events into activity sessions

You’re in charge of implementing a new analytics “sessions” view. You’re given a set of data that consists of individual web page visits, along with a visitorId which is generated by a tracking cookie that uniquely identifies each visitor. From this data we need to generate a list of sessions for each visitor.
The data set looks like this:
"events": [
{
"url": "/pages/a-big-river",
"visitorId": "d1177368-2310-11e8-9e2a-9b860a0d9039",
"timestamp": 1512754583000
},
{
"url": "/pages/a-small-dog",
"visitorId": "d1177368-2310-11e8-9e2a-9b860a0d9039",
"timestamp": 1512754631000
},
{
"url": "/pages/a-big-talk",
"visitorId": "f877b96c-9969-4abc-bbe2-54b17d030f8b",
"timestamp": 1512709065294
},
{
"url": "/pages/a-sad-story",
"visitorId": "f877b96c-9969-4abc-bbe2-54b17d030f8b",
"timestamp": 1512711000000
},
{
"url": "/pages/a-big-river",
"visitorId": "d1177368-2310-11e8-9e2a-9b860a0d9039",
"timestamp": 1512754436000
},
{
"url": "/pages/a-sad-story",
"visitorId": "f877b96c-9969-4abc-bbe2-54b17d030f8b",
"timestamp": 1512709024000
}
]
}
Given this input data, we want to create a set of sessions of the incoming data. A sessions is defined as a group of events from a single visitor with no more than 10 minutes between each consecutive event. A visitor can have multiple sessions. So given the example input data above, we would expect output which looks like:
{
"sessionsByUser": {
"f877b96c-9969-4abc-bbe2-54b17d030f8b": [
{
"duration": 41294,
"pages": [
"/pages/a-sad-story",
"/pages/a-big-talk"
],
"startTime": 1512709024000
},
{
"duration": 0,
"pages": [
"/pages/a-sad-story"
],
"startTime": 1512711000000
}
],
"d1177368-2310-11e8-9e2a-9b860a0d9039": [
{
"duration": 195000,
"pages": [
"/pages/a-big-river",
"/pages/a-big-river",
"/pages/a-small-dog"
],
"startTime": 1512754436000
}
]
}
}
Notes
Timestamps are in milliseconds.
Events may not be given in chronological order.
The visitors in sessionsByUser can be in any order.
For each visitor, sessions to be in chronological order.
For each session, the URLs should be sorted in chronological order
For a session with only one event the duration should be zero
Each event in a session (except the first event) must have occurred
within 10 minutes of the preceding event in the session. This means
that there can be more than 10 minutes between the first and the last
event in the session.
Note: I am not gonna show you how to make the exact output format you need, but I will show you the general approach for solving this problem, and hopefully you can figure out how to change it for your requirements.
You are gonna want to start out by grouping the events by user:
events_by_user = events.group_by { |event| event[:visitorId] }
Now, for each user, you need to sort their events by timestamp:
events_by_user.transform_values! do |events|
events.sort_by { |event| event[:timestamp] }
end
Now, you need to loop through each user's events and compare them in sequential order, putting them in groups based on timestamp similarity:
session_length = 10 # seconds
sessions = {}
events_by_user.each do |visitor_id, events|
sessions[visitor_id] = []
events.each do |event|
if sessions[visitor_id].empty?
sessions[visitor_id].push([event])
else
last_session = sessions[visitor_id].last
last_timestamp = last_session.last[:timestamp]
if (event[:timestamp] - last_timestamp) <= session_length
last_session.push(event)
else
sessions[visitor_id].push([event])
end
end
end
end
Now sessions will contain a hash like this:
{
<visitor_id>: [
[<list of events in session 1>],
[<list of events in session 2>]
],
etc.
}
You can then extract the start time, total duration, etc
Group the "events" array by the property "visitorId" first. You can use in JavaScript the
Array.prototype.reduce(): The reduce() method executes a user-supplied
“reducer” callback function on each element of the array, in order,
passing in the return value from the calculation on the preceding
element. The final result of running the reducer across all elements
of the array is a single value. So, set {} as the initial value for the reducer function, at each pass use the visitorId as key to an array that will hold events of the visitor, push the current event to respective position in the array.
The a variable || [] statement is used to make an 'undefined'
value as [], empty array.
Now sort the events array that is built just now by timestamp in ascending order.
Loop through it and compare timestamps pairwise (previous and current array element), if difference is below given session length(eg 10 min), merge the two sessions and push it in an array with 'visitorId' as key. Use a variable to keep track of the index of the session to be merged together.
let data = require('d:\\dataset.json');
//Group by visitorId
let sessions = {
sessionsByUser: data.events.reduce(function (events, session) {
(events[session['visitorId']] = events[session['visitorId']] || []).push(session);
return events;
}, {})
};
//Sort events by timestamp ascending
for(let key in sessions.sessionsByUser){
let events = sessions.sessionsByUser[key];
events = events.sort((a, b) => {
return a.timestamp - b.timestamp;
});
}
let userSessions = {};
for(let key in sessions.sessionsByUser){
let events = sessions.sessionsByUser[key];
let lastIndex = 0;
for(let i = 0; i < events.length; i++)
{
if(i == 0) {
userSessions[key] = [{
duration: 0,
pages: [events[i].url],
startTime: events[i].timestamp
}]
}
else {
//Check difference (10 min)
if(events[i].timestamp - events[i-1].timestamp < 600000) {
let session = userSessions[key][lastIndex];
session.duration += (events[i].timestamp - events[i-1].timestamp);
session.pages.push(events[i].url);
}
else {
userSessions[key].push({
duration: 0,
pages: [events[i].url],
startTime: events[i].timestamp
});
lastIndex++;
}
}
}
}
let soln = {
sessionsByUser: userSessions
};
console.log(JSON.stringify(soln));
Run command in cmd: Node <filename>.js
Change dataset path, navigate to the directory of the file in cmd. Node must
be installed on the system.

How to modify just a property from a dexie store without deleting the rest?

I'm having the dexie stores showed in the print screen below:
Dexie stores print screen
My goal is to update a dexie field row from a store without losing the rest of the data.
For example: when I edit and save the field "com_name" from the second row (key={2}) I want to update "com_name" only and not lose the rest of the properties, see first and the third row.
I already tried with collection.modify and table.update but both deleted the rest of the properties when used the code below:
dexieDB.table('company').where('dexieKey').equals('{1}')
//USING table.update
//.update(dexieRecord.dexiekey, {
// company: {
// com_name: "TOP SERVE 2"
// }
//})
.modify(
{
company:
{
com_name: TOP SERVE 2
}
}
)
.then(function (updated) {
if (updated)
console.log("Success.");
else
console.log("Nothing was updated.");
})
.catch(function (err) { console.log(err); });
Any idea how can I accomplish that?
Thanks
Alex
You where right to use Table.update or Collection.modify. They should never delete other properties than the ones specified. Can you paste a jsitor.com or jsfiddle repro of that and someone may help you pinpoint why the code doesn't work as expected.
Now that you are saying I realised that company and contact stores are created dynamically and editedRecords store has the indexes explicitly declared therefore when update company or contact store, since dexie doesn't see the indexes will overwrite. I haven't tested it yet but I suspect this is the behaviour.
See the print screen below:
Dexie stores overview
Basically I have json raw data from db and in the browser I create the stores and stores data based on it, see code below:
function createDexieTables(jsonData) { //jsonData - array, is the json from db
const stores = {};
const editedRecordsTable = 'editedRecords';
jsonData.forEach((jsonPackage) => {
for (table in jsonPackage) {
if (_.find(dexieDB.tables, { 'name': table }) == undefined) {
stores[table] = 'dexieKey';
}
}
});
stores[editedRecordsTable] = 'dexieKey, table';
addDataToDexie(stores, jsonData);
}
function addDataToDexie(stores, jsonData) {
dbv1 = dexieDB.version(1);
if (jsonData.length > 0) {
dbv1.stores(stores);
jsonData.forEach((jsonPackage) => {
for (table in jsonPackage) {
jsonPackage[table].forEach((tableRow) => {
dexieDB.table(table).add(tableRow)
.then(function () {
console.log(tableRow, ' added to dexie db.');
})
.catch(function () {
console.log(tableRow, ' already exists.');
});
});
}
});
}
}
This is the json, which I convert to object and save to dexie in the value column and the key si "dexieKey":
[
{
"company": [
{
"dexieKey": "{1}",
"company": {
"com_pk": 1,
"com_name": "CloudFire",
"com_city": "Round Rock",
"serverLastEdit": [
{
"com_pk": "2021-06-02T11:30:24.774Z"
},
{
"com_name": "2021-06-02T11:30:24.774Z"
},
{
"com_city": "2021-06-02T11:30:24.774Z"
}
],
"userLastEdit": []
}
}
]
}
]
Any idea why indexes were not populated when generating them dynamically?
Given the JSON data, i understand what's going wrong.
Instead of passing the following to update():
{
company:
{
com_name: "TOP SERVE 2"
}
}
You probably meant to pass this:
{
"company.com_name": "TOP SERVE 2"
}
Another hint is to do the add within an rw transaction, or even better if you can use bulkAdd() instead to optimize the performance.

Apollo cache fails when query variables change

I'm using useLazyQuery() to get analytic data measured by a time filter. When the time filter change, the cache returns the same values despite the filter.
I'm using the default fetchPolicy (cache-first).
const [
getVisitorsAnalytics,
{ loading, data },
] = useAnalyticVisitorsLazyQuery()
const handleTimeFilterChange = (timeFilter: string) => {
getVisitorsAnalytics({
variables: {
input: {
timeFilter,
},
},
})
}
Thanks and regards!
I think the values are cached and the cache doesn't have the data for the new filter value passed -
const { data, loading } = useLazyQuery(QUERY, {
fetchPolicy: 'network-only'
});

Loopback custom password validation

very simple question: if I try to validate a password in a User model it seems I can only validate the already encrypted password?
So for example if I use
Customer.validatesLengthOf('password', { min: 8, message: 'Too short' })
Then the encrypted password is checked (which is always longer than 8 characters), so no good... If I try to use a custom validation, how can I get access to the original password (the original req.body.password basically)?
EDIT (August 20, 2019): I am unsure if this is still an issue in the latest loopback releases.
In fact, this is a known problem in loopback. The tacitly approved solution is to override the <UserModel>.validatePassword() method with your own. YMMV.
akapaul commented on Jan 10, 2017 •
I've found another way to do this. In common model User there is a
method called validatePassword. If we extend our UserModel from User,
we can redefine this method in JS, like following:
var g = require('loopback/lib/globalize');
module.exports = function(UserModel) {
UserModel.validatePassword = function(plain) {
var err,
passwordProperties = UserModel.definition.properties.password;
if (plain.length > passwordProperties.max) {
err = new Error (g.f('Password too long: %s (maximum %d symbols)', plain, passwordProperties.max));
err.code = 'PASSWORD_TOO_LONG';
} else if (plain.length < passwordProperties.min) {
err = new Error(g.f('Password too short: %s (minimum %d symbols)', plain, passwordProperties.min));
err.code = 'PASSWORD_TOO_SHORT';
} else if(!(new RegExp(passwordProperties.pattern, 'g').test(plain))) {
err = new Error(g.f('Invalid password: %s (symbols and numbers are allowed)', plain));
err.code = 'INVALID_PASSWORD';
} else {
return true;
}
err.statusCode = 422;
throw err;
};
};
This works for me. I don't think that g (globalize) object is required
here, but I added this, just in case. Also, I've added my validator
options in JSON definition of UserModel, because of Loopback docs
For using the above code, one would put their validation rules in the model's .json definition like so (see max, min, and pattern under properties.password):
{
"name": "UserModel",
"base": "User",
...
"properties": {
...
"password": {
"type": "string",
"required": true,
...
"max": 50,
"min": 8,
"pattern": "(?=.*[A-Z])(?=.*[!##$&*])(?=.*[0-9])(?=.*[a-z])^.*$"
},
...
},
...
}
ok, no answer so what I'm doing is using a remote hook to get access to the original plain password and that'll do for now.
var plainPwd
Customer.beforeRemote( 'create', function (ctx, inst, next) {
plainPwd = ctx.req.body.password
next()
})
Then I can use it in a custom validation:
Customer.validate( 'password', function (err, res) {
const pattern = new RegExp(/some-regex/)
if (plainPwd && ! pattern.test( plainPwd )) err()
}, { message: 'Invalid format' })
Ok I guess the above answer is quite novel and obviously is accepted, but If you want a real easy solution with just some basic validations done and not much code then loopback-mixin-complexity is the solution for you.
If you don't want to create another dependency then you can go ahead with a custom mixin, that you can add into your user model or any other model where you need some kind of validation and it would do the validation for you.
Here's a sample code for how to create such mixin
module.exports = function(Model, options) {
'use strict';
Model.observe('before save', function event(ctx, next) { //Observe any insert/update event on Model
if (ctx.instance) {
if(!yourValidatorFn(ctx.instance.password) )
next('password not valid');
else
next();
}
else {
if(!yourValidatorFn(ctx.data.password) )
next('password not valid');
else
next();
}
});
};

YUI DataTable Loading...but no data retrieved

I have a YUI datatable bound to a YUI datasource that needs to be auto-refreshed after a couple of seconds and also manually through a button. While I am able to read the data through a local datasource (datasource declared in the same page) I am not able to read it remotely. The grid remains "Data Loading..." even though the requests to the target page (yui_data.cfm) are being made at the set interval. The source code is the following:
Source code of yui_data.cfm (for testing) is the following:
{ "records": [ {"id": 31, "name":"4fruit", "price":8323, "number":231} ] }
Source code of the page requesting the data:
myDataSource = new YAHOO.util.XHRDataSource("yui_data.cfm?");
myDataSource.responseType = YAHOO.util.XHRDataSource.TYPE_JSON;
myDataSource.responseSchema = {
resultsList: "records",
fields: [
{key:"id", parser:"number"},
{key:"name"},
{key:"price",parser:"number"},
{key:"number",parser:"number"}
]
};
myDataTable = new YAHOO.widget.DataTable("dynamicdata", myColumnDefs, myDataSource);
myCallBack = {
success: myDataTable.onDataReturnSetRows,
failure: function() {
},
scope: myDataTable,
argument: myDataTable.getState()
}
myDataSource.setInterval(5000, null, myCallBack);
The above example only works when the line
myDataSource = new YAHOO.util.XHRDataSource("yui_data.cfm?");
is changed to:
myDataSource = new YAHOO.util.XHRDataSource(YAHOO.data.sample); // as an example!
I managed to fix the problem by preceding the previous JSON output with a ResultSet and Result and then modifying the response schema resultList to read from that path.

Resources