Optimised EmEditor Macro to return Min/Max column lengths on large delimited data - performance

I currently have large delimited data sets and I need to return the min/max lengths for each column.
I'm currently using the following code in EmEditor v20.3, which works great, but I am wondering if there is a quicker way, particularly when there are millions of lines of data and hundreds of columns (this code is slow in that case).
Any quicker approaches or ideas that could be wrapped into a JavaScript macro would be much appreciated.
// For every column from colStart to MaxCol, scan the data rows cell by cell and
// report the shortest and longest cell text length together with the header.
// colStart, MaxCol and MaxLines are expected to be set before this loop runs
// (they are not defined in this snippet).
for( col = colStart; col <= MaxCol; col++ ) {
    // Row 1 is read as the column title.
    sTitle = document.GetCell( 1, col, eeCellIncludeNone );
    min = -1;   // -1 means "no cell measured yet"
    max = 0;
    // NOTE(review): the loop stops before row MaxLines itself — confirm that
    // MaxLines is one past the last data row, otherwise the last row is skipped.
    for( line = document.HeadingLines + 1; line < MaxLines; line++ ) {
        // Measure each cell once instead of re-reading .length several times.
        var len = document.GetCell( line, col, eeCellIncludeQuotesAndDelimiter ).length;
        if( min == -1 || min > len ) {
            min = len;
        }
        if( max < len ) {
            max = len;
        }
    }
    // Put a string separator right after col: "col + min" on two numbers would
    // be an arithmetic sum, which merged the column index and the minimum.
    OutputBar.writeln( col + " : " + min + " " + max + " " + sTitle );
}

Please update EmEditor to 20.3.906 or later, and run this macro:
// Reports min/max cell length per column by letting the editor's own
// "find shortest/longest cell" commands locate the rows, instead of
// visiting every cell from the macro.
colStart = 1;
MaxCol = document.GetColumns();

// Row index of the end of the document, used below to recognise the case
// where the "shortest cell" search landed on the last line.
document.selection.EndOfDocument();
yLastLine = document.selection.GetActivePointY( eePosCellLogical );

min = -1;
max = 0;

// Puts the cursor on row 1 of the given column, runs an editor command and
// returns the row the cursor ends up on.
function rowFoundBy( column, commandId ) {
    document.selection.SetActivePoint( eePosCellLogical, column, 1 );
    editor.ExecuteCommandByID( commandId );
    return document.selection.GetActivePointY( eePosCellLogical );
}

// Length of the text of one cell (read with eeCellIncludeQuotes).
function cellLength( row, column ) {
    return document.GetCell( row, column, eeCellIncludeQuotes ).length;
}

for( col = colStart; col <= MaxCol; col++ ) {
    sTitle = document.GetCell( 1, col, eeCellIncludeNone );

    var shortestRow = rowFoundBy( col, 4064 ); // Find Empty or Shortest Cell
    if( shortestRow >= yLastLine ) { // landed on the last empty line
        shortestRow = rowFoundBy( col, 4050 ); // Find Non-empty Shortest Cell
    }
    min = cellLength( shortestRow, col );

    max = cellLength( rowFoundBy( col, 4049 ), col ); // Find Longest Cell

    OutputBar.writeln( col + " : " + min + " " + max + " " + sTitle);
}

Related

Calculate and list how many different 4 digit numbers can be done with the numbers 0 to 9?

There is something I want to learn.
Let's say we have some single digit numbers.
Example: 1-2-3-4-5-6-7-8-9-0
Example II: 1-2-4-6-0
And with these numbers, we want to get 4-digit numbers that are different from each other.
And we want to print them as lists.
Result:
4676
4236
1247
1236
....
Is it possible to do this?
You can write and run a macro like this:
// Take the currently selected text as input; it must look like "1-2-4-6-0".
var input = document.selection.Text;
if( input.length == 0 ) {
    alert( "Select the input string" );
    Quit();
}

// Even offsets must hold a digit, odd offsets must hold the '-' separator.
for( var pos = 0; pos < input.length; pos++ ) {
    var ch = input.charAt( pos );
    var isDigitSlot = ( pos % 2 == 0 );
    if( isDigitSlot ) {
        if( ch < '0' || ch > '9' ) {
            alert( "not digit" );
            Quit();
        }
    }
    else if( ch != '-' ) {
        alert( "not separated by '-'" );
        Quit();
    }
}

// Collect the digits, which sit at every second character.
var digits = [];
for( var at = 0; at < input.length; at += 2 ) {
    digits.push( input.charAt( at ) );
}
if( digits.length < 4 ) {
    alert( "Input string should contain at least 4 digits" );
    Quit();
}

// Enumerate every ordered pick of four digits (repeats allowed), one per line.
var total = digits.length;
var lines = [];
for( var a = 0; a < total; a++ ) {
    for( var b = 0; b < total; b++ ) {
        for( var c = 0; c < total; c++ ) {
            for( var d = 0; d < total; d++ ) {
                lines.push( digits[a] + digits[b] + digits[c] + digits[d] + "\r\n" );
            }
        }
    }
}

// Emit the whole list into a freshly created document.
editor.EnableTab = true;
editor.NewFile();
document.write( lines.join( "" ) );
To run this, save this code as, for instance, GenCombinations.jsee, and then select this file from Select... in the Macros menu. Finally, select Run GenCombinations.jsee in the Macros menu after selecting an input string.

Optimised Macro to sort Column Order based on Headers

Any thoughts on how to sort very large multi-column data via a macro? For example, sorting all the columns (or perhaps just the selected adjacent columns) by A-Z, 0-9, etc., similar to the current single-column sorting options.
Assuming you want to sort all columns by looking at the first row of each column, I wrote a macro:
/**
 * Performs one step of sorting the document's columns by their row-1 header
 * (compared case-insensitively).
 *
 * Reads every header, works out the sorted order, and moves the first column
 * that is not yet in its sorted position. Call repeatedly until it returns
 * false.
 *
 * @returns {boolean} true if a column was moved, false if all columns are
 *                    already in sorted order.
 */
function SortOneColumn()
{
    var count = document.GetColumns();
    var headers = [];
    for( var i = 0; i < count; ++i ) {
        headers.push( {
            name: document.GetCell( 1, i + 1, eeCellIncludeNone ).toLowerCase(),
            col: i
        } );
    }
    headers.sort( function( a, b ) {
        if( a.name < b.name ) {
            return -1;
        }
        if( a.name > b.name ) {
            return 1;
        }
        // Equal headers keep their current left-to-right order. Without this
        // tie-break the result depends on the engine's sort being stable, and
        // an unstable sort could keep reshuffling duplicate headers forever.
        return a.col - b.col;
    });
    for( var target = 0; target < count; ++target ) {
        if( headers[target].col != target ) {
            // Document columns are 1-based; move the single column into place.
            document.MoveColumn( headers[target].col + 1, headers[target].col + 1, target + 1 );
            return true;
        }
    }
    return false;
}
// Keep moving one column at a time until a call reports that nothing was moved.
while( SortOneColumn() );
There is a potential for optimizing the macro even further, so please let me know if this is not fast enough.
To run this, save this code as, for instance, SortColumns.jsee, and then select this file from Select... in the Macros menu. Finally, select Run SortColumns.jsee in the Macros menu.

Confusion related to the time complexity of this algorithm

I was going through some of the articles of the leetcode. Here is one of them https://leetcode.com/articles/optimal-division/.
Given a list of positive integers, the adjacent integers will perform the float division. For example, [2,3,4] -> 2 / 3 / 4.
However, you can add any number of parentheses at any position to change the priority of operations. You should find out how to add parentheses to get the maximum result, and return the corresponding expression in string format. Your expression should NOT contain redundant parentheses.
Example:
Input: [1000,100,10,2]
Output: "1000/(100/10/2)"
Explanation:
1000/(100/10/2) = 1000/((100/10)/2) = 200
However, the bold parenthesis in "1000/((100/10)/2)" are redundant,
since they don't influence the operation priority. So you should return "1000/(100/10/2)".
Other cases:
1000/(100/10)/2 = 50
1000/(100/(10/2)) = 50
1000/100/10/2 = 0.5
1000/100/(10/2) = 2
I think the time complexity of the solution is O(N^2) isn't it?
Here is the memoization solution
/**
 * Memoized search for the parenthesization of nums[0] / nums[1] / ... that
 * yields the largest value, returned as an expression string.
 */
public class Solution {
    /** Best results for one sub-range: extreme values and the expressions that produce them. */
    class T {
        float max_val, min_val;
        String min_str, max_str;
    }

    /**
     * @param nums the operands, in order
     * @return the expression with the maximum value, or "" when nums is null or empty
     */
    public String optimalDivision(int[] nums) {
        // Guard: with no operands, nums.length - 1 would be -1 and index the memo out of range.
        if (nums == null || nums.length == 0) {
            return "";
        }
        T[][] memo = new T[nums.length][nums.length];
        T t = optimal(nums, 0, nums.length - 1, "", memo);
        return t.max_str;
    }

    /**
     * Computes the minimum and maximum value (and matching expressions) obtainable
     * from nums[start..end], caching each range in memo.
     *
     * @param nums  the operands
     * @param start first index of the range (inclusive)
     * @param end   last index of the range (inclusive)
     * @param res   unused; kept so existing callers still compile
     * @param memo  cache indexed by [start][end]
     * @return the result record for the range
     */
    public T optimal(int[] nums, int start, int end, String res, T[][] memo) {
        if (memo[start][end] != null)
            return memo[start][end];
        T t = new T();
        if (start == end) {
            // A single operand is both the minimum and the maximum of its range.
            t.max_val = nums[start];
            t.min_val = nums[start];
            t.min_str = "" + nums[start];
            t.max_str = "" + nums[start];
            memo[start][end] = t;
            return t;
        }
        // Float.MIN_VALUE is the smallest POSITIVE float, not the lowest value,
        // so infinities are used as the sentinels for the min/max search.
        t.min_val = Float.POSITIVE_INFINITY;
        t.max_val = Float.NEGATIVE_INFINITY;
        t.min_str = t.max_str = "";
        // Try every split point: left part divided by right part.
        for (int i = start; i < end; i++) {
            T left = optimal(nums, start, i, "", memo);
            T right = optimal(nums, i + 1, end, "", memo);
            // Parentheses are only needed when the right part has more than one operand.
            boolean wrap = i + 1 != end;
            float low = left.min_val / right.max_val;
            if (t.min_val > low) {
                t.min_val = low;
                t.min_str = left.min_str + "/" + (wrap ? "(" : "") + right.max_str + (wrap ? ")" : "");
            }
            float high = left.max_val / right.min_val;
            if (t.max_val < high) {
                t.max_val = high;
                t.max_str = left.max_str + "/" + (wrap ? "(" : "") + right.min_str + (wrap ? ")" : "");
            }
        }
        memo[start][end] = t;
        return t;
    }
}

For a given string which contains only digits , what's the optimal approach to return all valid ip address combinations?

Example:
Given “25525511135”
Output : [“255.255.11.135”, “255.255.111.35”]. (sorted order)
Kindly let me know if we could do a depth first search over here ?(that's the only thing striking me )
Why is it important to have an 'optimal' approach for answering this?
There are not many permutations so the simple approach of checking every combination that fits into the IP format and then filtering out those that have out of range numbers will easily work.
It's unlikely to be a bottleneck for whatever this is part of.
You probably want a dynamic programming algorithm for the general case (something like
http://www.geeksforgeeks.org/dynamic-programming-set-32-word-break-problem/).
Instead of testing whether prefixes can be segmented into words in the dictionary, you'd be testing to see whether the prefixes are prefixes of some valid IPv4 address.
Brutal DFS is acceptable in this problem:
// Backtracking search that splits a digit string into four dot-separated
// segments, each accepted by IsOk, and collects every resulting address.
class Solution{
private:
// Collected results.
vector<string> ans;
// Length of the input string.
int len;
// cur: segment currently being built; rec: the input digits;
// str: segments completed so far, each followed by a '.'.
string cur, rec, str;
// Returns true when s has no leading zero (unless it is a single character),
// contains no '.', and its decimal value lies in 0..255.
bool IsOk(string s) {
if(s[0] == '0' && s.size() > 1) return false;
int sum = 0;
for(int i = 0; i < s.size(); i ++) {
if(s[i] == '.') return false;
sum = sum * 10 + s[i] - '0';
}
if(sum >= 0 && sum <= 255) return true;
return false;
}
// x: index of the next input character to consume; cnt: number of segments
// already closed into str.
void dfs(int x, int cnt) {
if(x == len) {
// All input consumed: str must hold every digit plus four '.' characters,
// i.e. nothing may be left pending in cur.
if(str.size() != len + 4) return ;
string tmp(str);
// Drop the trailing '.' that follows the last segment.
tmp.erase(tmp.size() - 1, 1);
if(cnt == 4) ans.push_back(tmp);
return ;
}
if(cnt > 4 || str.size() > len + 4) return ;
// Remember cur so it can be restored before returning.
string tmp = cur;
cur += rec[x];
if(!IsOk(cur)) {
cur = tmp;
return ;
}
// Option 1: keep extending the current segment with further digits.
dfs(x + 1, cnt);
// Option 2: close the current segment here and start a new one.
string tmp2 = cur + '.';
str += tmp2;
cur = "";
dfs(x + 1, cnt + 1);
// Undo option 2 so the caller sees str and cur unchanged.
str.erase(str.size() - tmp2.size(), tmp2.size());
cur = tmp;
}
public:
// Entry point: resets the search state for s and returns every address found.
vector<string> restoreIpAddresses(string s) {
this->len = s.size();
this->rec = s;
cur = str = "";
ans.clear();
dfs(0, 0);
return ans;
}
};
Here is a recursive solution in JavaScript. The result is not sorted.
// Task from https://www.geeksforgeeks.org/program-generate-possible-valid-ip-addresses-given-string/
// Given a string containing only digits, restore it by returning all possible valid IP address combinations.
//
// Example:
// Input : 25525511135
// Output : [“255.255.11.135”, “255.255.111.35”]
//
(function () {
/**
 * Returns every way to split a digit string into a valid dotted IPv4 address.
 *
 * Each of the four parts must be 1-3 digits, have no leading zero (a lone "0"
 * is allowed) and be at most 255. Results are produced with shorter leading
 * parts first; they are not otherwise sorted.
 *
 * @param {string} str - The digits to split.
 * @returns {string[]} All valid addresses; empty when there are none.
 */
function getValidIP(str) {
  const result = [];
  const { length } = str;

  // One octet: "0", or 1-3 digits not starting with 0, with value <= 255.
  // The same rule is applied to the last octet, which previously was accepted
  // on "num < 256" alone and so let through values such as "01" or "0000".
  const isValidOctet = (text) => /^(0|[1-9]\d{0,2})$/.test(text) && Number(text) <= 255;

  // start: index of the first unused character; parts: octets chosen so far.
  function check(start, parts) {
    if (parts.length === 3) {
      // The fourth octet must consume everything that is left.
      const last = str.substring(start);
      if (isValidOctet(last)) {
        result.push([...parts, last].join('.'));
      }
      return;
    }
    // Leave at least one character for the octets that follow.
    for (let size = 1; size <= 3 && start + size < length; size++) {
      const octet = str.substring(start, start + size);
      if (!isValidOctet(octet)) {
        // A longer octet with the same prefix cannot become valid either.
        break;
      }
      check(start + size, [...parts, octet]);
    }
  }

  check(0, []);
  return result;
}
// Demo runs: for each sample input, print a label, then the list returned by
// getValidIP, with console.time/console.timeEnd reporting how long the call took.
console.log('12345:')
console.time('1-1');
console.log(getValidIP('12345'));
console.timeEnd('1-1');
console.log('1234:')
console.time('1-2');
console.log(getValidIP('1234'));
console.timeEnd('1-2');
console.log('2555011135:')
console.time('1-3');
console.log(getValidIP('2555011135'));
console.timeEnd('1-3');
console.log('222011135:')
console.time('1-4');
console.log(getValidIP('222011135'));
console.timeEnd('1-4');
})();

Inserting elements in a matrix spirally

Given a number x, insert elements 1 to x^2 in a matrix spirally.
e.g. For x = 3, matrix looks like [[1,2,3],[8,9,4],[7,6,5]].
For this I've written the following snippet. However, I'm getting the output [[7,9,5],[7,9,5],[7,9,5]].
// Fill the matrix one edge per pass, shrinking the bounds after each edge:
// t/b are the top/bottom row limits, l/r the left/right column limits, and
// dir selects which edge is written (0 top, 1 right, 2 bottom, 3 left).
// NOTE(review): the reported output has three identical rows, which suggests
// every entry of arr may be the same list object — confirm that each row is
// created as a separate list where arr is built (outside this snippet).
while(t<=b && l<=r){
    System.out.print(t+" "+b+" "+l+" "+r+"\n");
    if(dir==0){
        // top row, left to right
        for(int i = l;i<=r;i++){
            arr.get(t).set(i,x);
            x++;
        }
        t++;
    }else if(dir==1){
        // right column, top to bottom
        for(int i = t;i<=b;i++){
            arr.get(i).set(r,x);
            x++;
        }
        r--;
    }else if(dir==2){
        // bottom row, right to left
        for(int i = r;i>=l;i--){
            arr.get(b).set(i,x);
            x++;
        }
        b--;
    }else if(dir==3){
        // left column, bottom to top: i is the ROW here and l the fixed column
        // (the indices were previously swapped, writing into row l instead).
        for(int i = b;i>=t;i--){
            arr.get(i).set(l,x);
            x++;
        }
        l++;
    }
    dir = (dir+1)%4;
}
You can use the following code (which I developed for an implementation that handles huge matrix sizes). It takes the width (columns) and height (rows) of a matrix of any size and produces the output you need.
// Builds the list of cells of a w-by-h grid in spiral order. Each entry
// carries its X and Y position and its running Index in the spiral.
// Returns null when w * h is less than 1.
List<rec> BuildSpiralIndexList(long w, long h)
{
List<rec> result = new List<rec>();
// count: number of straight runs emitted so far; dir: +1 or -1 step along the run;
// phase: count / 4; pos: count % 4 (which of the four runs of a lap this is).
long count = 0,dir = 1,phase = 0,pos = 0;
// length: cells in the current run; totallength: cells emitted before it.
long length = 0,totallength = 0;
bool isVertical = false;
if ((w * h)<1) return null;
do
{
// Runs alternate: even counts step along X, odd counts step along Y.
isVertical = (count % 2) != 0;
// Each run is shorter than the full side by the number of earlier runs
// that already covered cells on that axis.
length = (isVertical ? h : w) - count / 2 - count % 2;
phase = (count / 4);
pos = (count % 4);
// pos 0 and 1 step in the increasing direction, pos 2 and 3 in the decreasing one.
dir = pos > 1 ? -1 : 1;
for (int t = 0; t < length; t++)
// you can replace the next code with printing or any other action you need
result.Add(new rec()
{
// Start coordinate of the run plus t steps along the run's axis.
X = ((pos == 2 || pos == 1) ? (w - 1 - phase - (pos == 2 ? 1 : 0)) : phase) + dir * (isVertical ? 0 : t),
Y = ((pos <= 1 ? phase + pos : (h - 1) - phase - pos / 3)) + dir * (isVertical ? t : 0),
Index = totallength + t
});
totallength += length;
count++;
} while (totallength < (w*h));
return result;
}
This solution walks from the top left to the top right, the top right to the bottom right, the bottom right to the bottom left and the bottom left up to the top left.
It is a tricky problem, hopefully my comments below assist in explaining.
Below is a codepen link to see it added to a table.
https://codepen.io/mitchell-boland/pen/rqdWPO
// Side length passed to matrixSpiral below.
const n = 3; // Set this to a number
// matrixSpiral is a function declaration, so it is hoisted and can be called
// before the line where it is defined.
matrixSpiral(n);
/**
 * Builds a number-by-number matrix filled with 1..number^2 in clockwise spiral
 * order starting at the top-left corner, logs each row, and returns the matrix.
 *
 * @param {number} number - Side length of the square matrix.
 * @returns {number[][]} The filled matrix (empty array when number < 1).
 */
function matrixSpiral(number){
  // One empty inner array per row.
  const outerArray = [];
  for (let row = 0; row < number; row++) {
    outerArray.push([]);
  }

  // Bounds of the ring still to be filled; they close in after each edge.
  let leftColumn = 0;
  let rightColumn = number - 1;
  let topRow = 0;
  let bottomRow = number - 1;
  let counter = 1; // Next value to write.

  while (leftColumn <= rightColumn && topRow <= bottomRow) {
    // Top row, left to right. Indexed by topRow: the previous leftColumn index
    // only worked because both bounds happen to advance in lockstep.
    for (let col = leftColumn; col <= rightColumn; col++) {
      outerArray[topRow][col] = counter;
      counter++;
    }
    topRow++;

    // Right column, top to bottom.
    for (let row = topRow; row <= bottomRow; row++) {
      outerArray[row][rightColumn] = counter;
      counter++;
    }
    rightColumn--;

    // Bottom row, right to left.
    for (let col = rightColumn; col >= leftColumn; col--) {
      outerArray[bottomRow][col] = counter;
      counter++;
    }
    bottomRow--;

    // Left column, bottom to top.
    for (let row = bottomRow; row >= topRow; row--) {
      outerArray[row][leftColumn] = counter;
      counter++;
    }
    leftColumn++;
    // The next pass repeats the same four edges one ring further in.
  }

  // Console log the results.
  for (const row of outerArray) {
    console.log(row);
  }
  // Returned as well so callers can use the matrix, not just read the log.
  return outerArray;
}

Resources