Optimised Macro to sort Column Order based on Headers - performance

Any thoughts on how to sort very large multi-column data via a macro? For example, sorting all the columns (or perhaps just the selected adjacent columns) by A-Z, 0-9, etc., similar to the current single-column sorting options.

Assuming you want to sort all columns by looking at the first row of each column, I wrote a macro:
/**
 * Moves at most one column towards its sorted position.
 *
 * Reads the first-row cell of every column, orders the columns
 * case-insensitively by that text, and moves the first column that is not
 * yet in its sorted position. Call repeatedly until it returns false.
 *
 * @returns {boolean} true if a column was moved, false if every column is
 *                    already in sorted order.
 */
function SortOneColumn()
{
    // Locals are declared with var so nothing leaks into the macro's global scope.
    var count = document.GetColumns();
    var arr = new Array( count );
    var i;
    for( i = 0; i < count; ++i ) {
        var s = document.GetCell( 1, i + 1, eeCellIncludeNone );
        // col is the current zero-based position; key is the case-folded header.
        arr[i] = { name: s, key: s.toLowerCase(), col: i };
    }
    arr.sort( function( a, b ) {
        if( a.key < b.key ) {
            return -1;
        }
        if( a.key > b.key ) {
            return 1;
        }
        // Equal headers keep their current relative order. Without this
        // tie-break an unstable sort could keep swapping duplicate headers
        // and the caller's loop would never terminate.
        return a.col - b.col;
    });
    for( i = 0; i < count; ++i ) {
        if( arr[i].col != i ) {
            // Move the column that belongs at position i (both arguments are 1-based).
            document.MoveColumn( arr[i].col + 1, arr[i].col + 1, i + 1 );
            return true;
        }
    }
    return false;
}
// Keep calling SortOneColumn until it returns false, i.e. until a pass moves no column.
while( SortOneColumn() );
There is a potential for optimizing the macro even further, so please let me know if this is not fast enough.
To run this, save this code as, for instance, SortColumns.jsee, and then select this file from Select... in the Macros menu. Finally, select Run SortColumns.jsee in the Macros menu.

Related

Calculate and list how many different 4 digit numbers can be done with the numbers 0 to 9?

There is something I want to learn.
Let's say we have some single digit numbers.
Example: 1-2-3-4-5-6-7-8-9-0
Example II: 1-2-4-6-0
And with these numbers, we want to get 4-digit numbers that are different from each other.
And we want to print them as lists.
Result:
4676
4236
1247
1236
....
Is it possible to do this?
You can write and run a macro like this:
// Read the input from the current selection.
// Expected shape: single digits separated by '-', for example "1-2-4-6-0".
var input = document.selection.Text;
if( input.length == 0 ) {
    alert( "Select the input string" );
    Quit();
}

// Validate the shape: even offsets must hold a digit, odd offsets a '-'.
for( var pos = 0; pos < input.length; ++pos ) {
    var ch = input.charAt( pos );
    if( pos % 2 != 0 ) {
        if( ch != '-' ) {
            alert( "not separated by '-'" );
            Quit();
        }
    }
    else if( ch < '0' || ch > '9' ) {
        alert( "not digit" );
        Quit();
    }
}

// Collect the digits, which sit at every even offset.
var digits = [];
for( var at = 0; at < input.length; at += 2 ) {
    digits.push( input.charAt( at ) );
}
if( digits.length < 4 ) {
    alert( "Input string should contain at least 4 digits" );
    Quit();
}

// Build every ordered 4-digit sequence (repetition allowed), one per line.
var total = digits.length;
var lines = [];
for( var a = 0; a < total; ++a ) {
    for( var b = 0; b < total; ++b ) {
        for( var c = 0; c < total; ++c ) {
            for( var d = 0; d < total; ++d ) {
                lines.push( digits[a] + digits[b] + digits[c] + digits[d] + "\r\n" );
            }
        }
    }
}
var output = lines.join( "" );

// Write the list into a new document.
editor.EnableTab = true;
editor.NewFile();
document.write( output );
To run this, save this code as, for instance, GenCombinations.jsee, and then select this file from Select... in the Macros menu. Finally, select Run GenCombinations.jsee in the Macros menu after selecting an input string.

Optimised EmEditor Macro to return Min/Max column lengths on large delimited data

I currently have large delimited data sets and I need to return the min\max lengths for each column.
I'm currently using the following code in EmEditor v20.3, which works great, but I am wondering if there is a quicker way, particularly when there are millions of lines of data and hundreds of columns (this code is slow in that case).
Any quicker approaches or ideas that could be wrapped into a JavaScript macro would be much appreciated.
// For each column from colStart to MaxCol, scan the cells below the heading
// lines and report the shortest and longest cell length to the output bar.
// colStart, MaxCol and MaxLines must be set before this snippet runs.
for( var col = colStart; col <= MaxCol; col++ ) {
    var sTitle = document.GetCell( 1, col, eeCellIncludeNone );
    var min = -1;   // -1 means "no cell seen yet"
    var max = 0;
    // NOTE(review): the loop stops before line MaxLines; confirm MaxLines is
    // one past the last data line, otherwise the final line is never measured.
    for( var line = document.HeadingLines + 1; line < MaxLines; line++ ) {
        var len = document.GetCell( line, col, eeCellIncludeQuotesAndDelimiter ).length;
        if( min == -1 || min > len ) {
            min = len;
        }
        if( max < len ) {
            max = len;
        }
    }
    // The separator after col keeps col and min from being added together as numbers.
    OutputBar.writeln( col + " : " + min + " " + max + " " + sTitle );
}
Please update EmEditor to 20.3.906 or later, and run this macro:
// Reports the shortest and longest cell length of every column by letting
// EmEditor's built-in "find cell" commands locate the rows.

// Puts the cursor at the top of column `col`, runs the given command there,
// and returns the logical row the cursor ends up on.
function rowFoundBy( col, commandId ) {
    document.selection.SetActivePoint( eePosCellLogical, col, 1 );
    editor.ExecuteCommandByID( commandId );
    return document.selection.GetActivePointY( eePosCellLogical );
}

colStart = 1;
MaxCol = document.GetColumns();

// Remember the row of the end of the document.
document.selection.EndOfDocument();
yLastLine = document.selection.GetActivePointY( eePosCellLogical );

min = -1;
max = 0;
for( col = colStart; col <= MaxCol; col++ ) {
    sTitle = document.GetCell( 1, col, eeCellIncludeNone );

    // 4064 = Find Empty or Shortest Cell. If that lands on the last (empty)
    // line, fall back to 4050 = Find Non-empty Shortest Cell.
    y = rowFoundBy( col, 4064 );
    if( y >= yLastLine ) {
        y = rowFoundBy( col, 4050 );
    }
    min = document.GetCell( y, col, eeCellIncludeQuotes ).length;

    // 4049 = Find Longest Cell.
    y = rowFoundBy( col, 4049 );
    max = document.GetCell( y, col, eeCellIncludeQuotes ).length;

    OutputBar.writeln( col + " : " + min + " " + max + " " + sTitle);
}

How to correct the code such that the loop stops when there is a data match

We have new entries being pulled into a Google sheet daily using XML (Sheet 1). We are using the code below to pull these entries into a separate Google sheet for end users (Sheet 2). The end users can then edit the data. We are trying to compare the ID numbers in Sheet 1 and Sheet 2. Each time we run the code, ID numbers from Sheet 1 that do not have a match in Sheet 2 should be added to Sheet 2. If the ID number in Sheet 1 already exists in Sheet 2, that ID number should be skipped. It isn't skipping the matches. Instead it is adding everything to Sheet 2 every time we run the code and Sheet 2 now contains duplicates.
// Queue every slateDT row (from index 1 on) whose ID in column 0 has no
// equal ID in column 1 of gradingDT (rows from index 1 on).
for (var row = 1; row < slateDT.length; row++) {
  var slateId = slateDT[row][0];
  var found = false;
  for (var g = 1; g < gradingDT.length && !found; g++) {
    var candidate = gradingDT[g][1].trim();
    found = slateId.trim() == candidate;
  }
  if (!found) {
    additions.push(moveColumns(slateDT[row]));
  }
}

// Append the queued rows below the existing data, reload the data, then sort on column 3.
if (additions.length > 0) {
  var firstFreeRow = gradingDT.length + 1;
  var target = gradingSS.getRange(firstFreeRow, 2, additions.length, additions[0].length);
  target.setValues(additions);
  gradingDT = getDataValues(gradingSS.getName());
  gradingSS.getRange(2, 1, gradingDT.length, gradingDT[0].length).sort(3);
}
/**
 * Rearranges the cells of one row in place and returns that same array.
 *
 * Three cells starting at index 17 are dropped; then three removed slices
 * are each passed through checkDefined, joined into a single string, and
 * re-inserted at indexes 10, 13 and 14. The order of the splices matters,
 * because every splice shifts the indexes used by the next one.
 *
 * @param {Array} studentRow row to rearrange (mutated).
 * @returns {Array} the same studentRow array.
 */
function moveColumns(studentRow) {
  // Inserts the joined contents of a removed slice at the given index.
  function reinsertAt(index, removed) {
    studentRow.splice(index, 0, checkDefined(removed).join());
  }

  studentRow.splice(17, 3);
  reinsertAt(10, studentRow.splice(20, 1));
  reinsertAt(13, studentRow.splice(18, 1));
  reinsertAt(14, studentRow.splice(20));
  return studentRow;
}
Ok, I'm assuming that your weird moveColumns function does what you want and that the column numbers mismatch per my question above. Replace your for loop with this:
// For every row of slateDT, search column 1 of gradingDT (rows 1 and up)
// for the same trimmed ID; rows without a match are queued in additions.
// NOTE(review): this loop starts at row 0 of slateDT while the search starts
// at row 1 of gradingDT. Confirm slateDT has no header row.
for (var i = 0; i < slateDT.length; i++) {
  var oldID = slateDT[i][0].trim();
  var matchID = 0; // 0 means "not found"; real matches are always >= 1
  var j = 1;
  while (j < gradingDT.length && matchID == 0) {
    if (gradingDT[j][1].trim() == oldID) {
      matchID = j; // stops the search; j stays on the matching row
    } else {
      j++;
    }
  }
  if (matchID != 0) {
    Logger.log("A match was found for " + i + " at " + j);
  } else {
    additions.push(moveColumns(slateDT[i]));
    Logger.log("No match was found for " + i);
  }
} //for [i] loop
This is very similar to what you are trying to do with the while loop, but the for loop with break always advances j, so it cannot end up never incrementing j under certain circumstances.
Once you are sure it is working comment out the two logger lines for performance.

Number of dict words containing each substring

I chanced upon a problem which requires user to count the number of matching words for N substrings in a list of K strings (each string has length M) for the following constraints:
0 < length of substring <= 100
0 < M <= 100
0 < N <= 10,000
0 < K <= 10,000
For example:
substring se for { serpent, lose, last } will yield 2.
Given the enormous bounds of this input, checking all K strings for each substring will be too expensive. KMP would not work for this reason. Preprocessing the strings with suffix tree would be the next better option. But I can't possibly create suffix tree for each word because it would again lead to the above problem. Even if I try to join all the words together, the problem is that I would not be able to detect substrings in the same word (e.g. substring s for {stools, sat} will yield 3).
Is there a more efficient way of solving this problem?
One approach is to index each character of the string you want to search in.
"serpent" will give :
s > {0}
e > {1, 4}
r > {2}
p > {3}
n > {5}
t > {6}
From that, for each word you search in this string, look in the dictionary for its first and last char.
If they are found, you'll need to look into the corresponding indexes to find a pair that can match the string length.
Once found, you can do a full string comparison but only in this case.
The code can be like this (c#):
// Demo driver: for each search word, accumulates in scores how many of the
// sample strings contain it. The totals are computed but not printed.
static void Main( string[] args )
{
    var words = new List<string> { "se", "s", "pen", "oo", "st" };
    var strings = new List<string> { "serpent", "lose", "last", "stools", "sat" };
    // One zero-initialised counter per word.
    var scores = new List<int>( new int[words.Count] );
    foreach ( string candidate in strings )
    {
        // Index the string once and reuse it for every word.
        Dictionary<char, List<int>> indexes = MakeIndexes( candidate );
        for ( int w = 0 ; w < words.Count ; w++ )
            scores[w] += Score( words[w], candidate, indexes );
    }
}
// Returns 1 if word occurs in s, otherwise 0.
// indexes maps each character of s to the ascending list of positions where
// it occurs. Only positions where the first and last characters of word are
// exactly word.Length - 1 apart are compared in full.
static int Score( string word, string s, Dictionary<char, List<int>> indexes )
{
    int span = word.Length - 1;

    List<int> starts;
    if ( !indexes.TryGetValue( word[0], out starts ) )
        return 0;
    // A one-character word matches as soon as that character exists in s.
    if ( span == 0 )
        return 1;

    List<int> ends;
    if ( !indexes.TryGetValue( word[span], out ends ) )
        return 0;

    // Walk both ascending position lists looking for a (start, end) pair
    // whose distance equals span.
    int a = 0, b = 0;
    while ( a < starts.Count && b < ends.Count )
    {
        int gap = ends[b] - starts[a];
        if ( gap < span )
        {
            b++;
            continue;
        }
        if ( gap > span )
        {
            a++;
            continue;
        }
        if ( FullCompare( word, s, starts[a] ) )
            return 1;
        a++;
        b++;
    }
    return 0;
}
// Returns true when every character of word equals the character of s at the
// same offset counted from position `from`. No bounds check is done on s.
static bool FullCompare( string word, string s, int from )
{
    int offset = 0;
    while ( offset < word.Length )
    {
        if ( s[from + offset] != word[offset] )
            return false;
        offset++;
    }
    return true;
}
// Builds a lookup from each character of s to the ascending list of
// zero-based positions at which that character occurs.
static Dictionary<char, List<int>> MakeIndexes( string s )
{
    var positionsByChar = new Dictionary<char, List<int>>();
    int position = 0;
    foreach ( char c in s )
    {
        List<int> positions;
        if ( !positionsByChar.TryGetValue( c, out positions ) )
        {
            // First occurrence of this character: start its position list.
            positions = new List<int>();
            positionsByChar.Add( c, positions );
        }
        positions.Add( position );
        position++;
    }
    return positionsByChar;
}

For a given string which contains only digits , what's the optimal approach to return all valid ip address combinations?

Example:
Given “25525511135”
Output : [“255.255.11.135”, “255.255.111.35”]. (sorted order)
Kindly let me know if we could do a depth-first search here (that's the only approach that occurs to me).
Why is it important to have an 'optimal' approach for answering this?
There are not many permutations so the simple approach of checking every combination that fits into the IP format and then filtering out those that have out of range numbers will easily work.
It's unlikely to be a bottleneck for whatever this is part of.
You probably want a dynamic programming algorithm for the general case (something like
http://www.geeksforgeeks.org/dynamic-programming-set-32-word-break-problem/).
Instead of testing whether prefixes can be segmented into words in the dictionary, you'd be testing to see whether the prefixes are prefixes of some valid IPv4 address.
Brutal DFS is acceptable in this problem:
// Restores every valid dotted-quad IPv4 address that can be formed by
// inserting three dots into a string of digits, using a depth-first search.
// Results are returned in ascending lexicographic order.
class Solution{
private:
    vector<string> ans;    // completed addresses
    int len;               // number of characters in rec
    string cur;            // segment currently being built
    string rec;            // the input digits
    string str;            // segments closed so far, each followed by '.'

    // Returns true when s is a valid address segment: one to three decimal
    // digits, no leading zero unless the segment is exactly "0", value <= 255.
    bool IsOk(const string& s) {
        if(s.empty() || s.size() > 3) return false;
        if(s[0] == '0' && s.size() > 1) return false;
        int sum = 0;
        for(string::size_type i = 0; i < s.size(); i ++) {
            if(s[i] < '0' || s[i] > '9') return false;
            sum = sum * 10 + (s[i] - '0');
        }
        return sum <= 255;
    }

    // x: index of the next input character to consume.
    // cnt: number of segments already closed into str.
    // cur and str are restored to their entry values before returning.
    void dfs(int x, int cnt) {
        if(x == len) {
            // Valid only if exactly four segments are closed and nothing is
            // left pending; drop the trailing '.' before storing.
            if(cnt == 4 && cur.empty()) ans.push_back(str.substr(0, str.size() - 1));
            return ;
        }
        // Four segments are already closed but input remains: dead end.
        if(cnt >= 4) return ;
        const string saved = cur;
        cur += rec[x];
        if(IsOk(cur)) {
            const string segment = cur;
            // Close the segment first: '.' sorts before every digit, so
            // exploring this branch first yields results in sorted order.
            str += segment;
            str += '.';
            cur.clear();
            dfs(x + 1, cnt + 1);
            str.erase(str.size() - segment.size() - 1);
            // Then try extending the same segment with the next digit.
            cur = segment;
            dfs(x + 1, cnt);
        }
        cur = saved;
    }
public:
    // Returns all valid IPv4 addresses obtainable from s, sorted ascending.
    // Returns an empty vector when none exist.
    vector<string> restoreIpAddresses(string s) {
        this->len = static_cast<int>(s.size());
        this->rec = s;
        cur = str = "";
        ans.clear();
        dfs(0, 0);
        return ans;
    }
};
Here is a recursive solution in JavaScript. The result is not sorted.
// Task from https://www.geeksforgeeks.org/program-generate-possible-valid-ip-addresses-given-string/
// Given a string containing only digits, restore it by returning all possible valid IP address combinations.
//
// Example:
// Input : 25525511135
// Output : [“255.255.11.135”, “255.255.111.35”]
//
(function () {
/**
 * Returns every valid IPv4 address that can be formed by inserting three
 * dots into a string of digits.
 *
 * Each of the four segments must be 1-3 decimal digits, at most 255, and
 * must not have a leading zero unless it is exactly "0". The same check is
 * applied to the last segment, so inputs such as '10001' no longer produce
 * '1.0.0.01'.
 *
 * @param {string} str - the digits to split.
 * @returns {string[]} the valid addresses; empty if there are none.
 */
function getValidIP(str) {
  const result = [];
  const length = str.length;
  const octetPattern = /^(0|[1-9]\d{0,2})$/;

  // True when segment is a well-formed octet in the range 0..255.
  const isValidOctet = (segment) =>
    octetPattern.test(segment) && Number(segment) <= 255;

  // start: offset of the next unused character; level: segments already
  // chosen (0..3); previous: dotted text of the chosen segments.
  function check(start, level, previous) {
    if (level === 3) {
      // The last segment takes everything that is left.
      const last = str.substring(start);
      if (isValidOctet(last)) {
        result.push(`${previous}.${last}`);
      }
      return;
    }
    // Try segments of 1..3 characters, always leaving at least one
    // character for the remaining segments.
    for (let size = 1; size <= 3 && start + size < length; size++) {
      const segment = str.substring(start, start + size);
      if (!isValidOctet(segment)) {
        // A longer segment with the same prefix is invalid as well.
        break;
      }
      check(start + size, level + 1, level === 0 ? segment : `${previous}.${segment}`);
    }
  }

  check(0, 0, '');
  return result;
}
// Run getValidIP on a few sample inputs, printing each result and its timing.
const demoCases = [
  ['12345', '1-1'],
  ['1234', '1-2'],
  ['2555011135', '1-3'],
  ['222011135', '1-4'],
];
for (const [input, timerLabel] of demoCases) {
  console.log(`${input}:`);
  console.time(timerLabel);
  console.log(getValidIP(input));
  console.timeEnd(timerLabel);
}
})();

Resources