Word wrap to X lines instead of maximum width (Least raggedness) - algorithm

Does anyone know a good algorithm to word wrap an input string to a specified number of lines rather than a set width. Basically to achieve the minimum width for X lines.
e.g. "I would like to be wrapped into two lines"
goes to
"I would like to be
wrapped into two lines"
"I would like to be wrapped into three lines"
goes to
"I would like to
be wrapped into
three lines"
Inserting new lines as required. I can find other word wrap questions but they all have a known width and want to insert as many lines as needed to fit that width. I am after the opposite.
Answers preferable in a .NET language but any language would be helpful. Obviously if there is a framework way to do this I am not aware of let me know.
Edit I have found this since which I think the accepted answer is the solution to my problem but am having difficulty understanding it. Algorithm to divide text into 3 evenly-sized groups any chance someone could convert it to c# or vb.net.

A way of solvng this problem would be using dynamic programming, You can solve this problem using dynamic programming, cf Minimum raggedness algorithm.
I used some of the informations you add when you eddited your post with :
Algorithm to divide text into 3 evenly-sized groups
Notations:
Let name your text document="word1 word2 .... wordp"
n= number of line required
LineWidth=len(document)/n
Cost function:
First you need to define a cost function of having word[i] to word[j] in the same line , you can take the same as the one as the one on wikipedia, with p=2 for example:
It represent the distance between the objective length of a line and the actual lenght.
The total cost function for the optimal solution can be defined with the following recursiion relation:
Solving the problem:
You can solve this problem using dynamic programming.
I took the code from the link you gave, and changed it a so you see what the program is using.
At stage k you add words to line k.
Then you look at the optimal cost of
having word i to j at line k.
Once you've gone from line 1 to n,
you tacke the smallest cost in the
last step and you have your optimal
result:
Here is the result from the code:
D=minragged('Just testing to see how this works.')
number of words: 7
------------------------------------
stage : 0
------------------------------------
word i to j in line 0 TotalCost (f(j))
------------------------------------
i= 0 j= 0 121.0
i= 0 j= 1 49.0
i= 0 j= 2 1.0
i= 0 j= 3 16.0
i= 0 j= 4 64.0
i= 0 j= 5 144.0
i= 0 j= 6 289.0
i= 0 j= 7 576.0
------------------------------------
stage : 1
------------------------------------
word i to j in line 1 TotalCost (f(j))
------------------------------------
i= 0 j= 0 242.0
i= 0 j= 1 170.0
i= 0 j= 2 122.0
i= 0 j= 3 137.0
i= 0 j= 4 185.0
i= 0 j= 5 265.0
i= 0 j= 6 410.0
i= 0 j= 7 697.0
i= 1 j= 2 65.0
i= 1 j= 3 50.0
i= 1 j= 4 58.0
i= 1 j= 5 98.0
i= 1 j= 6 193.0
i= 1 j= 7 410.0
i= 2 j= 4 26.0
i= 2 j= 5 2.0
i= 2 j= 6 17.0
i= 2 j= 7 122.0
i= 3 j= 7 80.0
------------------------------------
stage : 2
------------------------------------
word i to j in line 2 TotalCost (f(j))
------------------------------------
i= 0 j= 7 818.0
i= 1 j= 7 531.0
i= 2 j= 7 186.0
i= 3 j= 7 114.0
i= 4 j= 7 42.0
i= 5 j= 7 2.0
reversing list
------------------------------------
Just testing 12
to see how 10
this works. 11
*There fore the best choice is to have words 5 to 7 in last line.(cf
stage2)
then words 2 to 5 in second line (cf
stage1)
then words 0 to 2 in first line (cf
stage 0).*
Reverse this and you get:
Just testing 12
to see how 10
this works. 11
Here is the code to print the reasonning,(in python sorry I don't use C#...but I someone actually translated the code in C#) :
def minragged(text, n=3):
P=2
words = text.split()
cumwordwidth = [0]
# cumwordwidth[-1] is the last element
for word in words:
cumwordwidth.append(cumwordwidth[-1] + len(word))
totalwidth = cumwordwidth[-1] + len(words) - 1 # len(words) - 1 spaces
linewidth = float(totalwidth - (n - 1)) / float(n) # n - 1 line breaks
print "number of words:", len(words)
def cost(i, j):
"""
cost of a line words[i], ..., words[j - 1] (words[i:j])
"""
actuallinewidth = max(j - i - 1, 0) + (cumwordwidth[j] - cumwordwidth[i])
return (linewidth - float(actuallinewidth)) ** P
"""
printing the reasoning and reversing the return list
"""
F={} # Total cost function
for stage in range(n):
print "------------------------------------"
print "stage :",stage
print "------------------------------------"
print "word i to j in line",stage,"\t\tTotalCost (f(j))"
print "------------------------------------"
if stage==0:
F[stage]=[]
i=0
for j in range(i,len(words)+1):
print "i=",i,"j=",j,"\t\t\t",cost(i,j)
F[stage].append([cost(i,j),0])
elif stage==(n-1):
F[stage]=[[float('inf'),0] for i in range(len(words)+1)]
for i in range(len(words)+1):
j=len(words)
if F[stage-1][i][0]+cost(i,j)<F[stage][j][0]: #calculating min cost (cf f formula)
F[stage][j][0]=F[stage-1][i][0]+cost(i,j)
F[stage][j][1]=i
print "i=",i,"j=",j,"\t\t\t",F[stage][j][0]
else:
F[stage]=[[float('inf'),0] for i in range(len(words)+1)]
for i in range(len(words)+1):
for j in range(i,len(words)+1):
if F[stage-1][i][0]+cost(i,j)<F[stage][j][0]:
F[stage][j][0]=F[stage-1][i][0]+cost(i,j)
F[stage][j][1]=i
print "i=",i,"j=",j,"\t\t\t",F[stage][j][0]
print 'reversing list'
print "------------------------------------"
listWords=[]
a=len(words)
for k in xrange(n-1,0,-1):#reverse loop from n-1 to 1
listWords.append(' '.join(words[F[k][a][1]:a]))
a=F[k][a][1]
listWords.append(' '.join(words[0:a]))
listWords.reverse()
for line in listWords:
print line, '\t\t',len(line)
return listWords

Here is the accepted solution from Algorithm to divide text into 3 evenly-sized groups converted to C#:
static List<string> Minragged(string text, int n = 3)
{
var words = text.Split();
var cumwordwidth = new List<int>();
cumwordwidth.Add(0);
foreach (var word in words)
cumwordwidth.Add(cumwordwidth[cumwordwidth.Count - 1] + word.Length);
var totalwidth = cumwordwidth[cumwordwidth.Count - 1] + words.Length - 1;
var linewidth = (double)(totalwidth - (n - 1)) / n;
var cost = new Func<int, int, double>((i, j) =>
{
var actuallinewidth = Math.Max(j - i - 1, 0) + (cumwordwidth[j] - cumwordwidth[i]);
return (linewidth - actuallinewidth) * (linewidth - actuallinewidth);
});
var best = new List<List<Tuple<double, int>>>();
var tmp = new List<Tuple<double, int>>();
best.Add(tmp);
tmp.Add(new Tuple<double, int>(0.0f, -1));
foreach (var word in words)
tmp.Add(new Tuple<double, int>(double.MaxValue, -1));
for (int l = 1; l < n + 1; ++l)
{
tmp = new List<Tuple<double, int>>();
best.Add(tmp);
for (int j = 0; j < words.Length + 1; ++j)
{
var min = new Tuple<double, int>(best[l - 1][0].Item1 + cost(0, j), 0);
for (int k = 0; k < j + 1; ++k)
{
var loc = best[l - 1][k].Item1 + cost(k, j);
if (loc < min.Item1 || (loc == min.Item1 && k < min.Item2))
min = new Tuple<double, int>(loc, k);
}
tmp.Add(min);
}
}
var lines = new List<string>();
var b = words.Length;
for (int l = n; l > 0; --l)
{
var a = best[l][b].Item2;
lines.Add(string.Join(" ", words, a, b - a));
b = a;
}
lines.Reverse();
return lines;
}

There was a discussion about this exact problem (though it was phrased in a different way) at http://www.perlmonks.org/?node_id=180276.
In the end the best solution was to do a binary search through all possible widths to find the smallest width that wound up with no more than the desired number of columns. If there are n items and the average width is m, then you'll need O(log(n) + log(m)) passes to find the right width, each of which takes O(n) time, for O(n * (log(n) + log(m))). This is probably fast enough with no more need to be clever.
If you wish to be clever, you can create an array of word counts, and cumulative lengths of the words. Then use binary searches on this data structure to figure out where the line breaks are. Creating this data structure is O(n), and it makes all of the passes to figure out the right width be O(log(n) * (log(n) + log(m))) which for reasonable lengths of words is dominated by your first O(n) pass.
If the widths of words can be floating point, you'll need to do something more clever with the binary searches, but you are unlikely to need that particular optimization.

btilly has the right answer here, but just for fun I decided to code up a solution in python:
def wrap_min_width(words, n):
r, l = [], ""
for w in words:
if len(w) + len(l) > n:
r, l = r + [l], ""
l += (" " if len(l) > 0 else "") + w
return r + [l]
def min_lines(phrase, lines):
words = phrase.split(" ")
hi, lo = sum([ len(w) for w in words ]), min([len(w) for w in words])
while lo < hi:
mid = lo + (hi-lo)/2
v = wrap_min_width(words, mid)
if len(v) > lines:
lo = mid + 1
elif len(v) <= lines:
hi = mid
return lo, "\n".join(wrap_min_width(words, lo))
Now this still may not be exactly what you want, since if it is possible to wrap the words in fewer than n lines using the same line width, it instead returns the smallest number of lines encoding. (Of course you can always add extra empty lines, but it is a bit silly.) If I run it on your test case, here is what I get:
Case: "I would like to be wrapped into three lines", 3 lines
Result: 14 chars/line
I would like to
be wrapped into
three lines

I just thought of an approach:
You can write a function accepting two parameters 1. String 2. Number of lines
Get the length of the string (String.length if using C#).
Divide the length by number of lines (lets say the result is n)
Now start a loop and access each character of the string (using string[i])
Insert a '\n\r' after every nth occurrence in the array of characters.
In the loop maintain a temp string array which would be null if there is a blank character(maintaining each word).
If there is a nth occurrence and temp string is not null then insert '\n\r' after that temp string.

I'll assume you're trying to minimize the maximum width of a string with n breaks. This can be done in O(words(str)*n) time and space using dynamic programming or recursion with memoziation.
The recurrence would look like this where the word has been split in to words
def wordwrap(remaining_words, n):
if n > 0 and len(remaining_words)==0:
return INFINITY #we havent chopped enough lines
if n == 0:
return len(remaining_words.join(' ')) # rest of the string
best = INFINITY
for i in range remaining_words:
# split here
best = min( max(wordwrap( remaining_words[i+1:], n-1),remaining_words[:i].join(' ')), best )
return best

I converted the C# accepted answer to JavaScript for something I was working on. Posting it here might save someone a few minutes of doing it themselves.
function WrapTextWithLimit(text, n) {
var words = text.toString().split(' ');
var cumwordwidth = [0];
words.forEach(function(word) {
cumwordwidth.push(cumwordwidth[cumwordwidth.length - 1] + word.length);
});
var totalwidth = cumwordwidth[cumwordwidth.length - 1] + words.length - 1;
var linewidth = (totalwidth - (n - 1.0)) / n;
var cost = function(i, j) {
var actuallinewidth = Math.max(j - i - 1, 0) + (cumwordwidth[j] - cumwordwidth[i]);
return (linewidth - actuallinewidth) * (linewidth - actuallinewidth);
};
var best = [];
var tmp = [];
best.push(tmp);
tmp.push([0.0, -1]);
words.forEach(function(word) {
tmp.push([Number.MAX_VALUE, -1]);
});
for (var l = 1; l < n + 1; ++l)
{
tmp = [];
best.push(tmp);
for (var j = 0; j < words.length + 1; ++j)
{
var min = [best[l - 1][0][0] + cost(0, j), 0];
for (var k = 0; k < j + 1; ++k)
{
var loc = best[l - 1][k][0] + cost(k, j);
if (loc < min[0] || (loc === min[0] && k < min[1])) {
min = [loc, k];
}
}
tmp.push(min);
}
}
var lines = [];
var b = words.length;
for (var p = n; p > 0; --p) {
var a = best[p][b][1];
lines.push(words.slice(a, b).join(' '));
b = a;
}
lines.reverse();
return lines;
}

This solution improves on Mikola's.
It's better because
It doesn't use strings. You don't need to use strings and concatenate them. You just need an array of their lengths. So, because of this it's faster, also you can use this method with any kind of "element" - you just need the widths.
There was some unnecessary processing in the wrap_min_width function. It just kept going even when it went beyond the point of failure. Also, it just builds the string unnecessarily.
Added the "separator width" as an adjustable parameter.
It calculates the min width - which is really what you want.
Fixed some bugs.
This is written in Javascript:
// For testing calcMinWidth
var formatString = function (str, nLines) {
var words = str.split(" ");
var elWidths = words.map(function (s, i) {
return s.length;
});
var width = calcMinWidth(elWidths, 1, nLines, 0.1);
var format = function (width)
{
var lines = [];
var curLine = null;
var curLineLength = 0;
for (var i = 0; i < words.length; ++i) {
var word = words[i];
var elWidth = elWidths[i];
if (curLineLength + elWidth > width)
{
lines.push(curLine.join(" "));
curLine = [word];
curLineLength = elWidth;
continue;
}
if (i === 0)
curLine = [word];
else
{
curLineLength += 1;
curLine.push(word);
}
curLineLength += elWidth;
}
if (curLine !== null)
lines.push(curLine.join(" "));
return lines.join("\n");
};
return format(width);
};
var calcMinWidth = function (elWidths, separatorWidth, lines, tolerance)
{
var testFit = function (width)
{
var nCurLine = 1;
var curLineLength = 0;
for (var i = 0; i < elWidths.length; ++i) {
var elWidth = elWidths[i];
if (curLineLength + elWidth > width)
{
if (elWidth > width)
return false;
if (++nCurLine > lines)
return false;
curLineLength = elWidth;
continue;
}
if (i > 0)
curLineLength += separatorWidth;
curLineLength += elWidth;
}
return true;
};
var hi = 0;
var lo = null;
for (var i = 0; i < elWidths.length; ++i) {
var elWidth = elWidths[i];
if (i > 0)
hi += separatorWidth;
hi += elWidth;
if (lo === null || elWidth > lo)
lo = elWidth;
}
if (lo === null)
lo = 0;
while (hi - lo > tolerance)
{
var guess = (hi + lo) / 2;
if (testFit(guess))
hi = guess;
else
lo = guess;
}
return hi;
};

Related

Count the number of ways a given sequence of digits can be split with each part at most K

Hi guys I'm practicing dynamic programming and came across the following problem:
Given a number K, 0 <= K <= 10^100, a sequence of digits N, what is the number of possible ways of dividing N so that each part is at most K?
Input:
K = 8
N = 123
Output: 1
Explanation:
123
1-23
12-3
1-2-3
Are all possibilities of spliting N and only the last one is valid...
What I have achieved so far:
Let Dp[i] = the number of valid ways of dividing N, using i first digits.
Given a state, i must use the previous answer to compute new answers, we have 2 possibilities:
Use dp[i-1] + number of valid ways that split the digit i
Use dp[i-1] + number of valid ways that not split the digit i
But I'm stuck there and I don't know what to do
Thanks
Using dynamic programming implies that you need to think about the problem in terms of subproblems.
Let's denote by N[i...] the suffix of N starting at index i (for instance, with N = 45678955, we have N[3...] = 78955)
Let's denote by dp[i] the number of possible ways of dividing N[i...] so that each part is at most K.
We will also use a small function, max_part_len(N, K, i) which will represent the maximum length of a 'part' starting at i. For instance, with N = 45678955, K = 37, i = 3, we have max_part_len(N, K, i) = 1 because 7 < 37 but 78 > 37.
Now we can write the recurrence (or induction) relation on dp[i].
dp[i] = sum_(j from 1 to max_part_len(N, K, i)) dp[i+j]
This relation means that the the number of possible ways of dividing N[i...] so that each part is at most K, is:
The sum of the the number of possible ways of dividing N[i+j...] so that each part is at most K, for each j such that N[i...j] <= k.
From there the algorithm is quite straight forward if you understood the basics of dynamic programming, I leave this part to you ;-)
I think we can also use divide and conquer. Let f(l, r) represent the number of ways to divide the range of digits indexed from l to r, so that each part is at most k. Then divide the string, 45678955 in two:
4567 8955
and the result would be
f(4567) * f(8955)
plus a division with a part that includes at least one from each side of the split, so each left extension paired with all right extensions. Say k was 1000. Then
f(456) * 1 * f(955) + // 78
f(456) * 1 * f(55) + // 789
f(45) * 1 * f(955) // 678
where each one of the calls to f performs a similar divide and conquer.
Here's JavaScript code comparing a recursive (top-down) implementation of m.raynal's algorithm with this divide and conquer:
function max_part_len(N, K, i){
let d = 0;
let a = 0;
while (a <= K && d <= N.length - i){
d = d + 1;
a = Number(N.substr(i, d));
}
return d - 1;
}
// m.raynal's algorithm
function f(N, K, i, memo={}){
let key = String([N, i])
if (memo.hasOwnProperty(key))
return memo[key];
if (i == N.length)
return 1
if (i == N.length - 1)
return (Number(N[i]) <= K) & 1
let s = 0;
for (let j=1; j<=max_part_len(N, K, i); j++)
s = s + f(N, K, i + j, memo);
return memo[key] = s;
}
// divide and conquer
function g(N, K, memo={}){
if (memo.hasOwnProperty(N))
return memo[N];
if (!N)
return memo[N] = 1;
if (N.length == 1)
return memo[N] = (Number(N) <= K) & 1;
let mid = Math.floor(N.length / 2);
let left = g(N.substr(0, mid), K);
let right = g(N.substr(mid), K);
let s = 0;
let i = mid - 1;
let j = mid;
let str = N.substring(i, j + 1);
while (i >= 0 && Number(str) <= K){
if (j == N.length){
if (i == 0){
break;
} else{
i = i - 1;
j = mid;
str = N.substring(i, j + 1);
continue
}
}
let l = g(N.substring(0, i), K, memo);
let r = g(N.substring(j + 1, N.length, memo), K);
s = s + l * r;
j = j + 1;
str = N.substring(i, j + 1);
if (Number(str) > K){
j = mid;
i = i - 1;
str = N.substring(i, j + 1);
}
}
return memo[N] = left * right + s;
}
let start = new Date;
for (let i=5; i<100000; i++){
let k = Math.ceil(Math.random() * i)
let ii = String(i);
let ff = f(ii, k, 0);
}
console.log(`Running f() 100,000 times took ${ (new Date - start)/1000 } sec`)
start = new Date;
for (let i=5; i<100000; i++){
let k = Math.ceil(Math.random() * i)
let ii = String(i);
let gg = g(ii, k);
}
console.log(`Running g() 100,000 times took ${ (new Date - start)/1000 } sec`)
start = new Date;
for (let i=5; i<100000; i++){
let k = Math.ceil(Math.random() * i)
let ii = String(i);
let ff = f(ii, k, 0);
let gg = g(ii, k);
if (ff != gg){
console.log("Mismatch found.", ii, k, ff, gg);
break;
}
}
console.log(`No discrepancies found between f() and g(). ${ (new Date - start)/1000 } sec`)

What AI technique or something else should I use on this logic problem?

for didactic purposes I'm trying to complete this challenge:
A random 3 digit number is generated (1-9) the Char sequence is DESC, for exemple "123" is a invalid number generated cause 1 > 2 and 2 > 3;
"321" is valid.
when you try to guess the number it returns C for correct Char in exact placement and N for correct Char in wrong placement, eg:
randomly generated "961"
algorithm try "321" and XPTO returns C = 1 and N = 0
algorithm try "654" and XPTO returns C = 0 and N = 1
The objective is get C=3 efficiently with less tries as possible, don't even know what kind of AI technique I need to learn about, Any tip or recommendation?
After the hint of #juvian I built an js solution
First create a List:
function generateList(){
List = new Array();
for (i = 1; i <= 7; i++) {
for (j = i+1; j <= 8; j++) {
for (k = j+1; k <= 9; k++) {
List.push(""+k+j+i);}}}
WriteResult(List.length,List[0]);
}
Then make the tests and exclude from List the "not possible" combinations
for( var i = List.length-1; i >= 0; i--){
var C = Number(document.getElementById("C").value);
var N = Number(document.getElementById("N").value);
var Guess = ""+Number(document.getElementById("Guess").value);
var CountC = 0
var CountN = 0
for( var j = 0; j < 3; j++)
{
if (Guess.substr(j, 1) == List[i].substr(j, 1)) {CountC++;}
else if (List[i].split(Guess.substr(j, 1)).length > 1) {CountN++;}
}
console.log(List[i]+" C:"+CountC+" N:"+CountN);
if (CountC != C || CountN != N) {List.splice(i, 1); }
}
WriteResult(List.length,List[0]);
}

Hungarian Algorithm: finding minimum number of lines to cover zeroes?

I am trying to implement the Hungarian Algorithm but I am stuck on the step 5. Basically, given a n X n matrix of numbers, how can I find minimum number of vertical+horizontal lines such that the zeroes in the matrix are covered?
Before someone marks this question as a duplicate of this, the solution mentioned there is incorrect and someone else also ran into the bug in the code posted there.
I am not looking for code but rather the concept by which I can draw these lines...
EDIT:
Please do not post the simple (but wrong) greedy algorithm:
Given this input:
(0, 1, 0, 1, 1)
(1, 1, 0, 1, 1)
(1, 0, 0, 0, 1)
(1, 1, 0, 1, 1)
(1, 0, 0, 1, 0)
I select, column 2 obviously (0-indexed):
(0, 1, x, 1, 1)
(1, 1, x, 1, 1)
(1, 0, x, 0, 1)
(1, 1, x, 1, 1)
(1, 0, x, 1, 0)
Now I can either select row 2 or col 1 both of which have two "remaining" zeroes. If I select col2, I end up with incorrect solution down this path:
(0, x, x, 1, 1)
(1, x, x, 1, 1)
(1, x, x, 0, 1)
(1, x, x, 1, 1)
(1, x, x, 1, 0)
The correct solution is using 4 lines:
(x, x, x, x, x)
(1, 1, x, 1, 1)
(x, x, x, x, x)
(1, 1, x, 1, 1)
(x, x, x, x, x)
Update
I have implemented the Hungarian Algorithm in the same steps provided by the link you posted: Hungarian algorithm
Here's the files with comments:
Github
Algorithm (Improved greedy) for step 3: (This code is very detailed and good for understanding the concept of choosing line to draw: horizontal vs Vertical. But note that this step code is improved in my code in Github)
Calculate the max number of zeros vertically vs horizontally for each xy position in the input matrix and store the result in a separate array called m2.
While calculating, if horizontal zeros > vertical zeroes, then the calculated number is converted to negative. (just to distinguish which direction we chose for later use)
Loop through all elements in the m2 array. If the value is positive, draw a vertical line in array m3, if value is negative, draw an horizontal line in m3
Follow the below example + code to understand more the algorithm:
Create 3 arrays:
m1: First array, holds the input values
m2: Second array, holds maxZeroes(vertical,horizontal) at each x,y position
m3: Third array, holds the final lines (0 index uncovered, 1 index covered)
Create 2 functions:
hvMax(m1,row,col); returns maximum number of zeroes horizontal or vertical. (Positive number means vertical, negative number means horizontal)
clearNeighbours(m2, m3,row,col); void method, it will clear the horizontal neighbors if the value at row col indexes is negative, or clear vertical neighbors if positive. Moreover, it will set the line in the m3 array, by flipping the zero bit to 1.
Code
public class Hungarian {
public static void main(String[] args) {
// m1 input values
int[][] m1 = { { 0, 1, 0, 1, 1 }, { 1, 1, 0, 1, 1 }, { 1, 0, 0, 0, 1 },
{ 1, 1, 0, 1, 1 }, { 1, 0, 0, 1, 0 } };
// int[][] m1 = { {13,14,0,8},
// {40,0,12,40},
// {6,64,0,66},
// {0,1,90,0}};
// int[][] m1 = { {0,0,100},
// {50,100,0},
// {0,50,50}};
// m2 max(horizontal,vertical) values, with negative number for
// horizontal, positive for vertical
int[][] m2 = new int[m1.length][m1.length];
// m3 where the line are drawen
int[][] m3 = new int[m1.length][m1.length];
// loop on zeroes from the input array, and sotre the max num of zeroes
// in the m2 array
for (int row = 0; row < m1.length; row++) {
for (int col = 0; col < m1.length; col++) {
if (m1[row][col] == 0)
m2[row][col] = hvMax(m1, row, col);
}
}
// print m1 array (Given input array)
System.out.println("Given input array");
for (int row = 0; row < m1.length; row++) {
for (int col = 0; col < m1.length; col++) {
System.out.print(m1[row][col] + "\t");
}
System.out.println();
}
// print m2 array
System.out
.println("\nm2 array (max num of zeroes from horizontal vs vertical) (- for horizontal and + for vertical)");
for (int row = 0; row < m1.length; row++) {
for (int col = 0; col < m1.length; col++) {
System.out.print(m2[row][col] + "\t");
}
System.out.println();
}
// Loop on m2 elements, clear neighbours and draw the lines
for (int row = 0; row < m1.length; row++) {
for (int col = 0; col < m1.length; col++) {
if (Math.abs(m2[row][col]) > 0) {
clearNeighbours(m2, m3, row, col);
}
}
}
// prinit m3 array (Lines array)
System.out.println("\nLines array");
for (int row = 0; row < m1.length; row++) {
for (int col = 0; col < m1.length; col++) {
System.out.print(m3[row][col] + "\t");
}
System.out.println();
}
}
// max of vertical vs horizontal at index row col
public static int hvMax(int[][] m1, int row, int col) {
int vertical = 0;
int horizontal = 0;
// check horizontal
for (int i = 0; i < m1.length; i++) {
if (m1[row][i] == 0)
horizontal++;
}
// check vertical
for (int i = 0; i < m1.length; i++) {
if (m1[i][col] == 0)
vertical++;
}
// negative for horizontal, positive for vertical
return vertical > horizontal ? vertical : horizontal * -1;
}
// clear the neighbors of the picked largest value, the sign will let the
// app decide which direction to clear
public static void clearNeighbours(int[][] m2, int[][] m3, int row, int col) {
// if vertical
if (m2[row][col] > 0) {
for (int i = 0; i < m2.length; i++) {
if (m2[i][col] > 0)
m2[i][col] = 0; // clear neigbor
m3[i][col] = 1; // draw line
}
} else {
for (int i = 0; i < m2.length; i++) {
if (m2[row][i] < 0)
m2[row][i] = 0; // clear neigbor
m3[row][i] = 1; // draw line
}
}
m2[row][col] = 0;
m3[row][col] = 1;
}
}
Output
Given input array
0 1 0 1 1
1 1 0 1 1
1 0 0 0 1
1 1 0 1 1
1 0 0 1 0
m2 array (max num of zeroes from horizontal vs vertical) (- for horizontal and + for vertical)
-2 0 5 0 0
0 0 5 0 0
0 -3 5 -3 0
0 0 5 0 0
0 -3 5 0 -3
Lines array
1 1 1 1 1
0 0 1 0 0
1 1 1 1 1
0 0 1 0 0
1 1 1 1 1
PS: Your example that you pointed to, will never occur because as you can see the first loop do the calculations by taking the max(horizontal,vertical) and save them in m2. So col1 will not be selected because -3 means draw horizontal line, and -3 was calculated by taking the max between horizontal vs vertical zeros. So at the first iteration at the elements, the program has checked how to draw the lines, on the second iteration, the program draw the lines.
Greedy algorithms may not work for some cases.
Firstly, it is possible reformulate your problem as following: given a bipartite graph, find a minimum vertex cover. In this problem there are 2n nodes, n for rows and n for columns. There is an edge between two nodes if element at the intersection of corresponding column and row is zero. Vertex cover is a set of nodes (rows and columns) such that each edge is incident to some node from that set (each zero is covered by row or column).
This is a well known problem and can be solved in O(n^3) by finding a maximum matching. Check wikipedia for details
There are cases where Amir's code fails.
Consider the following m1:
0 0 1
0 1 1
1 0 1
The best solution is to draw vertical lines in the first two columns.
Amir's code would give the following m2:
-2 -2 0
2 0 0
0 2 0
And the result would draw the two vertical lines AS WELL AS a line in the first row.
It seems to me the problem is the tie-breaking case:
return vertical > horizontal ? vertical : horizontal * -1;
Because of the way the code is written, the very similar m1 will NOT fail:
0 1 1
1 0 1
0 0 1
Where the first row is moved to the bottom, because the clearing function will clear the -2 values from m2 before those cells are reached. In the first case, the -2 values are hit first, so a horizontal line is drawn through the first row.
I've been working a little through this, and this is what I have. In the case of a tie, do not set any value and do not draw a line through those cells. This covers the case of the matrix I mentioned above, we are done at this step.
Clearly, there are situations where there will remain 0s that are uncovered. Below is another example of a matrix that will fail in Amir's method (m1):
0 0 1 1 1
0 1 0 1 1
0 1 1 0 1
1 1 0 0 1
1 1 1 1 1
The optimal solution is four lines, e.g. the first four columns.
Amir's method gives m2:
3 -2 0 0 0
3 0 -2 0 0
3 0 0 -2 0
0 0 -2 -2 0
0 0 0 0 0
Which will draw lines at the first four rows and the first column (an incorrect solution, giving 5 lines). Again, the tie-breaker case is the issue. We solve this by not setting a value for the ties, and iterating the procedure.
If we ignore the ties we get an m2:
3 -2 0 0 0
3 0 0 0 0
3 0 0 0 0
0 0 0 0 0
0 0 0 0 0
This leads to covering only the first row and the first column. We then take out the 0s that are covered to give new m1:
1 1 1 1 1
1 1 0 1 1
1 1 1 0 1
1 1 0 0 1
1 1 1 1 1
Then we keep repeating the procedure (ignoring ties) until we reach a solution. Repeat for a new m2:
0 0 0 0 0
0 0 2 0 0
0 0 0 2 0
0 0 0 0 0
0 0 0 0 0
Which leads to two vertical lines through the second and third columns. All 0s are now covered, needing only four lines (this is an alternative to lining the first four columns). The above matrix only needs 2 iterations, and I imagine most of these cases will need only two iterations unless there are sets of ties nested within sets of ties. I tried to come up with one, but it became difficult to manage.
Sadly, this is not good enough, because there will be cases that will remain tied forever. Particularly, in cases where there is a 'disjoint set of tied cells'. Not sure how else to describe this except to draw the following two examples:
0 0 1 1
0 1 1 1
1 0 1 1
1 1 1 0
or
0 0 1 1 1
0 1 1 1 1
1 0 1 1 1
1 1 1 0 0
1 1 1 0 0
The upper-left 3x3 sub-matrices in these two examples are identical to my original example, I have added 1 or 2 rows/cols to that example at the bottom and right. The only newly added zeros are where the new rows and columns cross. Describing for clarity.
With the iterative method I described, these matrices will be caught in an infinite loop. The zeros will always remain tied (col-count vs row-count). At this point, it does make sense to just arbitrarily choose a direction in the case of a tie, at least from what I can imagine.
The only issue I'm running into is setting up the stopping criteria for the loop. I can't assume that 2 iterations is enough (or any n), but I also can't figure out how to detect if a matrix has only infinite loops left within it. I'm still not sure how to describe these disjoint-tied-sets computationally.
Here is the code to do what I have come up with so far (in MATLAB script):
function [Lines, AllRows, AllCols] = FindMinLines(InMat)
%The following code finds the minimum set of lines (rows and columns)
%required to cover all of the true-valued cells in a matrix. If using for
%the Hungarian problem where 'true-values' are equal to zero, make the
%necessary changes. This code is not complete, since it will be caught in
%an infinite loop in the case of disjoint-tied-sets
%If passing in a matrix where 0s are the cells of interest, uncomment the
%next line
%InMat = InMat == 0;
%Assume square matrix
Count = length(InMat);
Lines = zeros(Count);
%while there are any 'true' values not covered by lines
while any(any(~Lines & InMat))
%Calculate row-wise and col-wise totals of 'trues' not-already-covered
HorzCount = repmat(sum(~Lines & InMat, 2), 1, Count).*(~Lines & InMat);
VertCount = repmat(sum(~Lines & InMat, 1), Count, 1).*(~Lines & InMat);
%Calculate for each cell the difference between row-wise and col-wise
%counts. I.e. row-oriented cells will have a negative number, col-oriented
%cells will have a positive numbers, ties and 'non-trues' will be 0.
%Non-zero values indicate lines to be drawn where orientation is determined
%by sign.
DiffCounts = VertCount - HorzCount;
%find the row and col indices of the lines
HorzIdx = any(DiffCounts < 0, 2);
VertIdx = any(DiffCounts > 0, 1);
%Set the horizontal and vertical indices of the Lines matrix to true
Lines(HorzIdx, :) = true;
Lines(:, VertIdx) = true;
end
%compute index numbers to be returned.
AllRows = [find(HorzIdx); find(DisjTiedRows)];
AllCols = find(VertIdx);
end
Step 5:
The drawing of line in the matrix is evaluated diagonally with a maximum evaluations of the length of the matrix.
Based on http://www.wikihow.com/Use-the-Hungarian-Algorithm with Steps 1 - 8 only.
Run code snippet and see results in console
Console Output
horizontal line (row): {"0":0,"2":2,"4":4}
vertical line (column): {"2":2}
Step 5: Matrix
0 1 0 1 1
1 1 0 1 1
1 0 0 0 1
1 1 0 1 1
1 0 0 1 0
Smallest number in uncovered matrix: 1
Step 6: Matrix
x x x x x
1 1 x 1 1
x x x x x
1 1 x 1 1
x x x x x
JSFiddle: http://jsfiddle.net/jjcosare/6Lpz5gt9/2/
// http://www.wikihow.com/Use-the-Hungarian-Algorithm
var inputMatrix = [
[0, 1, 0, 1, 1],
[1, 1, 0, 1, 1],
[1, 0, 0, 0, 1],
[1, 1, 0, 1, 1],
[1, 0, 0, 1, 0]
];
//var inputMatrix = [
// [10, 19, 8, 15],
// [10, 18, 7, 17],
// [13, 16, 9, 14],
// [12, 19, 8, 18],
// [14, 17, 10, 19]
// ];
var matrix = inputMatrix;
var HungarianAlgorithm = {};
HungarianAlgorithm.step1 = function(stepNumber) {
console.log("Step " + stepNumber + ": Matrix");
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
var sb = "";
for (var j = 0; j < matrix[i].length; j++) {
currentNumber = matrix[i][j];
sb += currentNumber + " ";
}
console.log(sb);
}
}
HungarianAlgorithm.step2 = function() {
var largestNumberInMatrix = getLargestNumberInMatrix(matrix);
var rowLength = matrix.length;
var columnLength = matrix[0].length;
var dummyMatrixToAdd = 0;
var isAddColumn = rowLength > columnLength;
var isAddRow = columnLength > rowLength;
if (isAddColumn) {
dummyMatrixToAdd = rowLength - columnLength;
for (var i = 0; i < rowLength; i++) {
for (var j = columnLength; j < (columnLength + dummyMatrixToAdd); j++) {
matrix[i][j] = largestNumberInMatrix;
}
}
} else if (isAddRow) {
dummyMatrixToAdd = columnLength - rowLength;
for (var i = rowLength; i < (rowLength + dummyMatrixToAdd); i++) {
matrix[i] = [];
for (var j = 0; j < columnLength; j++) {
matrix[i][j] = largestNumberInMatrix;
}
}
}
HungarianAlgorithm.step1(2);
console.log("Largest number in matrix: " + largestNumberInMatrix);
function getLargestNumberInMatrix(matrix) {
var largestNumberInMatrix = 0;
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
for (var j = 0; j < matrix[i].length; j++) {
currentNumber = matrix[i][j];
largestNumberInMatrix = (largestNumberInMatrix > currentNumber) ?
largestNumberInMatrix : currentNumber;
}
}
return largestNumberInMatrix;
}
}
HungarianAlgorithm.step3 = function() {
var smallestNumberInRow = 0;
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
smallestNumberInRow = getSmallestNumberInRow(matrix, i);
console.log("Smallest number in row[" + i + "]: " + smallestNumberInRow);
for (var j = 0; j < matrix[i].length; j++) {
currentNumber = matrix[i][j];
matrix[i][j] = currentNumber - smallestNumberInRow;
}
}
HungarianAlgorithm.step1(3);
function getSmallestNumberInRow(matrix, rowIndex) {
var smallestNumberInRow = matrix[rowIndex][0];
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
currentNumber = matrix[rowIndex][i];
smallestNumberInRow = (smallestNumberInRow < currentNumber) ?
smallestNumberInRow : currentNumber;
}
return smallestNumberInRow;
}
}
HungarianAlgorithm.step4 = function() {
var smallestNumberInColumn = 0;
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
smallestNumberInColumn = getSmallestNumberInColumn(matrix, i);
console.log("Smallest number in column[" + i + "]: " + smallestNumberInColumn);
for (var j = 0; j < matrix[i].length; j++) {
currentNumber = matrix[j][i];
matrix[j][i] = currentNumber - smallestNumberInColumn;
}
}
HungarianAlgorithm.step1(4);
function getSmallestNumberInColumn(matrix, columnIndex) {
var smallestNumberInColumn = matrix[0][columnIndex];
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
currentNumber = matrix[i][columnIndex];
smallestNumberInColumn = (smallestNumberInColumn < currentNumber) ?
smallestNumberInColumn : currentNumber;
}
return smallestNumberInColumn;
}
}
var rowLine = {};
var columnLine = {};
HungarianAlgorithm.step5 = function() {
var zeroNumberCountRow = 0;
var zeroNumberCountColumn = 0;
rowLine = {};
columnLine = {};
for (var i = 0; i < matrix.length; i++) {
zeroNumberCountRow = getZeroNumberCountInRow(matrix, i);
zeroNumberCountColumn = getZeroNumberCountInColumn(matrix, i);
if (zeroNumberCountRow > zeroNumberCountColumn) {
rowLine[i] = i;
if (zeroNumberCountColumn > 1) {
columnLine[i] = i;
}
} else if (zeroNumberCountRow < zeroNumberCountColumn) {
columnLine[i] = i;
if (zeroNumberCountRow > 1) {
rowLine[i] = i;
}
} else {
if ((zeroNumberCountRow + zeroNumberCountColumn) > 2) {
rowLine[i] = i;
columnLine[i] = i;
}
}
}
var zeroCount = 0;
for (var i in columnLine) {
zeroCount = getZeroNumberCountInColumnLine(matrix, columnLine[i], rowLine);
if (zeroCount == 0) {
delete columnLine[i];
}
}
for (var i in rowLine) {
zeroCount = getZeroNumberCountInRowLine(matrix, rowLine[i], columnLine);
if (zeroCount == 0) {
delete rowLine[i];
}
}
console.log("horizontal line (row): " + JSON.stringify(rowLine));
console.log("vertical line (column): " + JSON.stringify(columnLine));
HungarianAlgorithm.step1(5);
//if ((Object.keys(rowLine).length + Object.keys(columnLine).length) == matrix.length) {
// TODO:
// HungarianAlgorithm.step9();
//} else {
// HungarianAlgorithm.step6();
// HungarianAlgorithm.step7();
// HungarianAlgorithm.step8();
//}
function getZeroNumberCountInColumnLine(matrix, columnIndex, rowLine) {
var zeroNumberCount = 0;
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
currentNumber = matrix[i][columnIndex];
if (currentNumber == 0 && !(rowLine[i] == i)) {
zeroNumberCount++
}
}
return zeroNumberCount;
}
function getZeroNumberCountInRowLine(matrix, rowIndex, columnLine) {
var zeroNumberCount = 0;
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
currentNumber = matrix[rowIndex][i];
if (currentNumber == 0 && !(columnLine[i] == i)) {
zeroNumberCount++
}
}
return zeroNumberCount;
}
function getZeroNumberCountInColumn(matrix, columnIndex) {
var zeroNumberCount = 0;
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
currentNumber = matrix[i][columnIndex];
if (currentNumber == 0) {
zeroNumberCount++
}
}
return zeroNumberCount;
}
function getZeroNumberCountInRow(matrix, rowIndex) {
var zeroNumberCount = 0;
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
currentNumber = matrix[rowIndex][i];
if (currentNumber == 0) {
zeroNumberCount++
}
}
return zeroNumberCount;
}
}
HungarianAlgorithm.step6 = function() {
var smallestNumberInUncoveredMatrix = getSmallestNumberInUncoveredMatrix(matrix, rowLine, columnLine);
console.log("Smallest number in uncovered matrix: " + smallestNumberInUncoveredMatrix);
var columnIndex = 0;
for (var i in columnLine) {
columnIndex = columnLine[i];
for (var i = 0; i < matrix.length; i++) {
currentNumber = matrix[i][columnIndex];
//matrix[i][columnIndex] = currentNumber + smallestNumberInUncoveredMatrix;
matrix[i][columnIndex] = "x";
}
}
var rowIndex = 0;
for (var i in rowLine) {
rowIndex = rowLine[i];
for (var i = 0; i < matrix.length; i++) {
currentNumber = matrix[rowIndex][i];
//matrix[rowIndex][i] = currentNumber + smallestNumberInUncoveredMatrix;
matrix[rowIndex][i] = "x";
}
}
HungarianAlgorithm.step1(6);
function getSmallestNumberInUncoveredMatrix(matrix, rowLine, columnLine) {
var smallestNumberInUncoveredMatrix = null;;
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
if (rowLine[i]) {
continue;
}
for (var j = 0; j < matrix[i].length; j++) {
if (columnLine[j]) {
continue;
}
currentNumber = matrix[i][j];
if (!smallestNumberInUncoveredMatrix) {
smallestNumberInUncoveredMatrix = currentNumber;
}
smallestNumberInUncoveredMatrix =
(smallestNumberInUncoveredMatrix < currentNumber) ?
smallestNumberInUncoveredMatrix : currentNumber;
}
}
return smallestNumberInUncoveredMatrix;
}
}
HungarianAlgorithm.step7 = function() {
var smallestNumberInMatrix = getSmallestNumberInMatrix(matrix);
console.log("Smallest number in matrix: " + smallestNumberInMatrix);
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
for (var j = 0; j < matrix[i].length; j++) {
currentNumber = matrix[j][i];
matrix[j][i] = currentNumber - smallestNumberInMatrix;
}
}
HungarianAlgorithm.step1(7);
function getSmallestNumberInMatrix(matrix) {
var smallestNumberInMatrix = matrix[0][0];
var currentNumber = 0;
for (var i = 0; i < matrix.length; i++) {
for (var j = 0; j < matrix[i].length; j++) {
currentNumber = matrix[i][j];
smallestNumberInMatrix = (smallestNumberInMatrix < currentNumber) ?
smallestNumberInMatrix : currentNumber;
}
}
return smallestNumberInMatrix;
}
}
HungarianAlgorithm.step8 = function() {
console.log("Step 8: Covering zeroes with Step 5 - 8 until Step 9 is reached");
HungarianAlgorithm.step5();
}
HungarianAlgorithm.step9 = function(){
console.log("Step 9...");
}
HungarianAlgorithm.step1(1);
HungarianAlgorithm.step2();
HungarianAlgorithm.step3();
HungarianAlgorithm.step4();
HungarianAlgorithm.step5();
HungarianAlgorithm.step6();
Do the assignment using the steps mentioned below:
assign a row if it has only one 0, else skip the row temporarily
cross out the 0's in the assigned column
Do the same for every column
After doing the assignment using the above steps, follow the steps below to get the minimum number of lines which cover all the 0's
step 1 - Tick an unassigned row
step 2 - If a ticked row has a 0, then tick the corresponding column
step 3 - If a ticked column has an assignment, then tick the corresponding row
step 4 - Repeat steps 2 and 3, till no more ticking is possible
step 5 - Draw lines through un-ticked rows and ticked columns
For your case: (0-indexing for rows and columns)
skip row 0, as it has two 0's
assign row 1, and cross out all the 0's in column 2
skip row 2, as it has two uncrossed 0's
skip row 3, as it has no uncrossed 0
skip row 4, as it has 2 uncrossed 0's
assign column 0
skip column 1 as it has two uncrossed 0's (in row-2 and row-4)
skip column 2, as it has an already assigned 0
assign column 3,and cross out the 0 in row 2
assign column 4, and cross out the 0 in row 4
assigned 0's are shown by '_' and 'x' shows crossed out 0's
( _ 1 x 1 1 ),
( 1 1 _ 1 1 ),
( 1 x x _ 1 ),
( 1 1 x 1 1 ),
( 1 x x 1 _ )
The matrix looks like the one shown above after doing the assignments
Now follow the 5 steps mentioned above to get the minimum number of lines that cover all the 0's
Tick row 3 as it is not assigned yet
Since row 3 has a 0 in column 2, tick column 2
Since column 2 has an assignment in row 1, tick row 1
Now draw lines through un-ticked rows (i.e. row 0,2,4) and ticked columns(i.e. column 2)
These 4 lines will cover all the 0's
Hope this helps:)
PS : For cases where no initial assignment is possible due to multiple 0's in each row and column, this could be handled by taking one arbitrary assignment (For the cases where multiple 0's are present in each row and column, it is very likely that more than one possible assignment would result in an optimal solution)
#CMPS answer fails on quite a few graphs. I think I have implemented a solution which solves the problem.
I followed the Wikipedia article on the Hungarian algorithm and I made an implementation that seems to work all the time.
From Wikipedia, here is a the method to draw the minimum number of lines:
First, assign as many tasks as possible.
Mark all rows having no assignments.
Mark all (unmarked) columns having zeros in newly marked row(s).
Mark all rows having assignments in newly marked columns.
Repeat for all non-assigned rows.
Here is my Ruby implementation:
def draw_lines grid
#copies the array
marking_grid = grid.map { |a| a.dup }
marked_rows = Array.new
marked_cols = Array.new
while there_is_zero(marking_grid) do
marking_grid = grid.map { |a| a.dup }
marked_cols.each do |col|
cross_out(marking_grid,nil, col)
end
marked = assignment(grid, marking_grid)
marked_rows = marked[0]
marked_cols.concat(marked[1]).uniq!
marking_grid = grid.map { |a| a.dup }
marking_grid.length.times do |row|
if !(marked_rows.include? row) then
cross_out(marking_grid,row, nil)
end
end
marked_cols.each do |col|
cross_out(marking_grid,nil, col)
end
end
lines = Array.new
marked_cols.each do |index|
lines.push(["column", index])
end
grid.each_index do |index|
if !(marked_rows.include? index) then
lines.push(["row", index])
end
end
return lines
end
def there_is_zero grid
grid.each_with_index do |row|
row.each_with_index do |value|
if value == 0 then
return true
end
end
end
return false
end
def assignment grid, marking_grid
marking_grid.each_index do |row_index|
first_zero = marking_grid[row_index].index(0)
#if there is no zero go to next row
if first_zero.nil? then
next
else
cross_out(marking_grid, row_index, first_zero)
marking_grid[row_index][first_zero] = "*"
end
end
return mark(grid, marking_grid)
end
def mark grid, marking_grid, marked_rows = Array.new, marked_cols = Array.new
marking_grid.each_with_index do |row, row_index|
selected_assignment = row.index("*")
if selected_assignment.nil? then
marked_rows.push(row_index)
end
end
marked_rows.each do |index|
grid[index].each_with_index do |cost, col_index|
if cost == 0 then
marked_cols.push(col_index)
end
end
end
marked_cols = marked_cols.uniq
marked_cols.each do |col_index|
marking_grid.each_with_index do |row, row_index|
if row[col_index] == "*" then
marked_rows.push(row_index)
end
end
end
return [marked_rows, marked_cols]
end
def cross_out(marking_grid, row, col)
if col != nil then
marking_grid.each_index do |i|
marking_grid[i][col] = "X"
end
end
if row != nil then
marking_grid[row].map! {|i| "X"}
end
end
grid = [
[0,0,1,0],
[0,0,1,0],
[0,1,1,1],
[0,1,1,1],
]
p draw_lines(grid)

How to find number of times a number repeated in pascal's triangle?

Can anyone give an algorithm to find the number of times a number repeats in pascal's triangle? For example
num - No of times
1 - infinite
2 - 1
3 - 2
4 - 2
. .
6 - 3
. .
10 - 4
. .
for image Link
Or in other way, how many nCr 's are possible for nCr = x , where x is any given integer?
Just count. You know n > 1 can only appear in the first n+1 rows of Pascal's triangle. And that each row is symmetric, and increasing (for the first half). That saves time.
See http://oeis.org/A003016 for more about the sequence
I had to write something similar for a hackathon challenge. This code will find all the numbers 1 to MAX_NUMBER_TO_SEARCH that have a count of more than MINIMUM_COUNT in the Pascal Triangle of size PASC_SIZE. You can obviously alter it to only count for a single number. Not super efficient obviously.
function pasc(n) {
var xx = [];
var d = 0;
var result = [];
result[0] = [1];
result[1] = [1, 1];
for (var row = 2; row < n; row++) {
result[row] = [1];
for (var col = 1; col <= row - 1; col++) {
result[row][col] = result[row - 1][col] + result[row - 1][col - 1];
result[row].push(1);
}
for (var ff = 0; ff < result[row].length; ff++) {
xx[d++] = (result[row][ff]);
}
}
return xx;
}
function countInArray(array, what) {
var count = 0;
for (var i = 0; i < array.length; i++) {
if (array[i] === what) {
count++;
}
}
return count;
}
var MAX_NUMBER_TO_SEARCH = 5000;
var MINIMUM_COUNT = 5;
var PASC_SIZE = 1000;
var dataset = pasc(PASC_SIZE);
for (var i = 0; i < MAX_NUMBER_TO_SEARCH; i++) {
if (countInArray(dataset, i) >= MINIMUM_COUNT) {
console.log(i + " Count:" + countInArray(dataset, i) + "\n");
}
}

Algorithm to calculate the number of 1s for a range of numbers in binary

So I just got back for the ACM Programing competition and did pretty well but there was one problem that not one team got.
The Problem.
Start with an integer N0 which is greater than 0. Let N1 be the number of ones in the binary representation of N0. So, if N0 = 27, N1 = 4. For all i > 0, let Ni be the number of ones in the binary representation of Ni-1. This sequence will always converge to one. For any starting number, N0, let K be the minimum value of i >= 0 for which N1 = 1. For example, if N0 = 31, then N1 = 5, N2 = 2, N3 = 1, so K = 3.
Given a range of consecutive numbers and a value of X how many numbers in the range have a K value equal to X?
Input
There will be several test cases in the input. Each test case will consist of three integers on a single line:
LO HI X
Where LO and HI (1 <= LO <= HI <= 10^18) are the lower and upper limits of a range of integers, and X (0 <= X <= 10) is the target value for K. The input will end with a line of three 0s.
Output
For each test case output a single integer, representing the number of integers in the range from LO to HI (inclusive) which have a K value equal to X in the input. Print each Integer on its own line with no spaces. Do not print any blank lines between answers.
Sample Input
31 31 3
31 31 1
27 31 1
27 31 2
1023 1025 1
1023 1025 2
0 0 0
Sample Output
1
0
0
3
1
1
If you guys want I can include our answer or our problem, because finding for a small range is easy but I will give you a hint first your program needs to run in seconds not minutes. We had a successful solution but not an efficient algorithm to use a range similar to
48238 10^18 9
Anyway good luck and if the community likes these we had some more we could not solve that could be some good brain teasers for you guys. The competition allows you to use Python, C++, or Java—all three are acceptable in an answer.
So as a hint my coach said to think of how binary numbers count rather than checking every bit. I think that gets us a lot closer.
I think a key is first understanding the pattern of K values and how rapidly it grows. Basically, you have:
K(1) = 0
K(X) = K(bitcount(X))+1 for X > 1
So finding the smallest X values for a given K we see
K(1) = 0
K(2) = 1
K(3) = 2
K(7) = 3
K(127) = 4
K(170141183460469231731687303715884105727) = 5
So for an example like 48238 10^18 9 the answer is trivially 0. K=0 only for 1, and K=1 only for powers of 2, so in the range of interest, we'll pretty much only see K values of 2, 3 or 4, and never see K >= 5
edit
Ok, so we're looking for an algorithm to count the number of values with K=2,3,4 in a range of value LO..HI without iterating over the entire range. So the first step is to find the number of values in the range with bitcount(x)==i for i = 1..59 (since we only care about values up to 10^18 and 10^18 < 2^60). So break down the range lo..hi into subranges that are a power of 2 size and differ only in their lower n bits -- a range of the form x*(2^n)..(x+1)*(2^n)-1. We can break down the arbitray lo..hi range into such subranges easily. For each such subrange there will be choose(n, i) values with i+bitcount(x) set bits.
So we just add all the subranges together to get a vector of counts for 1..59, which we then iterate over, adding together those elements with the same K value to get our answer.
edit (fixed again to be be C89 compatible and work for lo=1/k=0)
Here's a C program to do what I previously described:
#include <stdio.h>
#include <string.h>
#include <assert.h>
int bitcount(long long x) {
int rv = 0;
while(x) { rv++; x &= x-1; }
return rv; }
long long choose(long long m, long long n) {
long long rv = 1;
int i;
for (i = 0; i < n; i++) {
rv *= m-i;
rv /= i+1; }
return rv; }
void bitcounts_p2range(long long *counts, long long base, int l2range) {
int i;
assert((base & ((1LL << l2range) - 1)) == 0);
counts += bitcount(base);
for (i = 0; i <= l2range; i++)
counts[i] += choose(l2range, i); }
void bitcounts_range(long long *counts, long long lo, long long hi) {
int l2range = 0;
while (lo + (1LL << l2range) - 1 <= hi) {
if (lo & (1LL << l2range)) {
bitcounts_p2range(counts, lo, l2range);
lo += 1LL << l2range; }
l2range++; }
while (l2range >= 0) {
if (lo + (1LL << l2range) - 1 <= hi) {
bitcounts_p2range(counts, lo, l2range);
lo += 1LL << l2range; }
l2range--; }
assert(lo == hi+1); }
int K(int x) {
int rv = 0;
while(x > 1) {
x = bitcount(x);
rv++; }
return rv; }
int main() {
long long counts[64];
long long lo, hi, total;
int i, k;
while (scanf("%lld%lld%d", &lo, &hi, &k) == 3) {
if (lo < 1 || lo > hi || k < 0) break;
if (lo == 0 || hi == 0 || k == 0) break;
total = 0;
if (lo == 1) {
lo++;
if (k == 0) total++; }
memset(counts, 0, sizeof(counts));
bitcounts_range(counts, lo, hi);
for (i = 1; i < 64; i++)
if (K(i)+1 == k)
total += counts[i];
printf("%lld\n", total); }
return 0; }
which runs just fine for values up to 2^63-1 (LONGLONG_MAX).
For 48238 1000000000000000000 3 it gives 513162479025364957, which certainly seems plausible
edit
giving the inputs of
48238 1000000000000000000 1
48238 1000000000000000000 2
48238 1000000000000000000 3
48238 1000000000000000000 4
gives outputs of
44
87878254941659920
513162479025364957
398959266032926842
Those add up to 999999999999951763 which is correct. The value for k=1 is correct (there are 44 powers of two in that range 2^16 up to 2^59). So while I'm not sure the other 3 values are correct, they're certainly plausible.
The idea behind this answer can help you develop very fast solution. Having ranges 0..2^N the complexity of a potential algorithm would be O(N) in the worst case (Assuming that complexity of a long arithmetic is O(1)) If programmed correctly it should easily handle N = 1000000 in a matter of milliseconds.
Imagine we have the following values:
LO = 0; (0000000000000000000000000000000)
HI = 2147483647; (1111111111111111111111111111111)
The lowest possible N1 in range LO..HI is 0
The highest possible N1 in range LO..HI is 31
So the computation of N2..NN part is done only for one of 32 values (i.e. 0..31).
Which can be done simply, even without a computer.
Now lets compute the amount of N1=X for a range of values LO..HI
When we have X = 0 we have count(N1=X) = 1 this is the following value:
1 0000000000000000000000000000000
When we have X = 1 we have count(N1=X) = 31 these are the following values:
01 1000000000000000000000000000000
02 0100000000000000000000000000000
03 0010000000000000000000000000000
...
30 0000000000000000000000000000010
31 0000000000000000000000000000001
When we have X = 2 we have the following pattern:
1100000000000000000000000000000
How many unique strings can be formed with 29 - '0' and 2 - '1'?
Imagine the rightmost '1'(#1) is cycling from left to right, we get the following picture:
01 1100000000000000000000000000000
02 1010000000000000000000000000000
03 1001000000000000000000000000000
...
30 1000000000000000000000000000001
Now we've got 30 unique strings while moving the '1'(#1) from left to right, it is now impossible to
create a unique string by moving the '1'(#1) in any direction. This means we should move '1'(#2) to the right,
let's also reset the position of '1'(#1) as left as possible remaining uniqueness, we get:
01 0110000000000000000000000000000
now we do the cycling of '1'(#1) once again
02 0101000000000000000000000000000
03 0100100000000000000000000000000
...
29 0100000000000000000000000000001
Now we've got 29 unique strings, continuing this whole operation 28 times we get the following expression
count(N1=2) = 30 + 29 + 28 + ... + 1 = 465
When we have X = 3 the picture remains similar but we are moving '1'(#1), '1'(#2), '1'(#3)
Moving the '1'(#1) creates 29 unique strings, when we start moving '1'(#2) we get
29 + 28 + ... + 1 = 435 unique strings, after that we are left to process '1'(#3) so we have
29 + 28 + ... + 1 = 435
28 + ... + 1 = 406
...
+ 1 = 1
435 + 406 + 378 + 351 + 325 + 300 + 276 +
253 + 231 + 210 + 190 + 171 + 153 + 136 +
120 + 105 + 091 + 078 + 066 + 055 + 045 +
036 + 028 + 021 + 015 + 010 + 006 + 003 + 001 = 4495
Let's try to solve the general case i.e. when we have N zeros and M ones.
Overall amount of permutations for the string of length (N + M) is equal to (N + M)!
The amount of '0' duplicates in this string is equal to N!
The amount of '1' duplicates in this string is equal to M!
thus receiving overall amount of unique strings formed of N zeros and M ones is
(N + M)! 32! 263130836933693530167218012160000000
F(N, M) = ============= => ========== = ====================================== = 4495
(N!) * (M!) 3! * 29! 6 * 304888344611713860501504000000
Edit:
F(N, M) = Binomial(N + M, M)
Now let's consider a real life example:
LO = 43797207; (0000010100111000100101011010111)
HI = 1562866180; (1011101001001110111001000000100)
So how do we apply our unique permutations formula to this example? Since we don't know how
many '1' is located below LO and how many '1' is located above HI.
So lets count these permutations below LO and above HI.
Lets remember how we cycled '1'(#1), '1'(#2), ...
1111100000000000000000000000000 => 2080374784
1111010000000000000000000000000 => 2046820352
1111001000000000000000000000000 => 2030043136
1111000000000000000000000000001 => 2013265921
1110110000000000000000000000000 => 1979711488
1110101000000000000000000000000 => 1962934272
1110100100000000000000000000000 => 1954545664
1110100010000000000000000000001 => 1950351361
As you see this cycling process decreases the decimal values smoothly. So we need to count amount of
cycles until we reach HI value. But we shouldn't be counting these values by one because
the worst case can generate up to 32!/(16!*16!) = 601080390 cycles, which we will be cycling very long :)
So we need cycle chunks of '1' at once.
Having our example we would want to count the amount of cycles of a transformation
1111100000000000000000000000000 => 1011101000000000000000000000000
1011101001001110111001000000100
So how many cycles causes the transformation
1111100000000000000000000000000 => 1011101000000000000000000000000
?
Lets see, the transformation:
1111100000000000000000000000000 => 1110110000000000000000000000000
is equal to following set of cycles:
01 1111100000000000000000000000000
02 1111010000000000000000000000000
...
27 1111000000000000000000000000001
28 1110110000000000000000000000000
So we need 28 cycles to transform
1111100000000000000000000000000 => 1110110000000000000000000000000
How many cycles do we need to transform
1111100000000000000000000000000 => 1101110000000000000000000000000
performing following moves we need:
1110110000000000000000000000000 28 cycles
1110011000000000000000000000000 27 cycles
1110001100000000000000000000000 26 cycles
...
1110000000000000000000000000011 1 cycle
and 1 cycle for receiving:
1101110000000000000000000000000 1 cycle
thus receiving 28 + 27 + ... + 1 + 1 = 406 + 1
but we have seen this value before and it was the result for the amount of unique permutations, which was
computed for 2 '1' and 27 '0'. This means that amount of cycles while moving
11100000000000000000000000000 => 01110000000000000000000000000
is equal to moving
_1100000000000000000000000000 => _0000000000000000000000000011
plus one additional cycle
so this means if we have M zeros and N ones and want to move the chunk of U '1' to the right we will need to
perform the following amount of cycles:
(U - 1 + M)!
1 + =============== = f(U, M)
M! * (U - 1)!
Edit:
f(U, M) = 1 + Binomial(U - 1 + M, M)
Now let's come back to our real life example:
LO = 43797207; (0000010100111000100101011010111)
HI = 1562866180; (1011101001001110111001000000100)
so what we want to do is count the amount cycles needed to perform the following
transformations (suppose N1 = 6)
1111110000000000000000000000000 => 1011101001000000000000000000000
1011101001001110111001000000100
this is equal to:
1011101001000000000000000000000 1011101001000000000000000000000
------------------------------- -------------------------------
_111110000000000000000000000000 => _011111000000000000000000000000 f(5, 25) = 118756
_____11000000000000000000000000 => _____01100000000000000000000000 f(2, 24) = 301
_______100000000000000000000000 => _______010000000000000000000000 f(1, 23) = 24
________10000000000000000000000 => ________01000000000000000000000 f(1, 22) = 23
thus resulting 119104 'lost' cycles which are located above HI
Regarding LO, there is actually no difference in what direction we are cycling
so for computing LO we can do reverse cycling:
0000010100111000100101011010111 0000010100111000100101011010111
------------------------------- -------------------------------
0000000000000000000000000111___ => 0000000000000000000000001110___ f(3, 25) = 2926
00000000000000000000000011_____ => 00000000000000000000000110_____ f(2, 24) = 301
Thus resulting 3227 'lost' cycles which are located below LO this means that
overall amount of lost cycles = 119104 + 3227 = 122331
overall amount of all possible cycles = F(6, 25) = 736281
N1 in range 43797207..1562866180 is equal to 736281 - 122331 = 613950
I wont provide the remaining part of the solution. It is not that hard to grasp the remaining part. Good luck!
I think it's a problem in Discrete mathematics,
assuming LOW is 0,
otherwise we can insert a function for summing numbers below LOW,
from numbers shown i understand the longest number will consist up to 60 binary digit at most
alg(HIGH,k)
l=len(HIGH)
sum=0;
for(i=0;i<l;i++)
{
count=(l choose i);
nwia=numbers_with_i_above(i,HIGH);
if canreach(i,k) sum+=(count-nwia);
}
all the numbers appear
non is listed twice
numbers_with_i_above is trivial
canreach with numbers up to 60 is easy
len is it length of a binary represention
Zobgib,
The key to this problem is not to understand how rapidly the growth of K's pattern grows, but HOW it grows, itself. The first step in this is to understand (as your coach said) how binary numbers count, as this determines everything about how K is determined. Binary numbers follow a pattern that is distinct when counting the number of positive bits. Its a single progressive repetitive pattern. I am going to demonstrate in an unusual way...
Assume i is an integer value. Assume b is the number of positive bits in i
i = 1;
b = 1;
i = 2; 3;
b = 1; 2;
i = 4; 5; 6; 7;
b = 1; 2; 2; 3;
i = 8; 9; 10; 11; 12; 13; 14; 15;
b = 1; 2; 2; 3; 2; 3; 3; 4;
i = 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31;
b = 1; 2; 2; 3; 2; 3; 3; 4; 2; 3; 3; 4; 3; 4; 4; 5;
I assure you, this pattern holds to infinity, but if needed you
should be able to find or construct a proof easily.
If you look at the data above, you'll notice a distinct pattern related to 2^n. Each time you have an integer exponent of 2, the pattern will reset by including the each term of previous pattern, and then each term of the previous pattern incremented by 1. As such, to get K, you just apply the new number to the pattern above. The key is to find a single expression (that is efficient) to receive your number of bits.
For demonstration, yet again, you can further extrapolate a new pattern off of this, because it is static and follows the same progression. Below is the original data modified with its K value (based on the recursion).
Assume i is an integer value. Assume b is the number of positive bits in i
i = 1;
b = 1;
K = 1;
i = 2; 3;
b = 1; 2;
K = 1; 2;
i = 4; 5; 6; 7;
b = 1; 2; 2; 3;
K = 1; 2; 2; 3;
i = 8; 9; 10; 11; 12; 13; 14; 15;
b = 1; 2; 2; 3; 2; 3; 3; 4;
K = 1; 2; 2; 3; 2; 3; 3; 2;
i = 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31;
b = 1; 2; 2; 3; 2; 3; 3; 4; 2; 3; 3; 4; 3; 4; 4; 5;
K = 1; 2; 2; 3; 2; 3; 3; 2; 2; 3; 3; 2; 3; 2; 2; 3;
If you notice, K follows a similar patterning, with a special condition... Everytime b is a power of 2, it actually lowers the K value by 2. Soooo, if you follow a binary progression, you should be able to easily map your K values. Since this pattern is dependant on powers of 2, and the pattern is dependant upon finding the nearest power of 2 and starting there, I propose the following solution. Take your LOW value and find the nearest power of 2 (p) such that 2^p < LOW. This can be done by "counting the bits" for just the lowest number. Again, once you know which exponent it is, you don't have to count the bits for any other number. You just increment through the pattern and you will have your b and hence K (which is following the same pattern).
Note: If you are particularly observant, you can use the previous b or K to determine the next. If the current i is odd, add 1 to the previous b. If the current i is divisible by 4, then you decrement b by either 1 or 2, dependent upon whether it's in the first 1/2 of the pattern or second half. And, of course, if i is a power of 2, start over at 1.
Fuzzical Logic
Pseudo-code Example (non-Optimized)
{ var LOW, HIGH
var power = 0
//Get Nearest Power Of 2
for (var i = 0 to 60) {
// Compare using bitwise AND
if (LOW bitAND (2 ^ i) = (2 ^ i)) {
if ((2 ^ i) <= LOW) {
set power to i
}
else {
// Found the Power: end the for loop
set i to 61
}
}
}
// Automatically 1 at a Power of 2
set numOfBits to 1
array numbersWithPositiveBits with 64 integers = 0
// Must create the pattern from Power of 2
set foundLOW to false
for (var j = (2^power) to HIGH) {
set lenOfPatten to (power + 1)
// Don't record until we have found the LOW value
if ((foundLOW is false) bitAND (j is equal to LOW)) {
set foundLOW to true
}
// If j is odd, increment numOfBits
if ((1 bitAND j) is equal to 1) {
increment numOfBits
}
else if (j modulus 4 == 0) {
decrement numOfBits accordingly //Figure this one out yourself, please
}
else if ((j - (2^power)) == (power + 1)) {
// We are at the next power
increment power
// Start pattern over
set numOfBits to 1
}
// Record if appropriate
if (foundLOW is equal to true) {
increment element numOfBits in array numbersWithPositiveBits
}
}
// From here, derive your K values.
You can solve this efficiently as follows:
ret = 0;
for (i = 1; i <= 64; i++) {
if (computeK(i) != desiredK) continue;
ret += numBelow(HIGH, i) - numBelow(LO - 1, i);
}
return ret;
The function numBelow(high, numSet) computes the number of integers less than or equal to high and greater than zero that have numSet bits set. To implement numBelow(high, numSet) efficiently, you can use something like the following:
numBelow(high, numSet) {
t = floor(lg(high));
ret = 0;
if (numBitsSet(high) == numSet) ret++;
while (numSet > 0 && t > 0) {
ret += nchoosek(t - 1, numSet);
numSet--;
while (--t > 0 && (((1 << t) & high) == 0));
}
return ret;
}
This is a full working example with c++17
#include <bits/stdc++.h>
using namespace std;
#define BASE_MAX 61
typedef unsigned long long ll;
ll combination[BASE_MAX][BASE_MAX];
vector<vector<ll>> NK(4);
int count_bit(ll n) {
int ret = 0;
while (n) {
if (n & 1) {
ret++;
}
n >>= 1;
}
return ret;
}
int get_leftmost_bit_index(ll n) {
int ret = 0;
while (n > 1) {
ret++;
n >>= 1;
}
return ret;
}
void pre_calculate() {
for (int i = 0; i < BASE_MAX; i++)
combination[i][0] = 1;
for (int i = 1; i < BASE_MAX; i++) {
for (int j = 1; j < BASE_MAX; j++) {
combination[i][j] = combination[i - 1][j] + combination[i - 1][j - 1];
}
}
NK[0].push_back(1);
for (int i = 2; i < BASE_MAX; i++) {
int bitCount = count_bit(i);
if (find(NK[0].begin(), NK[0].end(), bitCount) != NK[0].end()) {
NK[1].push_back(i);
}
}
for (int i = 1; i < BASE_MAX; i++) {
int bitCount = count_bit(i);
if (find(NK[1].begin(), NK[1].end(), bitCount) != NK[1].end()) {
NK[2].push_back(i);
}
}
for (int i = 1; i < BASE_MAX; i++) {
int bitCount = count_bit(i);
if (find(NK[2].begin(), NK[2].end(), bitCount) != NK[2].end()) {
NK[3].push_back(i);
}
}
}
ll how_many_numbers_have_n_bit_in_range(ll lo, ll hi, int bit_count) {
if (bit_count == 0) {
if (lo == 0) return 1;
else return 0;
}
if (lo == hi) {
return count_bit(lo) == bit_count;
}
int lo_leftmost = get_leftmost_bit_index(lo); // 100 -> 2
int hi_leftmost = get_leftmost_bit_index(hi); // 1101 -> 3
if (lo_leftmost == hi_leftmost) {
return how_many_numbers_have_n_bit_in_range(lo & ~(1LL << lo_leftmost), hi & ~(1LL << hi_leftmost),
bit_count - 1);
}
if (lo != 0) {
return how_many_numbers_have_n_bit_in_range(0, hi, bit_count) -
how_many_numbers_have_n_bit_in_range(0, lo - 1, bit_count);
}
ll ret = combination[hi_leftmost][bit_count];
ret += how_many_numbers_have_n_bit_in_range(1LL << hi_leftmost, hi, bit_count);
return ret;
}
int main(void) {
pre_calculate();
while (true) {
ll LO, HI;
int X;
scanf("%lld%lld%d", &LO, &HI, &X);
if (LO == 0 && HI == 0 && X == 0)
break;
switch (X) {
case 0:
cout << (LO == 1) << endl;
break;
case 1: {
int ret = 0;
ll power2 = 1;
for (int i = 0; i < BASE_MAX; i++) {
power2 *= 2;
if (power2 > HI)
break;
if (power2 >= LO)
ret++;
}
cout << ret << endl;
break;
}
case 2:
case 3:
case 4: {
vector<ll> &addedBitsSizes = NK[X - 1];
ll ret = 0;
for (auto bit_count_to_added: addedBitsSizes) {
ll result = how_many_numbers_have_n_bit_in_range(LO, HI, bit_count_to_added);
ret += result;
}
cout << ret << endl;
break;
}
default:
cout << 0 << endl;
break;
}
}
return 0;
}

Resources