Hi guys I'm practicing dynamic programming and came across the following problem:
Given a number K, 0 <= K <= 10^100, a sequence of digits N, what is the number of possible ways of dividing N so that each part is at most K?
Input:
K = 8
N = 123
Output: 1
Explanation:
123
1-23
12-3
1-2-3
These are all the possible ways of splitting N, and only the last one is valid...
What I have achieved so far:
Let Dp[i] = the number of valid ways of dividing N, using i first digits.
Given a state, i must use the previous answer to compute new answers, we have 2 possibilities:
Use dp[i-1] + number of valid ways that split the digit i
Use dp[i-1] + number of valid ways that not split the digit i
But I'm stuck there and I don't know what to do
Thanks
Using dynamic programming implies that you need to think about the problem in terms of subproblems.
Let's denote by N[i...] the suffix of N starting at index i (for instance, with N = 45678955, we have N[3...] = 78955)
Let's denote by dp[i] the number of possible ways of dividing N[i...] so that each part is at most K.
We will also use a small function, max_part_len(N, K, i) which will represent the maximum length of a 'part' starting at i. For instance, with N = 45678955, K = 37, i = 3, we have max_part_len(N, K, i) = 1 because 7 < 37 but 78 > 37.
Now we can write the recurrence (or induction) relation on dp[i].
dp[i] = sum_(j from 1 to max_part_len(N, K, i)) dp[i+j]
This relation means that the number of possible ways of dividing N[i...] so that each part is at most K is:
The sum of the numbers of possible ways of dividing N[i+j...] so that each part is at most K, over every part length j such that the first part N[i...i+j-1] <= K. The base case is dp[len(N)] = 1: the empty suffix can be divided in exactly one way.
From there the algorithm is quite straight forward if you understood the basics of dynamic programming, I leave this part to you ;-)
I think we can also use divide and conquer. Let f(l, r) represent the number of ways to divide the range of digits indexed from l to r, so that each part is at most k. Then divide the string, 45678955 in two:
4567 8955
and the result would be
f(4567) * f(8955)
plus a division with a part that includes at least one from each side of the split, so each left extension paired with all right extensions. Say k was 1000. Then
f(456) * 1 * f(955) + // 78
f(456) * 1 * f(55) + // 789
f(45) * 1 * f(955) // 678
where each one of the calls to f performs a similar divide and conquer.
Here's JavaScript code comparing a recursive (top-down) implementation of m.raynal's algorithm with this divide and conquer:
/**
 * Length of the longest part of `N` that starts at index `i` and whose
 * numeric value does not exceed `K`.
 *
 * The part is compared as a BigInt, so the result stays exact for digit
 * strings far beyond Number.MAX_SAFE_INTEGER (the problem allows K up to
 * 10^100). Pass `K` as a BigInt (or a digit string) to keep the bound exact
 * as well; ordinary integer Numbers still work for small bounds.
 *
 * @param {string} N - sequence of decimal digits.
 * @param {number|bigint|string} K - inclusive upper bound for a part.
 * @param {number} i - index in `N` where the part starts.
 * @returns {number} number of digits of the longest valid part; 0 when no
 *   part starting at `i` is valid (including `i >= N.length` or `K < 0`).
 * @throws {SyntaxError} if the inspected slice of `N` is not a digit string.
 */
function max_part_len(N, K, i) {
  const available = N.length - i;
  let len = 0;
  // Appending a digit never decreases the value, so the first failure is final.
  while (len < available && BigInt(N.slice(i, i + len + 1)) <= K) {
    len += 1;
  }
  return len;
}
// m.raynal's algorithm
/**
 * Top-down (memoised) count of the ways to split the suffix of `N` starting
 * at index `i` into consecutive parts whose value is at most `K`.
 *
 * @param {string} N - sequence of decimal digits.
 * @param {number} K - inclusive upper bound for every part.
 * @param {number} i - start index of the suffix to split.
 * @param {Object} [memo] - cache keyed by "N,i"; the key does not contain K,
 *   so a cache must not be shared between calls that use different bounds.
 * @returns {number} number of valid splittings of N[i...].
 */
function f(N, K, i, memo={}){
  const key = String([N, i]);
  if (Object.prototype.hasOwnProperty.call(memo, key))
    return memo[key];
  // The empty suffix can be split in exactly one way.
  if (i === N.length)
    return 1;
  // A single remaining digit is valid iff it does not exceed K.
  if (i === N.length - 1)
    return (Number(N[i]) <= K) & 1;
  // Loop-invariant: compute the longest admissible first part only once.
  const longest = max_part_len(N, K, i);
  let s = 0;
  for (let j = 1; j <= longest; j++)
    s = s + f(N, K, i + j, memo);
  return memo[key] = s;
}
// divide and conquer
/**
 * Divide-and-conquer count of the ways to split the digit string `N` into
 * consecutive parts whose value is at most `K`.
 *
 * The string is cut in the middle. Every splitting either cuts exactly
 * between the two halves (left * right) or contains one part N[i..j] that
 * straddles the middle (counted by the loop, as ways(N[0..i-1]) *
 * ways(N[j+1..]) for every admissible pair i < mid <= j).
 *
 * Parts are compared as BigInt so the result is exact for digit strings
 * longer than Number can represent; pass `K` as a BigInt for large bounds.
 *
 * @param {string} N - sequence of decimal digits.
 * @param {number|bigint} K - inclusive upper bound for every part.
 * @param {Object} [memo] - cache keyed by substring. It is now threaded
 *   through every recursive call (the original dropped it, so nothing below
 *   the top level was ever reused). Do not share it between different K.
 * @returns {number} number of valid splittings of N.
 */
function g(N, K, memo={}){
  if (Object.prototype.hasOwnProperty.call(memo, N))
    return memo[N];
  if (!N)
    return memo[N] = 1;
  if (N.length === 1)
    return memo[N] = (Number(N) <= K) & 1;
  const mid = Math.floor(N.length / 2);
  const left = g(N.substring(0, mid), K, memo);
  const right = g(N.substring(mid), K, memo);
  let s = 0;
  // N[i..j] is the candidate part straddling the middle; start with the
  // shortest one and grow it to the right, then to the left.
  let i = mid - 1;
  let j = mid;
  let str = N.substring(i, j + 1);
  while (i >= 0 && BigInt(str) <= K){
    if (j === N.length){
      // Ran past the right end: extend one digit further left and restart j.
      if (i === 0){
        break;
      } else{
        i = i - 1;
        j = mid;
        str = N.substring(i, j + 1);
        continue
      }
    }
    const l = g(N.substring(0, i), K, memo);
    const r = g(N.substring(j + 1), K, memo);
    s = s + l * r;
    j = j + 1;
    str = N.substring(i, j + 1);
    if (BigInt(str) > K){
      // Growing right is no longer possible for this i; try the next i.
      j = mid;
      i = i - 1;
      str = N.substring(i, j + 1);
    }
  }
  return memo[N] = left * right + s;
}
// Benchmark f() and g() on every i in [5, 100000) with a random bound
// k in [1, i], then cross-check that both implementations agree.
let start = new Date();
for (let i = 5; i < 100000; i++) {
  const k = Math.ceil(Math.random() * i);
  f(String(i), k, 0);
}
console.log(`Running f() 100,000 times took ${ (new Date() - start)/1000 } sec`);

start = new Date();
for (let i = 5; i < 100000; i++) {
  const k = Math.ceil(Math.random() * i);
  g(String(i), k);
}
console.log(`Running g() 100,000 times took ${ (new Date() - start)/1000 } sec`);

start = new Date();
// Remember whether the loop stopped early, so the success message is only
// printed when every pair really matched.
let mismatchFound = false;
for (let i = 5; i < 100000; i++) {
  const k = Math.ceil(Math.random() * i);
  const ii = String(i);
  const ff = f(ii, k, 0);
  const gg = g(ii, k);
  if (ff !== gg) {
    console.log("Mismatch found.", ii, k, ff, gg);
    mismatchFound = true;
    break;
  }
}
if (!mismatchFound) {
  console.log(`No discrepancies found between f() and g(). ${ (new Date() - start)/1000 } sec`);
}
// Snippet under analysis: how often is Func() reached as a function of a and b?
// Outer loop: i takes the values 1 .. a-1.
for(i = 1; i < a; i++){
// Inner loop: j takes the values 1, 4, 7, ... while j < b (step 3).
for(j = 1; j < b; j = j + 3){
// Func() is called only for the pairs where i + j is even.
if((i+j) % 2 == 0)
Func()
}
}
In this case, I thought it is O(a*b) and Theta(a*b).
Did I analyze the Complexity correctly?
First of all, you, probably, mean
if ((i + j) % 2 == 0)
instead of
if (i + j % 2 == 0)
since i is positive and j % 2 is non-negative, i + j % 2 is always positive and therefore never equals zero: Func() would not run at all.
Your answer is correct one: the complexity is
a * // from the first loop
b / 3 * // from the second loop
1 // from the condition (checking it, and calling Func() when it holds, is constant work)
So you have
Θ(a * b / 3 * 1) = Θ(ab)
I have these 2 codes, the question is to find how many times x=x+1 will run in each occasion as T1(n) stands for code 1 and T2(n) stands for code 2. Then I have to find the BIG O of each one, but I know how to do it, the thing is I get stuck in finding how many times ( as to n of course ) will x = x + 1 will run.
CODE 1:
// CODE 1: count how many times x = x + 1 executes as a function of n.
// Outer loop: i = 1 .. n.
for( i= 1; i <= n; i++)
{
// Middle loop: j = 1 .. sqrt(i), so its length grows with i.
for(j = 1; j <= sqrt(i); j++)
{
// Inner loop: k = 1 .. n - j + 1, i.e. n - j + 1 iterations for the current j.
for( k = 1; k <= n - j + 1; k++)
{
x = x + 1;
}
}
}
CODE 2:
// CODE 2: count how many times x = x + 1 executes as a function of n.
// Outer loop: j = 1 .. n.
for(j = 1; j <= n; j++)
{
h = n;
// h is halved on every pass until it is no longer positive.
// NOTE(review): the pass count is about log2(n) only if h is an integer — the type is not shown here.
while(h > 0)
{
// Inner loop: i = 1 .. sqrt(n), independent of j and h.
for (i = 1; i <= sqrt(n); i++)
{
x = x+1;
}
h = h/2;
}
}
I am really stuck, and have read already a lot so I ask if someone can help me, please explain me analytically.
PS: I think in the code 2 , this for (i = 1; i <= sqrt(n); i++) will run n*log(n) times, right? Then what?
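For code 1 you have that the number of calls of x=x+1 is:
$$T_1(n)=\sum_{i=1}^{n}\ \sum_{j=1}^{\lfloor\sqrt{i}\rfloor}(n-j+1)\ \le\ \sum_{i=1}^{n} n\sqrt{i}\ \le\ n\cdot n\sqrt{n}=O\!\left(n^{5/2}\right)$$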
Here we bounded 1+sqrt(2)+...+sqrt(n) with n sqrt(n) and used the fact that the first term is the leading term.
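For code 2 the calculations are simpler (assuming h is an integer, so the while loop runs about log n times):
$$T_2(n)=\sum_{j=1}^{n}\ \sum_{t=1}^{\lfloor\log_2 n\rfloor+1}\ \sum_{i=1}^{\lfloor\sqrt{n}\rfloor}1 \;=\; n\,(\lfloor\log_2 n\rfloor+1)\,\lfloor\sqrt{n}\rfloor \;=\; O\!\left(n\sqrt{n}\,\log n\right)$$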
The second loop actually goes from h=n to 0 by iterating h = h/2 but you can see that this is the same as going from 1 to log n. What we used is the fact the j, t, i are mutually independent (analogously just like we can write that sum from 1 to n of f(n) is just nf(n)).
How can I give an efficient algorithm for computing the transition function δ for the string-matching automaton in time O(m |Σ|), using the prefix function π?
I want to compute the transition function in a finite automaton. Normal transition function has O(m^3|Σ|) complexity, where m = length of pattern P and Σ is the alphabet.
// Naive construction of the transition table d for pattern P over alphabet Σ.
// For every state q (number of matched characters) and every character x it
// searches downwards for the largest k such that the prefix Pk is a suffix of Pq followed by x.
// The surrounding text states the cost of this version as O(m^3 |Σ|).
COMPUTE_TRANSITION_FUNCTION(P,Σ)
m = length(P);
for q = 0 through m do
for each character x in Σ
k = min(m+1, q+2); // +1 for x, +2 for subsequent repeat loop to decrement
repeat k = k-1 // work backwards from q+1
until Pk 'is-suffix-of' Pqx;
d(q, x) = k; // assign transition table
end for; end for;
return d;
End algorithm.
π is the prefix function defined in KMP algorithm
There is an O(m.|Σ|) algorithm, and because the transition function has O(m.|Σ|) possible inputs, no algorithm can have a better time complexity.
Assume we have computed π, and we want to calculate d(q, x). d(q, x) is the state we should go to if we are currently in state q and the current input character is x. If the current character is P[q], we should go to state q + 1, because one more character is matched, so d(q, P[q]) = q + 1. Otherwise we have to go to a state with a lower number. π[q] is the last state before q such that P[0 .. π[q]] is a suffix of P[0 .. q], so we copy the transitions of state π[q] to state q for every character except P[q], which we have already set.
I hope you understand it!
I got an answer which takes O(m^2|E|). Also there is a question 32.4-8 which is about the theme.
Here it is:
// Builds the transition table of the string-matching automaton for _pattern.
// Row q is the state "q characters matched"; column c - 'a' holds the next
// state on character c. Only the 26 letters 'a'..'z' have a column.
vector<vector<size_t>> Preprocess(const string &_pattern)
{
    const size_t m = _pattern.size();

    // border[len][start] == len - start when the prefix of that length equals
    // _pattern[start, len), i.e. when it is a suffix of the first len
    // characters; -1 otherwise.
    vector<vector<int>> border(m + 1, vector<int>(m + 1, -1));
    for (size_t len = 0; len <= m; ++len)
    {
        for (size_t start = 0; start <= len; ++start)
        {
            const size_t k = len - start;
            if (_pattern.compare(0, k, _pattern, start, k) == 0)
                border[len][start] = static_cast<int>(k);
        }
    }
    // The full pattern cannot be extended by another character.
    border[m][0] = -1;

    vector<vector<size_t>> table(m + 1, vector<size_t>(26, 0));
    for (size_t state = 0; state < table.size(); ++state)
    {
        for (char c = 'a'; c <= 'z'; ++c)
        {
            // Scanning start upwards tries the longest suffix first.
            for (size_t start = 0; start <= state; ++start)
            {
                const int k = border[state][start];
                if (k != -1 && _pattern[k] == c)
                {
                    table[state][c - 'a'] = k + 1;
                    break;
                }
            }
        }
    }
    return table;
}
Does anyone know a good algorithm to word wrap an input string to a specified number of lines rather than a set width. Basically to achieve the minimum width for X lines.
e.g. "I would like to be wrapped into two lines"
goes to
"I would like to be
wrapped into two lines"
"I would like to be wrapped into three lines"
goes to
"I would like to
be wrapped into
three lines"
Inserting new lines as required. I can find other word wrap questions but they all have a known width and want to insert as many lines as needed to fit that width. I am after the opposite.
Answers preferable in a .NET language but any language would be helpful. Obviously if there is a framework way to do this I am not aware of let me know.
Edit I have found this since which I think the accepted answer is the solution to my problem but am having difficulty understanding it. Algorithm to divide text into 3 evenly-sized groups any chance someone could convert it to c# or vb.net.
One way of solving this problem is to use dynamic programming, cf. the minimum raggedness algorithm.
I used some of the information you added when you edited your post with:
Algorithm to divide text into 3 evenly-sized groups
Notations:
Let name your text document="word1 word2 .... wordp"
n= number of line required
LineWidth=len(document)/n
Cost function:
First you need to define a cost function for having word[i] to word[j] on the same line. You can take the same one as on Wikipedia, with p=2 for example:
$$c(i, j) = \bigl(\text{LineWidth} - \operatorname{len}(\text{word}_i \dots \text{word}_j)\bigr)^{p}$$
It represent the distance between the objective length of a line and the actual lenght.
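The total cost function for the optimal solution can be defined with the following recursion relation, where f(j, k) is the best cost of ending line k at word j:
$$f(j, 0) = c(0, j), \qquad f(j, k) = \min_{i \le j}\,\bigl[f(i, k-1) + c(i, j)\bigr]$$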
Solving the problem:
You can solve this problem using dynamic programming.
I took the code from the link you gave, and changed it a so you see what the program is using.
At stage k you add words to line k.
Then you look at the optimal cost of
having word i to j at line k.
Once you've gone from line 1 to n,
you tacke the smallest cost in the
last step and you have your optimal
result:
Here is the result from the code:
D=minragged('Just testing to see how this works.')
number of words: 7
------------------------------------
stage : 0
------------------------------------
word i to j in line 0 TotalCost (f(j))
------------------------------------
i= 0 j= 0 121.0
i= 0 j= 1 49.0
i= 0 j= 2 1.0
i= 0 j= 3 16.0
i= 0 j= 4 64.0
i= 0 j= 5 144.0
i= 0 j= 6 289.0
i= 0 j= 7 576.0
------------------------------------
stage : 1
------------------------------------
word i to j in line 1 TotalCost (f(j))
------------------------------------
i= 0 j= 0 242.0
i= 0 j= 1 170.0
i= 0 j= 2 122.0
i= 0 j= 3 137.0
i= 0 j= 4 185.0
i= 0 j= 5 265.0
i= 0 j= 6 410.0
i= 0 j= 7 697.0
i= 1 j= 2 65.0
i= 1 j= 3 50.0
i= 1 j= 4 58.0
i= 1 j= 5 98.0
i= 1 j= 6 193.0
i= 1 j= 7 410.0
i= 2 j= 4 26.0
i= 2 j= 5 2.0
i= 2 j= 6 17.0
i= 2 j= 7 122.0
i= 3 j= 7 80.0
------------------------------------
stage : 2
------------------------------------
word i to j in line 2 TotalCost (f(j))
------------------------------------
i= 0 j= 7 818.0
i= 1 j= 7 531.0
i= 2 j= 7 186.0
i= 3 j= 7 114.0
i= 4 j= 7 42.0
i= 5 j= 7 2.0
reversing list
------------------------------------
Just testing 12
to see how 10
this works. 11
*There fore the best choice is to have words 5 to 7 in last line.(cf
stage2)
then words 2 to 5 in second line (cf
stage1)
then words 0 to 2 in first line (cf
stage 0).*
Reverse this and you get:
Just testing 12
to see how 10
this works. 11
Here is the code that prints the reasoning (in Python, sorry, I don't use C#... but someone has since translated the code into C#):
def minragged(text, n=3):
    """Split ``text`` into ``n`` lines of widths as equal as possible.

    Dynamic programme over "stages" (one per output line): ``F[stage][j]``
    holds ``[best total cost, i]`` for ending line ``stage`` just before word
    ``j``, where ``i`` is the index of the first word of that line. The cost of
    a line is ``(linewidth - actual width) ** P`` with ``P = 2``.

    The intermediate tables are printed to stdout (the function was written to
    show the reasoning). Runs on Python 3 as well as Python 2.

    :param text: string to wrap; words are separated by whitespace.
    :param n: number of lines to produce (must be non-zero).
    :returns: list of ``n`` strings, the lines in reading order.
    """
    P = 2
    words = text.split()
    last = len(words)
    cumwordwidth = [0]
    # cumwordwidth[-1] is the last element
    for word in words:
        cumwordwidth.append(cumwordwidth[-1] + len(word))
    totalwidth = cumwordwidth[-1] + len(words) - 1  # len(words) - 1 spaces
    linewidth = float(totalwidth - (n - 1)) / float(n)  # n - 1 line breaks
    print("number of words: %s" % len(words))

    def cost(i, j):
        """cost of a line words[i], ..., words[j - 1] (words[i:j])"""
        actuallinewidth = max(j - i - 1, 0) + (cumwordwidth[j] - cumwordwidth[i])
        return (linewidth - float(actuallinewidth)) ** P

    def report(i, j, value):
        # Same layout the original Python 2 print statement produced.
        print("i= %s j= %s \t\t\t%s" % (i, j, value))

    rule = "------------------------------------"
    F = {}  # Total cost function
    for stage in range(n):
        print(rule)
        print("stage : %s" % stage)
        print(rule)
        print("word i to j in line %s \t\tTotalCost (f(j))" % stage)
        print(rule)
        if stage == 0:
            # The first line always starts at word 0.
            F[stage] = []
            for j in range(0, last + 1):
                report(0, j, cost(0, j))
                F[stage].append([cost(0, j), 0])
        else:
            F[stage] = [[float('inf'), 0] for _ in range(last + 1)]
            for i in range(last + 1):
                # The final line must end at the last word; earlier lines may
                # end anywhere at or after their first word.
                ends = [last] if stage == n - 1 else range(i, last + 1)
                for j in ends:
                    total = F[stage - 1][i][0] + cost(i, j)
                    if total < F[stage][j][0]:  # calculating min cost (cf f formula)
                        F[stage][j][0] = total
                        F[stage][j][1] = i
                        report(i, j, total)

    print('reversing list')
    print(rule)
    # Walk the stored split points backwards from the last line.
    listWords = []
    a = last
    for k in range(n - 1, 0, -1):  # reverse loop from n-1 to 1
        listWords.append(' '.join(words[F[k][a][1]:a]))
        a = F[k][a][1]
    listWords.append(' '.join(words[0:a]))
    listWords.reverse()
    for line in listWords:
        print("%s \t\t%s" % (line, len(line)))
    return listWords
Here is the accepted solution from Algorithm to divide text into 3 evenly-sized groups converted to C#:
/// <summary>
/// Splits <paramref name="text"/> into <paramref name="n"/> lines whose widths
/// are as close as possible to the ideal width, minimising the sum of squared
/// deviations with a dynamic programme over (line count, word count).
/// </summary>
/// <param name="text">Text to wrap; words are separated by whitespace.</param>
/// <param name="n">Number of lines to produce.</param>
/// <returns>The lines in reading order.</returns>
static List<string> Minragged(string text, int n = 3)
{
    string[] words = text.Split();
    int wordCount = words.Length;

    // prefixLen[k] = total number of characters in the first k words.
    int[] prefixLen = new int[wordCount + 1];
    for (int w = 0; w < wordCount; ++w)
        prefixLen[w + 1] = prefixLen[w] + words[w].Length;

    int totalwidth = prefixLen[wordCount] + wordCount - 1;
    double linewidth = (double)(totalwidth - (n - 1)) / n;

    // Squared distance between the ideal width and the width of words[from..to).
    Func<int, int, double> cost = (from, to) =>
    {
        int actual = Math.Max(to - from - 1, 0) + (prefixLen[to] - prefixLen[from]);
        double slack = linewidth - actual;
        return slack * slack;
    };

    // score[l][j]: cheapest way to put the first j words on l lines;
    // split[l][j]: index of the first word of line l in that layout.
    var score = new List<double[]>();
    var split = new List<int[]>();

    var firstScore = new double[wordCount + 1];
    var firstSplit = new int[wordCount + 1];
    for (int j = 0; j <= wordCount; ++j)
    {
        firstScore[j] = (j == 0) ? 0.0 : double.MaxValue;
        firstSplit[j] = -1;
    }
    score.Add(firstScore);
    split.Add(firstSplit);

    for (int l = 1; l <= n; ++l)
    {
        double[] previous = score[l - 1];
        var rowScore = new double[wordCount + 1];
        var rowSplit = new int[wordCount + 1];
        for (int j = 0; j <= wordCount; ++j)
        {
            // Start from k = 0 and keep the first strictly better candidate.
            int bestK = 0;
            double bestValue = previous[0] + cost(0, j);
            for (int k = 1; k <= j; ++k)
            {
                double candidate = previous[k] + cost(k, j);
                if (candidate < bestValue)
                {
                    bestValue = candidate;
                    bestK = k;
                }
            }
            rowScore[j] = bestValue;
            rowSplit[j] = bestK;
        }
        score.Add(rowScore);
        split.Add(rowSplit);
    }

    // Recover the lines back to front from the stored split points.
    var lines = new List<string>();
    int end = wordCount;
    for (int l = n; l > 0; --l)
    {
        int begin = split[l][end];
        lines.Add(string.Join(" ", words, begin, end - begin));
        end = begin;
    }
    lines.Reverse();
    return lines;
}
There was a discussion about this exact problem (though it was phrased in a different way) at http://www.perlmonks.org/?node_id=180276.
In the end the best solution was to do a binary search through all possible widths to find the smallest width that wound up with no more than the desired number of columns. If there are n items and the average width is m, then you'll need O(log(n) + log(m)) passes to find the right width, each of which takes O(n) time, for O(n * (log(n) + log(m))). This is probably fast enough with no more need to be clever.
If you wish to be clever, you can create an array of word counts, and cumulative lengths of the words. Then use binary searches on this data structure to figure out where the line breaks are. Creating this data structure is O(n), and it makes all of the passes to figure out the right width be O(log(n) * (log(n) + log(m))) which for reasonable lengths of words is dominated by your first O(n) pass.
If the widths of words can be floating point, you'll need to do something more clever with the binary searches, but you are unlikely to need that particular optimization.
btilly has the right answer here, but just for fun I decided to code up a solution in python:
def wrap_min_width(words, n):
    """Greedily wrap ``words`` into lines that are at most ``n`` characters wide.

    Words on a line are joined by one space, and that space is counted against
    the width. A single word longer than ``n`` is placed on a line of its own.

    :returns: list of lines (``[""]`` when ``words`` is empty).
    """
    r, l = [], ""
    for w in words:
        if l and len(l) + 1 + len(w) > n:
            # The word (plus its separating space) does not fit: start a new line.
            r.append(l)
            l = w
        elif l:
            l += " " + w
        else:
            l = w
    return r + [l]


def min_lines(phrase, lines):
    """Find the smallest width at which ``phrase`` wraps into at most ``lines`` lines.

    Binary search over the width; greedy wrapping needs no more lines as the
    width grows, so the predicate is monotone.

    :param phrase: text whose words are separated by single spaces.
    :param lines: maximum number of lines allowed (expected to be >= 1).
    :returns: tuple ``(width, wrapped_text)`` with lines joined by newlines.
    """
    words = phrase.split(" ")
    # No line can be narrower than the longest word ...
    lo = max(len(w) for w in words)
    # ... and the whole phrase on one line always fits.
    hi = max(lo, len(phrase))
    while lo < hi:
        mid = lo + (hi - lo) // 2  # integer midpoint on Python 2 and 3
        if len(wrap_min_width(words, mid)) > lines:
            lo = mid + 1
        else:
            hi = mid
    return lo, "\n".join(wrap_min_width(words, lo))
Now this still may not be exactly what you want, since if it is possible to wrap the words in fewer than n lines using the same line width, it instead returns the smallest number of lines encoding. (Of course you can always add extra empty lines, but it is a bit silly.) If I run it on your test case, here is what I get:
Case: "I would like to be wrapped into three lines", 3 lines
Result: 14 chars/line
I would like to
be wrapped into
three lines
I just thought of an approach:
You can write a function accepting two parameters 1. String 2. Number of lines
Get the length of the string (String.length if using C#).
Divide the length by number of lines (lets say the result is n)
Now start a loop and access each character of the string (using string[i])
Insert a '\n\r' after every nth occurrence in the array of characters.
In the loop maintain a temp string array which would be null if there is a blank character(maintaining each word).
If there is a nth occurrence and temp string is not null then insert '\n\r' after that temp string.
I'll assume you're trying to minimize the maximum width of a string with n breaks. This can be done in O(words(str)*n) time and space using dynamic programming or recursion with memoization.
The recurrence would look like this where the word has been split in to words
def wordwrap(remaining_words, n):
    """Smallest achievable maximum line width when breaking the words ``n`` times.

    ``n`` breaks produce ``n + 1`` lines; words on a line are joined by one
    space. Every break must follow at least one word, but the final line may
    be empty. The recurrence is evaluated on word indices with memoisation, so
    each (start, breaks) state is solved once.

    :param remaining_words: list of words still to be laid out.
    :param n: number of line breaks still to insert.
    :returns: the minimal possible width of the widest line, or infinity when
        the words run out before all breaks have been placed.
    """
    INFINITY = float('inf')
    count = len(remaining_words)

    # prefix[k] = total number of characters in the first k words.
    prefix = [0]
    for word in remaining_words:
        prefix.append(prefix[-1] + len(word))

    def width(start, end):
        # Width of words[start:end] joined by single spaces.
        return prefix[end] - prefix[start] + max(end - start - 1, 0)

    memo = {}

    def solve(start, breaks):
        if breaks == 0:
            return width(start, count)  # rest of the string
        if start == count:
            return INFINITY  # we haven't chopped enough lines
        key = (start, breaks)
        if key not in memo:
            best = INFINITY
            for end in range(start + 1, count + 1):
                # split after word end - 1
                best = min(best, max(width(start, end), solve(end, breaks - 1)))
            memo[key] = best
        return memo[key]

    return solve(0, n)
I converted the C# accepted answer to JavaScript for something I was working on. Posting it here might save someone a few minutes of doing it themselves.
/**
 * Wraps `text` into exactly `n` lines whose widths are as even as possible
 * (minimum sum of squared deviations from the ideal line width).
 *
 * @param {string} text - text to wrap; words are separated by single spaces.
 * @param {number} [n=3] - number of lines to produce (same default as the
 *   Python and C# versions this was ported from).
 * @returns {string[]} the lines in reading order.
 */
function WrapTextWithLimit(text, n = 3) {
  const words = text.toString().split(' ');
  const wordCount = words.length;

  // cumwordwidth[k] = total number of characters in the first k words.
  const cumwordwidth = [0];
  for (const word of words) {
    cumwordwidth.push(cumwordwidth[cumwordwidth.length - 1] + word.length);
  }

  const totalwidth = cumwordwidth[wordCount] + wordCount - 1;
  const linewidth = (totalwidth - (n - 1)) / n;

  // Squared distance between the ideal width and the width of words[i..j).
  const cost = (i, j) => {
    const actuallinewidth = Math.max(j - i - 1, 0) + (cumwordwidth[j] - cumwordwidth[i]);
    const slack = linewidth - actuallinewidth;
    return slack * slack;
  };

  // best[l][j] = [cheapest cost of putting the first j words on l lines,
  //               index of the first word of line l in that layout].
  const firstRow = [[0.0, -1]];
  for (let j = 0; j < wordCount; ++j) {
    firstRow.push([Number.MAX_VALUE, -1]);
  }
  const best = [firstRow];

  for (let l = 1; l <= n; ++l) {
    const previous = best[l - 1];
    const row = [];
    for (let j = 0; j <= wordCount; ++j) {
      // Start from k = 0 and keep the first strictly better candidate.
      let min = [previous[0][0] + cost(0, j), 0];
      for (let k = 1; k <= j; ++k) {
        const loc = previous[k][0] + cost(k, j);
        if (loc < min[0]) {
          min = [loc, k];
        }
      }
      row.push(min);
    }
    best.push(row);
  }

  // Recover the lines back to front from the stored split points.
  const lines = [];
  let end = wordCount;
  for (let p = n; p > 0; --p) {
    const begin = best[p][end][1];
    lines.push(words.slice(begin, end).join(' '));
    end = begin;
  }
  lines.reverse();
  return lines;
}
This solution improves on Mikola's.
It's better because
It doesn't use strings. You don't need to use strings and concatenate them. You just need an array of their lengths. So, because of this it's faster, also you can use this method with any kind of "element" - you just need the widths.
There was some unnecessary processing in the wrap_min_width function. It just kept going even when it went beyond the point of failure. Also, it just builds the string unnecessarily.
Added the "separator width" as an adjustable parameter.
It calculates the min width - which is really what you want.
Fixed some bugs.
This is written in Javascript:
// For testing calcMinWidth
/**
 * Wraps `str` into at most `nLines` lines using the smallest width found by
 * calcMinWidth (word widths are character counts, separator width is 1).
 *
 * @param {string} str - text whose words are separated by single spaces.
 * @param {number} nLines - maximum number of lines.
 * @returns {string} the wrapped text, lines joined by "\n".
 */
var formatString = function (str, nLines) {
  const SEPARATOR_WIDTH = 1;
  const words = str.split(" ");
  const elWidths = words.map((s) => s.length);
  const width = calcMinWidth(elWidths, SEPARATOR_WIDTH, nLines, 0.1);

  const lines = [];
  let curLine = [];
  let curLineLength = 0;
  words.forEach((word, i) => {
    const elWidth = elWidths[i];
    // Width of the current line if this word were appended to it; the
    // separator counts against the width, exactly as in calcMinWidth.
    const extended = curLine.length === 0
      ? elWidth
      : curLineLength + SEPARATOR_WIDTH + elWidth;
    if (curLine.length > 0 && extended > width) {
      lines.push(curLine.join(" "));
      curLine = [word];
      curLineLength = elWidth;
    } else {
      curLine.push(word);
      curLineLength = extended;
    }
  });
  lines.push(curLine.join(" "));
  return lines.join("\n");
};

/**
 * Bisects for the smallest line width at which the elements fit into at most
 * `lines` lines when packed greedily.
 *
 * @param {number[]} elWidths - width of every element, in order.
 * @param {number} separatorWidth - width inserted between two elements that
 *   share a line.
 * @param {number} lines - maximum number of lines.
 * @param {number} tolerance - the search stops once the bracket is narrower
 *   than this; the result exceeds the true minimum by less than `tolerance`.
 * @returns {number} a width at which the elements fit (0 for no elements).
 */
var calcMinWidth = function (elWidths, separatorWidth, lines, tolerance) {
  // True when every element fits into at most `lines` lines of this width.
  const testFit = (width) => {
    let nCurLine = 1;
    let curLineLength = 0;
    for (let i = 0; i < elWidths.length; ++i) {
      const elWidth = elWidths[i];
      if (elWidth > width) {
        return false;
      }
      // The separator belongs to the line, so it must fit as well.
      const extended = i === 0
        ? elWidth
        : curLineLength + separatorWidth + elWidth;
      if (extended > width) {
        if (++nCurLine > lines) {
          return false;
        }
        curLineLength = elWidth;
      } else {
        curLineLength = extended;
      }
    }
    return true;
  };

  // hi: everything on one line always fits; lo: the widest single element.
  let hi = 0;
  let lo = 0;
  elWidths.forEach((elWidth, i) => {
    if (i > 0) {
      hi += separatorWidth;
    }
    hi += elWidth;
    if (i === 0 || elWidth > lo) {
      lo = elWidth;
    }
  });

  while (hi - lo > tolerance) {
    const guess = (hi + lo) / 2;
    // Floating-point floor reached: the bracket cannot shrink any further.
    if (guess <= lo || guess >= hi) {
      break;
    }
    if (testFit(guess)) {
      hi = guess;
    } else {
      lo = guess;
    }
  }
  return hi;
};