How to solve sequential split dynamically - algorithm

There is an array arr of weights.
arr = [1,5,3,2,4]; each value in arr is a weight.
n = 2: the weights must be split into 2 blocks, and the order of the elements cannot be broken by the split.
Combination 1:
block 0: [1] max: 1
block 1: [5,3,2,4] max: 5
----------------------------
sum of max from block 0 and 1 is 6
Combination 2:
block 0: [1,5] max: 5
block 1: [3,2,4] max: 4
----------------------------
sum of max from block 0 and 1 is 9
Combination 3:
block 0: [1,5,3] max: 5
block 1: [2,4] max: 4
----------------------------
sum of max from block 0 and 1 is 9
Combination 4:
block 0: [1,5,3,2] max: 5
block 1: [4] max: 4
----------------------------
sum of max from block 0 and 1 is 9
So here the answer is 6, from Combination 1.

The hardest part of some problems is just stating them clearly. If you can do that, the code practically writes itself.
I think the problem statement is this: Find the minimum value of a function (f) applied at every index of an array (f(array, index)), where f is the sum of the max values of two subarrays formed by splitting the input array at the given index.
function f(array, index) {
  let left = array.slice(0, index)
  let right = array.slice(index)
  return Math.max(...left) + Math.max(...right)
}

let array = [1, 5, 3, 2, 4]
let smallestMax = Infinity
for (let i = 1; i < array.length; i++) {
  let max = f(array, i)
  smallestMax = max < smallestMax ? max : smallestMax
}
console.log(smallestMax)

@Danh has a straightforward O(N^2) solution, but linear time is also possible without too much more work, and if you have enough data it'll make a huge difference. Danh did his in JS, it looks like, so I'll try to do the same. I'm a bit rusty with this style of for loop, so they may be off by one, but a quick test in the JS console gave me 6 as expected (after fixing a copy/paste error).
The idea is to pass left to right, recording the running max at each index (the left side), then right to left, recording the running max at each index (the right side). Then for every split point we add the left-side max to the right-side max and keep the smallest such sum. Basically dynamic programming of a sort.
let array = [1, 5, 3, 2, 4]
let maxLeft = {}
let maxRight = {}
let max, newMax;
for (let i = 0; i < array.length; i++) {
  if (i === 0) {
    maxLeft[i] = array[i]
  } else {
    maxLeft[i] = array[i] < maxLeft[i - 1] ? maxLeft[i - 1] : array[i]
  }
}
for (let i = array.length - 1; i >= 0; i--) {
  if (i === array.length - 1) {
    maxRight[i] = array[i]
  } else {
    maxRight[i] = array[i] < maxRight[i + 1] ? maxRight[i + 1] : array[i]
  }
}
// combine: smallest value of (max of left block) + (max of right block) over all split points
for (let i = 0; i < array.length - 1; i++) {
  newMax = maxLeft[i] + maxRight[i + 1]
  if (i === 0) {
    max = newMax
  } else {
    max = newMax < max ? newMax : max
  }
}
console.log(max) // 6
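For reference, here is the same two-pass idea as a short Python sketch (my own translation, not part of either answer above): prefix maxima from the left, suffix maxima from the right, then the smallest prefix-max + suffix-max over all split points.

def min_split_sum(arr):
    n = len(arr)
    max_left = [0] * n    # max_left[i] = max(arr[0..i])
    max_right = [0] * n   # max_right[i] = max(arr[i..n-1])
    max_left[0] = arr[0]
    for i in range(1, n):
        max_left[i] = max(max_left[i - 1], arr[i])
    max_right[n - 1] = arr[n - 1]
    for i in range(n - 2, -1, -1):
        max_right[i] = max(max_right[i + 1], arr[i])
    # block 0 = arr[0..i], block 1 = arr[i+1..n-1]
    return min(max_left[i] + max_right[i + 1] for i in range(n - 1))

print(min_split_sum([1, 5, 3, 2, 4]))  # 6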

Related

Algorithm for the largest subarray of distinct values in linear time

I'm trying to come up with a fast algorithm for, given any array of length n, obtaining the largest subarray of distinct values.
For example, the largest subarray of distinct values of
[1, 4, 3, 2, 4, 2, 8, 1, 9]
would be
[4, 2, 8, 1, 9]
This is my current solution, I think it runs in O(n^2). This is because check_dups runs in linear time, and it is called every time j or i increments.
arr = [0,...,n]
i = 0
j = 1
i_best = i
j_best = j
while i < n-1 and j < n:
    if check_dups(arr, i, j): // determines if there are duplicates in the subarray i..j, in linear time
        i += 1
    else:
        if j - i > j_best - i_best:
            i_best = i
            j_best = j
        j += 1
return subarray(arr, i_best, j_best)
Does anyone have a better solution, in linear time?
Please note this is pseudocode and I'm not looking for an answer that relies on specific existing functions of a defined language (such as arr.contains()).
Thanks!
Consider the problem of finding the largest distinct-valued subarray ending at a particular index j. Conceptually this is straightforward: starting at arr[j], you go backwards and include all elements until you find a duplicate.
Let's use this intuition to solve this problem for all j from 0 up to length(arr). We need to know, at any point in the iteration, how far back we can go before we find a duplicate. That is, we need to know the least i such that subarray(arr, i, j) contains distinct values. (I'm assuming subarray treats the indices as inclusive.)
If we knew i at some point in the iteration (say, when j = k), can we quickly update i when j = k+1? Indeed, if we knew when was the last occurrence of arr[k+1], then we can update i := max(i, lastOccurrence(arr[k+1]) + 1). We can compute lastOccurrence in O(1) time with a HashMap.
Pseudocode:
arr = ... (from input)
map = empty HashMap
i = 0
i_best = 0
j_best = 0
for j from 0 to length(arr) - 1 inclusive:
    if map contains-key arr[j]:
        i = max(i, map[arr[j]] + 1)
    map[arr[j]] = j
    if j - i > j_best - i_best:
        i_best = i
        j_best = j
return subarray(arr, i_best, j_best)
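A minimal runnable Python version of that pseudocode (treating the result as the inclusive slice arr[i_best..j_best]; the function name is mine):

def largest_distinct_subarray(arr):
    last_seen = {}            # value -> index of its most recent occurrence
    i = 0                     # left edge of the current all-distinct window
    i_best, j_best = 0, 0
    for j, value in enumerate(arr):
        if value in last_seen:
            i = max(i, last_seen[value] + 1)
        last_seen[value] = j
        if j - i > j_best - i_best:
            i_best, j_best = i, j
    return arr[i_best:j_best + 1]

print(largest_distinct_subarray([1, 4, 3, 2, 4, 2, 8, 1, 9]))  # [4, 2, 8, 1, 9]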
We can adapt pkpnd's algorithm to use an array rather than hash map for an O(n log n) solution or potentially O(n) if your data allows for an O(n) stable sort, but you'd need to implement a stable sorting function that also provides the original indexes of the elements.
1 4 3 2 4 2 8 1 9
0 1 2 3 4 5 6 7 8 (indexes)
Sorted:
1 1 2 2 3 4 4 8 9
0 7 3 5 2 1 4 6 8 (indexes)
--- --- ---
Now, instead of a hash map, build a new array by iterating over the sorted array and inserting the last occurrence of each element according to the duplicate index arrangements. The final array would look like:
1 4 3 2 4 2 8 1 9
-1 -1 -1 -1 1 3 -1 0 -1 (previous occurrence)
We're now ready to run pkpnd's algorithm with a slight modification:
arr = ... (from input)
map = previous occurrence array
i = 0
i_best = 0
j_best = 0
for j from 0 to length(arr) - 1 inclusive:
    if map[j] >= 0:
        i = max(i, map[j] + 1)
    if j - i > j_best - i_best:
        i_best = i
        j_best = j
return subarray(arr, i_best, j_best)
JavaScript code:
function f(arr, map){
  let i = 0
  let i_best = 0
  let j_best = 0
  for (let j = 0; j < arr.length; j++){
    if (map[j] >= 0)
      i = Math.max(i, map[j] + 1)
    if (j - i > j_best - i_best){
      i_best = i
      j_best = j
    }
  }
  return [i_best, j_best]
}

let arr = [ 1, 4, 3, 2, 4, 2, 8, 1, 9]
let map = [-1,-1,-1,-1, 1, 3,-1, 0,-1]
console.log(f(arr, map))

arr = [ 1, 2, 2, 2, 2, 2, 1]
map = [-1,-1, 1, 2, 3, 4, 0]
console.log(f(arr, map))
We can use a hash table (Dictionary in C#):
public int[] FindSubarrayWithDistinctEntities(int[] arr)
{
    Dictionary<int, int> dic = new Dictionary<int, int>();
    Result r = new Result(); // struct containing start and end index for subarray
    int result = 0;
    r.st = 1;
    r.end = 1;
    for (int i = 0; i < arr.Length; i++)
    {
        if (dic.ContainsKey(arr[i]))
        {
            int diff = i - (dic[arr[i]] + 1);
            if (result < diff)
            {
                result = diff;
                r.st = Math.Min(r.st, (dic[arr[i]] + 1));
                r.end = i - 1;
            }
            dic.Remove(arr[i]);
        }
        dic.Add(arr[i], i);
    }
    return arr.Skip(r.st).Take(r.end).ToArray();
}
Add every number to a HashSet if it isn't already in it. A HashSet's insert and lookup are both O(1), so the final result will be O(n).

Median of two sorted arrays of different length

I am trying to understand the algorithm that solves this problem in O(log(n+m)) where n and m are the lengths of the arrays. I have taken the liberty to post the link to the explanation of this algorithm:
https://www.geeksforgeeks.org/median-of-two-sorted-arrays-of-different-sizes/
It's so hard for me to digest completely the idea behind this algorithm. I can see that the idea is to reduce the length of one of the arrays to either 1 or 2 and then apply the base cases. The base cases make sense, but I wonder if one can omit the base case for n = 2 and just work on n = 1. I also don't understand the remaining cases part. It looks so weird to me that we have to cut the array B[] from the start to idx. It's weird because idx can be equal to the length of B[], so we would be ignoring the whole array.
TL;DR:
The main idea is that you may delete N elements that are surely smaller than (or equal to) the median from your number set, as long as you delete the same amount that are surely greater or equal.
Let's explain it with an example:
A=[1 2 3 9 10], B=[3 4 5 6 7 8 9]
The middle elements marked (A's median is 3, B's is 6):
A=[1 2 (3) 9 10], B=[3 4 5 (6) 7 8 9]
The overall median will be between 3 and 6, inclusive. So, if we delete two elements smaller than 3, and two elements greater than 6, we'll still have the same median. The smaller elements we delete from A, and the greater ones from B:
A=[3 9 10], B=[3 4 5 6 7]
Now we delete one element greater than 9 (from A) and one smaller than 5 (from B):
A=[3 9], B=[4 5 6 7]
We reached Case 4 (smaller array has 2 elements): the algorithm calls for the median of
B[M/2], B[M/2 - 1], max(A[0], B[M/2 - 2]), min(A[1], B[M/2 + 1])
being B[2], B[1], max(A[0], B[0]), min(A[1], B[3])
being 6, 5, max(3,4), min(9,7)
being [6 5 4 7]
The median of that array is 5.5. And that's the correct result.
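As a quick brute-force sanity check of this example (not the O(log(n+m)) algorithm itself), in Python:

import statistics

A = [1, 2, 3, 9, 10]
B = [3, 4, 5, 6, 7, 8, 9]
print(statistics.median(A + B))  # 5.5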
def findmedian(A, B):
    if len(A) > len(B):
        return findmedian(B, A)  # always ensure we do the binsearch on the shorter arr
    x = len(A)
    y = len(B)
    start = 0  # start and end of the search range in the shorter arr
    end = x
    while start <= end:
        partition_x = (start + end) // 2  # the mid of the smaller arr; partition_x is an index
        partition_y = (x + y + 1) // 2 - partition_x  # the partition of the larger arr that divides the arrs into equal halves
        # elements just left/right of each partition, with +/- infinity as sentinels at the edges
        left_x = float('-inf') if partition_x == 0 else A[partition_x - 1]  # nothing on the left
        right_x = float('inf') if partition_x == x else A[partition_x]      # nothing on the right
        left_y = float('-inf') if partition_y == 0 else B[partition_y - 1]
        right_y = float('inf') if partition_y == y else B[partition_y]
        if (left_x <= right_y) and (left_y <= right_x):
            # all the elems on the left are smaller than all the elems on the right; checking
            # only the extremes suffices since the arrs are sorted, and the partition makes equal halves
            if (x + y) % 2 == 0:
                return (max(left_x, left_y) + min(right_x, right_y)) / 2.0
            else:
                return max(left_x, left_y)  # if the num of elems is odd
        elif left_x > right_y:  # we have come too far right in the smaller arr, so move left
            end = partition_x - 1
        else:  # we have come too far left
            start = partition_x + 1
class Solution(object):
    def findMedianSortedArrays(self, nums1, nums2):
        merged_array = (nums1 + nums2)
        merged_array.sort()
        l_m_a = len(merged_array)
        count = int(l_m_a / 2)
        if l_m_a % 2 == 1:
            median = merged_array[count]
            return median
        else:
            median_in_even = (merged_array[count] + merged_array[count - 1]) / 2
            return median_in_even
class Solution:
    def findMedianSortedArrays(self, nums1, nums2):
        nums1.extend(nums2)
        newArray = sorted(nums1)
        if len(newArray) % 2 == 0:
            index = len(newArray) // 2
            median = (newArray[index] + newArray[index - 1]) / 2
            return float(median)
        else:
            index = len(newArray) // 2
            median = newArray[index]
            return float(median)

if __name__ == '__main__':
    obj = Solution()
    print(obj.findMedianSortedArrays([1, 3], [2]))
Median of two sorted Arrays | Same length | Different length
First we need to merge both arrays in sorted order, and then we can find the median: the central
element of the merged array (or the average of the two central elements when the total length is even).
var findMedianSortedArrays = function(nums1, nums2) {
  let array = [], leftIndex = 0, rightIndex = 0;
  while (leftIndex < nums1.length && rightIndex < nums2.length) {
    if (nums1[leftIndex] < nums2[rightIndex]) {
      array.push(nums1[leftIndex]);
      leftIndex++;
    } else {
      array.push(nums2[rightIndex]);
      rightIndex++;
    }
  }
  // append any remaining elements from either array
  array = array.concat(nums1.slice(leftIndex)).concat(nums2.slice(rightIndex));
  if (array.length % 2 == 0) {
    return (array[(array.length / 2) - 1] + array[array.length / 2]) / 2;
  } else {
    return array[Math.floor(array.length / 2)]
  }
};
findMedianSortedArrays([1, 2, 3, 9, 10], [3, 4, 5, 6, 7, 8, 9]);
/**
 * @param {number[]} nums1
 * @param {number[]} nums2
 * @return {number}
 */
var findMedianSortedArrays = function (nums1, nums2) {
let newArray = [];
let median;
if (nums1.length > 0 && nums2.length > 0) {
newArray = [...nums1, ...nums2]
newArray.sort(function (a, b) {
return a - b;
})
} else if (nums1.length === 0) {
newArray = nums2
newArray.sort(function (a, b) {
return a - b;
})
} else if (nums2.length === 0) {
newArray = nums1
newArray.sort(function (a, b) {
return a - b;
})
}
if (newArray.length === 1) {
return newArray[0]
}
if (newArray.length === 3) {
return newArray[1]
}
if (newArray.length % 2 === 0) {
const findIndex = Math.floor(newArray.length / 2)
console.log("findIndexeven", findIndex)
const addValue = Math.max((newArray[findIndex - 1] +
newArray[findIndex]), 0)
median = addValue / 2
} else {
const findIndex = Math.floor(newArray.length / 2) + 1
console.log("findIndexodd", findIndex)
median = newArray[findIndex - 1]
}
console.log("medianValue",median)
return median
};
findMedianSortedArrays([1, 2], [3, 4])
For me, it took just a few minutes and a few lines of Python, and it passed the LeetCode check with a runtime beating 62% of Python 3 online submissions. My code is here:
class Solution:
    def findMedianSortedArrays(self, nums1: List[int], nums2: List[int]) -> float:
        n = len(nums1) + len(nums2)
        nums1.extend(nums2)  # merge two lists
        nums1.sort()         # sort it
        if n % 2 == 0:
            return (nums1[n//2 - 1] + nums1[n//2]) / 2  # return the median for even n
        else:
            return nums1[n//2]  # return the median for odd n

Maximum number achievable by converting two adjacent x to one (x+1)

Given a sequence of N integers, where 1 <= N <= 500 and each number is between 1 and 50. In a step, any two adjacent equal numbers x x can be replaced with a single x + 1. What is the maximum number achievable by such steps?
For example if given 2 3 1 1 2 2 then the maximum possible is 4:
2 3 1 1 2 2 ---> 2 3 2 2 2 ---> 2 3 3 2 ---> 2 4 2.
It is evident that I should try to do better than the maximum number available in the sequence. But I can't figure out a good algorithm.
Each substring of the input can make at most one single number (invariant: if a substring collapses to x, then 2^x equals the sum of 2^entry over the substring, so x is uniquely determined). For every x, we can find the set of substrings that can make x: this is (1) every occurrence of x, and (2) the union of two contiguous substrings that can each make x - 1. The resulting algorithm is O(N^2)-time.
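To make that idea concrete, here is a hedged Python sketch of the invariant-based check (my own illustration; a simpler O(N^3) dynamic program over all subarrays and split points, not the O(N^2) bookkeeping described above):

# can[(i, j)] holds the single value arr[i:j] can collapse into, or None.
# By the power-of-two invariant that value is unique, so the first valid split suffices.
def max_reachable(arr):
    n = len(arr)
    best = max(arr)
    can = {}
    for length in range(1, n + 1):
        for i in range(n - length + 1):
            j = i + length
            if length == 1:
                can[i, j] = arr[i]
            else:
                can[i, j] = None
                for m in range(i + 1, j):
                    if can[i, m] is not None and can[i, m] == can[m, j]:
                        can[i, j] = can[i, m] + 1
                        break
            if can[i, j] is not None and can[i, j] > best:
                best = can[i, j]
    return best

print(max_reachable([2, 3, 1, 1, 2, 2]))  # 4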
An algorithm could work like this:
Convert the input to an array where every element has a frequency attribute, collapsing repeated consecutive values in the input into one single node. For example, this input:
1 2 2 4 3 3 3 3
Would be represented like this:
{val: 1, freq: 1} {val: 2, freq: 2} {val: 4, freq: 1} {val: 3, freq: 4}
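For illustration, that compression step could look like this in Python (a hypothetical helper of my own, not taken from the snippet further below):

def compress(a):
    # collapse runs of equal values into [value, frequency] nodes
    nodes = []
    for x in a:
        if nodes and nodes[-1][0] == x:
            nodes[-1][1] += 1
        else:
            nodes.append([x, 1])
    return nodes

print(compress([1, 2, 2, 4, 3, 3, 3, 3]))  # [[1, 1], [2, 2], [4, 1], [3, 4]]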
Then find local minima nodes, like the node (3 3 3 3) in 1 (2 2) 4 (3 3 3 3) 4, i.e. nodes whose neighbours both have higher values. For those local minima that have an even frequency, "lift" those by applying the step. Repeat this until no such local minima (with even frequency) exist any more.
Start of the recursive part of the algorithm:
At both ends of the array, work inwards to "lift" values as long as the more inner neighbour has a higher value. With this rule, the following:
1 2 2 3 5 4 3 3 3 1 1
will completely resolve. First from the left side inward:
1 4 5 4 3 3 3 1 1
Then from the right side:
1 4 6 3 2
Note that when there is an odd frequency (like for the 3s above), there will be a "remainder" that cannot be incremented. Under this rule the remainder should always be left on the outward side, so as to maximise the potential towards the inner part of the array.
At this point the remaining local minima have odd frequencies. Applying the step to such a node will always leave a "remainder" (like above) with the original value. This remaining node can appear anywhere, but it only makes sense to look at solutions where this remainder is on the left side or the right side of the lift (not in the middle). So for example:
4 1 1 1 1 1 2 3 4
Can resolve to one of these:
4 2 2 1 2 3 4
Or:
4 1 2 2 2 3 4
The 1 in either the second or fourth position is the above-mentioned "remainder". Obviously, the second way of resolving is more promising in this example. In general, the choice is obvious when on one side there is a value that is too high to merge with, like the left-most 4 is too high for five 1 values to reach. The 4 is like a wall.
When the frequency of the local minimum is one, there is nothing we can do with it. It actually separates the array in a left and right side that do not influence each other. The same is true for the remainder element discussed above: it separates the array into two parts that do not influence each other.
So the next step in the algorithm is to find such minima (where the choice is obvious), apply that kind of step and separate the problem into two distinct problems which should be solved recursively (from the top). So in the last example, the following two problems would be solved separately:
4
2 2 3 4
Then the best of both solutions will count as the overall solution. In this case that is 5.
The most challenging part of the algorithm is to deal with those local minima for which the choice of where to put the remainder is not obvious. For instance;
3 3 1 1 1 1 1 2 3
This can go to either:
3 3 2 2 1 2 3
3 3 1 2 2 2 3
In this example the end result is the same for both options, but in bigger arrays it would be less and less obvious. So here both options have to be investigated. In general you can have many of them, like 2 in this example:
3 1 1 1 2 3 1 1 1 1 1 3
Each of these two minima has two options. This seems like it would explode into too many possibilities for larger arrays, but it is not that bad: the algorithm can take opposite choices in neighbouring minima and alternate like this through the whole array. This way alternating sections are favoured and get the most possible value drawn into them, while the other sections are deprived of value. Then the algorithm turns the tables and toggles all choices, so that the sections that were previously favoured are now deprived, and vice versa. The solution of both these alternatives is derived by resolving each section recursively, and then comparing the two "grand" solutions to pick the best one.
Snippet
Here is a live JavaScript implementation of the above algorithm.
Comments are provided which hopefully should make it readable.
"use strict";
function Node(val, freq) {
// Immutable plain object
return Object.freeze({
val: val,
freq: freq || 1, // Default frequency is 1.
// Max attainable value when merged:
reduced: val + (freq || 1).toString(2).length - 1
});
}
function compress(a) {
// Put repeated elements in a single node
var result = [], i, j;
for (i = 0; i < a.length; i = j) {
for (j = i + 1; j < a.length && a[j] == a[i]; j++);
result.push(Node(a[i], j - i));
}
return result;
}
function decompress(a) {
// Expand nodes into separate, repeated elements
var result = [], i, j;
for (i = 0; i < a.length; i++) {
for (j = 0; j < a[i].freq; j++) {
result.push(a[i].val);
}
}
return result;
}
function str(a) {
return decompress(a).join(' ');
}
function unstr(s) {
s = s.replace(/\D+/g, ' ').trim();
return s.length ? compress(s.split(/\s+/).map(Number)) : [];
}
/*
The function merge modifies an array in-place, performing a "step" on
the indicated element.
The array will get an element with an incremented value
and decreased frequency, unless a join occurs with neighboring
elements with the same value: then the frequencies are accumulated
into one element. When the original frequency was odd there will
be a "remainder" element in the modified array as well.
*/
function merge(a, i, leftWards, stats) {
var val = a[i].val+1,
odd = a[i].freq % 2,
newFreq = a[i].freq >> 1,
last = i;
// Merge with neighbouring nodes of same value:
if ((!odd || !leftWards) && a[i+1] && a[i+1].val === val) {
newFreq += a[++last].freq;
}
if ((!odd || leftWards) && i && a[i-1].val === val) {
newFreq += a[--i].freq;
}
// Replace nodes
a.splice(i, last-i+1, Node(val, newFreq));
if (odd) a.splice(i+leftWards, 0, Node(val-1));
// Update statistics and trace: this is not essential to the algorithm
if (stats) {
stats.total_applied_merges++;
if (stats.trace) stats.trace.push(str(a));
}
return i;
}
/* Function Solve
Parameters:
a: The compressed array to be reduced via merges. It is changed in-place
and should not be relied on after the call.
stats: Optional plain object that will be populated with execution statistics.
Return value:
The array after the best merges were applied to achieve the highest
value, which is stored in the maxValue custom property of the array.
*/
function solve(a, stats) {
var maxValue, i, j, traceOrig, skipLeft, skipRight, sections, goLeft,
b, choice, alternate;
if (!a.length) return a;
if (stats && stats.trace) {
traceOrig = stats.trace;
traceOrig.push(stats.trace = [str(a)]);
}
// Look for valleys of even size, and "lift" them
for (i = 1; i < a.length - 1; i++) {
if (a[i-1].val > a[i].val && a[i].val < a[i+1].val && (a[i].freq % 2) < 1) {
// Found an even valley
i = merge(a, i, false, stats);
if (i) i--;
}
}
// Check left-side elements with always increasing values
for (i = 0; i < a.length-1 && a[i].val < a[i+1].val; i++) {
if (a[i].freq > 1) i = merge(a, i, false, stats) - 1;
};
// Check right-side elements with always increasing values, right-to-left
for (j = a.length-1; j > 0 && a[j-1].val > a[j].val; j--) {
if (a[j].freq > 1) j = merge(a, j, true, stats) + 1;
};
// All resolved?
if (i == j) {
while (a[i].freq > 1) merge(a, i, true, stats);
a.maxValue = a[i].val;
} else {
skipLeft = i;
skipRight = a.length - 1 - j;
// Look for other valleys (odd sized): they will lead to a split into sections
sections = [];
for (i = a.length - 2 - skipRight; i > skipLeft; i--) {
if (a[i-1].val > a[i].val && a[i].val < a[i+1].val) {
// Odd number of elements: if more than one, there
// are two ways to merge them, but maybe
// one of both possibilities can be excluded.
goLeft = a[i+1].val > a[i].reduced;
if (a[i-1].val > a[i].reduced || goLeft) {
if (a[i].freq > 1) i = merge(a, i, goLeft, stats) + goLeft;
// i is the index of the element which has become a 1-sized valley
// Split off the right part of the array, and store the solution
sections.push(solve(a.splice(i--), stats));
}
}
}
if (sections.length) {
// Solve last remaining section
sections.push(solve(a, stats));
sections.reverse();
// Combine the solutions of all sections into one
maxValue = sections[0].maxValue;
for (i = sections.length - 1; i >= 0; i--) {
maxValue = Math.max(sections[i].maxValue, maxValue);
}
} else {
// There is no more valley that can be resolved without branching into two
// directions. Look for the remaining valleys.
sections = [];
b = a.slice(0); // take copy
for (choice = 0; choice < 2; choice++) {
if (choice) a = b; // restore from copy on second iteration
alternate = choice;
for (i = a.length - 2 - skipRight; i > skipLeft; i--) {
if (a[i-1].val > a[i].val && a[i].val < a[i+1].val) {
// Odd number of elements
alternate = !alternate
i = merge(a, i, alternate, stats) + alternate;
sections.push(solve(a.splice(i--), stats));
}
}
// Solve last remaining section
sections.push(solve(a, stats));
}
sections.reverse(); // put in logical order
// Find best section:
maxValue = sections[0].maxValue;
for (i = sections.length - 1; i >= 0; i--) {
maxValue = Math.max(sections[i].maxValue, maxValue);
}
for (i = sections.length - 1; i >= 0 && sections[i].maxValue < maxValue; i--);
// Which choice led to the highest value (choice = 0 or 1)?
choice = (i >= sections.length / 2)
// Discard the not-chosen version
sections = sections.slice(choice * sections.length/2);
}
// Reconstruct the solution from the sections.
a = [].concat.apply([], sections);
a.maxValue = maxValue;
}
if (traceOrig) stats.trace = traceOrig;
return a;
}
function randomValues(len) {
var a = [];
for (var i = 0; i < len; i++) {
// 50% chance for a 1, 25% for a 2, ... etc.
a.push(Math.min(/\.1*/.exec(Math.random().toString(2))[0].length,5));
}
return a;
}
// I/O
var inputEl = document.querySelector('#inp');
var randEl = document.querySelector('#rand');
var lenEl = document.querySelector('#len');
var goEl = document.querySelector('#go');
var outEl = document.querySelector('#out');
goEl.onclick = function() {
// Get the input and structure it
var a = unstr(inputEl.value),
stats = {
total_applied_merges: 0,
trace: a.length < 100 ? [] : undefined
};
// Apply algorithm
a = solve(a, stats);
// Output results
var output = {
value: a.maxValue,
compact: str(a),
total_applied_merges: stats.total_applied_merges,
trace: stats.trace || 'no trace produced (input too large)'
};
outEl.textContent = JSON.stringify(output, null, 4);
}
randEl.onclick = function() {
// Get input (count of numbers to generate):
len = lenEl.value;
// Generate
var a = randomValues(len);
// Output
inputEl.value = a.join(' ');
// Simulate click to find the solution immediately.
goEl.click();
}
// Tests
var tests = [
' ', '',
'1', '1',
'1 1', '2',
'2 2 1 2 2', '3 1 3',
'3 2 1 1 2 2 3', '5',
'3 2 1 1 2 2 3 1 1 1 1 3 2 2 1 1 2', '6',
'3 1 1 1 3', '3 2 1 3',
'2 1 1 1 2 1 1 1 2 1 1 1 1 1 2', '3 1 2 1 4 1 2',
'3 1 1 2 1 1 1 2 3', '4 2 1 2 3',
'1 4 2 1 1 1 1 1 1 1', '1 5 1',
];
var res;
for (var i = 0; i < tests.length; i+=2) {
var res = str(solve(unstr(tests[i])));
if (res !== tests[i+1]) throw 'Test failed: ' + tests[i] + ' returned ' + res + ' instead of ' + tests[i+1];
}
Enter series (space separated):<br>
<input id="inp" size="60" value="2 3 1 1 2 2"><button id="go">Solve</button>
<br>
<input id="len" size="4" value="30"><button id="rand">Produce random series of this size and solve</button>
<pre id="out"></pre>
As you can see the program produces a reduced array with the maximum value included. In general there can be many derived arrays that have this maximum; only one is given.
An O(n*m) time and space algorithm is possible, where, according to your stated limits, n <= 500 and m <= 58 (consider that even for a billion elements, m need only be about 60, representing the largest element plus log2(n)). Here m represents the range of possible result values, up to 50 + floor(log2(500)) = 58:
Consider the condensed sequence, s = {[x, number of x's]}.
If M[i][j] = [num_j,start_idx] where num_j represents the maximum number of contiguous js ending at index i of the condensed sequence; start_idx, the index where the sequence starts or -1 if it cannot join earlier sequences; then we have the following relationship:
M[i][j] = [s[i][1] + M[i-1][j][0], M[i-1][j][1]]
when j equals s[i][0]
j's greater than s[i][0] but smaller than or equal to s[i][0] + floor(log2(s[i][1])) represent converting pairs and merging with an earlier sequence if applicable, with a special case once the new count is odd:
When M[i][j][0] is odd, we do two things: first calculate the best so far by looking back in the matrix to a sequence that could merge with M[i][j] or its paired descendants, and then set a lower bound in the next applicable cells in the row (meaning a merge with an earlier sequence cannot happen via this cell). The reason this works is that:
if s[i + 1][0] > s[i][0], then s[i + 1] could only possibly pair with the new split section of s[i]; and
if s[i + 1][0] < s[i][0], then s[i + 1] might generate a lower j that would combine with the odd j from M[i], potentially making a longer sequence.
At the end, return the largest entry in the matrix, max(j + floor(log2(num_j))), for all j.
JavaScript code (counterexamples would be welcome; the limit on the answer is set at 7 for convenient visualization of the matrix):
function f(str){
var arr = str.split(/\s+/).map(Number);
var s = [,[arr[0],0]];
for (var i=0; i<arr.length; i++){
if (s[s.length - 1][0] == arr[i]){
s[s.length - 1][1]++;
} else {
s.push([arr[i],1]);
}
}
var M = [new Array(8).fill([0,0])],
best = 0;
for (var i=1; i<s.length; i++){
M[i] = new Array(8).fill([0,i]);
var temp = s[i][1],
temp_odd,
temp_start,
odd = false;
for (var j=s[i][0]; temp>0; j++){
var start_idx = odd ? temp_start : M[i][j-1][1];
if (start_idx != -1 && M[start_idx - 1][j][0]){
temp += M[start_idx - 1][j][0];
start_idx = M[start_idx - 1][j][1];
}
if (!odd){
M[i][j] = [temp,start_idx];
temp_odd = temp;
} else {
M[i][j] = [temp_odd,-1];
temp_start = start_idx;
}
if (!odd && temp & 1 && temp > 1){
odd = true;
temp_start = start_idx;
}
best = Math.max(best,j + Math.floor(Math.log2(temp)));
temp >>= 1;
temp_odd >>= 1;
}
}
return [arr, s, best, M];
}
// I/O
var button = document.querySelector('button');
var input = document.querySelector('input');
var pre = document.querySelector('pre');
button.onclick = function() {
var val = input.value;
var result = f(val);
var text = '';
for (var i=0; i<3; i++){
text += JSON.stringify(result[i]) + '\n\n';
}
for (var i in result[3]){
text += JSON.stringify(result[3][i]) + '\n';
}
pre.textContent = text;
}
<input value ="2 2 3 3 2 2 3 3 5">
<button>Solve</button>
<pre></pre>
Here's a brute force solution:
function findMax(array A, int currentMax)
    for each pair (i, i+1) of indices for which A[i] == A[i+1] do
        currentMax = max(A[i]+1, currentMax)
        replace A[i], A[i+1] by a single number A[i]+1
        currentMax = max(currentMax, findMax(A, currentMax))
        undo the replacement, restoring A
    end for
    return currentMax

Given the array A, let currentMax = max(A[0], ..., A[n-1])
print findMax(A, currentMax)

The algorithm terminates because in each recursive call the array shrinks by 1.
It's also clear that it is correct: we try out all possible replacement sequences.
The code is extremely slow when the array is large and there are lots of options for replacements, but it actually works reasonably fast on arrays with a small number of replaceable pairs. (I'll try to quantify the running time in terms of the number of replaceable pairs.)
A naive working code in Python:
def findMax(L, currMax):
    for i in range(len(L) - 1):
        if L[i] == L[i + 1]:
            L[i] += 1
            del L[i + 1]
            currMax = max(currMax, L[i])
            currMax = max(currMax, findMax(L, currMax))
            L[i] -= 1
            L.insert(i + 1, L[i])
    return currMax

# entry point
if __name__ == '__main__':
    L1 = [2, 3, 1, 1, 2, 2]
    L2 = [2, 3, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2]
    print(findMax(L1, max(L1)))
    print(findMax(L2, max(L2)))
The result of the first call is 4, as expected.
The result of the second call is 5, as expected; the sequence that gives the result: 2,3,1,1,2,2,2,2,2,2,2,2 -> 2,3,1,1,3,2,2,2,2,2,2 -> 2,3,1,1,3,3,2,2,2,2 -> 2,3,1,1,3,3,3,2,2 -> 2,3,1,1,3,3,3,3 -> 2,3,1,1,4,3,3 -> 2,3,1,1,4,4 -> 2,3,1,1,5

Split a random value into four that sum up to it

I have one value like 24, and I have four textboxes. How can I dynamically generate four values that add up to 24?
All the values must be integers and can't be negative, and the result cannot be 6, 6, 6, 6; they must be different like: 8, 2, 10, 4. (But 5, 6, 6, 7 would be okay.)
For your stated problem, it is possible to generate an array of all possible solutions and then pick one randomly. There are in fact 1,770 possible solutions.
var solutions = [[Int]]()
for i in 1...21 {
for j in 1...21 {
for k in 1...21 {
let l = 24 - (i + j + k)
if l > 0 && !(i == 6 && j == 6 && k == 6) {
solutions.append([i, j, k, l])
}
}
}
}
// Now generate 20 solutions
for _ in 1...20 {
let rval = Int(arc4random_uniform(UInt32(solutions.count)))
println(solutions[rval])
}
This avoids any bias at the cost of initial setup time and storage.
This could be improved by:
Reducing storage space by only storing the first 3 numbers. The 4th one is always 24 - (sum of first 3)
Reducing storage space by storing each solution as a single integer: (i * 10000 + j * 100 + k)
Speeding up the generation of solutions by realizing that each loop doesn't need to go to 21.
Here is the solution that stores each solution as a single integer and optimizes the loops:
var solutions = [Int]()
for i in 1...21 {
for j in 1...22-i {
for k in 1...23-i-j {
if !(i == 6 && j == 6 && k == 6) {
solutions.append(i * 10000 + j * 100 + k)
}
}
}
}
// Now generate 20 solutions
for _ in 1...20 {
let rval = Int(arc4random_uniform(UInt32(solutions.count)))
let solution = solutions[rval]
// unpack the values
let i = solution / 10000
let j = (solution % 10000) / 100
let k = solution % 100
let l = 24 - (i + j + k)
// print the solution
println("\([i, j, k, l])")
}
Here is a Swift implementation of the algorithm given in https://stackoverflow.com/a/8064754/1187415, with a slight
modification because all numbers are required to be positive.
The method to producing N positive random integers with sum M is
Build an array containing the number 0, followed by N-1 different
random numbers in the range 1 .. M-1, and finally the number M.
Compute the differences of subsequent array elements.
In the first step, we need a random subset of N-1 elements out of
the set { 1, ..., M-1 }. This can be achieved by iterating over this
set and choosing each element with probability n/m, where
m is the remaining number of elements we can choose from and
n is the remaining number of elements to choose.
Instead of storing the chosen random numbers in an array, the
difference to the previously chosen number is computed immediately
and stored.
This gives the following function:
func randomNumbers(#count : Int, withSum sum : Int) -> [Int] {
precondition(sum >= count, "`sum` must not be less than `count`")
var diffs : [Int] = []
var last = 0 // last number chosen
var m = UInt32(sum - 1) // remaining # of elements to choose from
var n = UInt32(count - 1) // remaining # of elements to choose
for i in 1 ..< sum {
// Choose this number `i` with probability n/m:
if arc4random_uniform(m) < n {
diffs.append(i - last)
last = i
n--
}
m--
}
diffs.append(sum - last)
return diffs
}
println(randomNumbers(count: 4, withSum: 24))
If a solution with all elements equal (e.g. 6+6+6+6=24) is not
allowed, you can repeat the method until a valid solution is found:
func differentRandomNumbers(#count : Int, withSum sum : Int) -> [Int] {
precondition(count >= 2, "`count` must be at least 2")
var v : [Int]
do {
v = randomNumbers(count: count, withSum: sum)
} while (!contains(v, { $0 != v[0]} ))
return v
}
Here is a simple test. It computes 1,000,000 random representations
of 7 as the sum of 3 positive integers, and counts the distribution
of the results.
let set = NSCountedSet()
for i in 1 ... 1_000_000 {
let v = randomNumbers(count: 3, withSum: 7)
set.addObject(v)
}
for (_, v) in enumerate(set) {
let count = set.countForObject(v)
println("\(v as! [Int]) \(count)")
}
Result:
[1, 4, 2] 66786
[1, 5, 1] 67082
[3, 1, 3] 66273
[2, 2, 3] 66808
[2, 3, 2] 66966
[5, 1, 1] 66545
[2, 1, 4] 66381
[1, 3, 3] 67153
[3, 3, 1] 67034
[4, 1, 2] 66423
[3, 2, 2] 66674
[2, 4, 1] 66418
[4, 2, 1] 66292
[1, 1, 5] 66414
[1, 2, 4] 66751
Update for Swift 3:
func randomNumbers(count : Int, withSum sum : Int) -> [Int] {
precondition(sum >= count, "`sum` must not be less than `count`")
var diffs : [Int] = []
var last = 0 // last number chosen
var m = UInt32(sum - 1) // remaining # of elements to choose from
var n = UInt32(count - 1) // remaining # of elements to choose
for i in 1 ..< sum {
// Choose this number `i` with probability n/m:
if arc4random_uniform(m) < n {
diffs.append(i - last)
last = i
n -= 1
}
m -= 1
}
diffs.append(sum - last)
return diffs
}
print(randomNumbers(count: 4, withSum: 24))
Update for Swift 4.2 (and later), using the unified random API:
func randomNumbers(count : Int, withSum sum : Int) -> [Int] {
precondition(sum >= count, "`sum` must not be less than `count`")
var diffs : [Int] = []
var last = 0 // last number chosen
var m = sum - 1 // remaining # of elements to choose from
var n = count - 1 // remaining # of elements to choose
for i in 1 ..< sum {
// Choose this number `i` with probability n/m:
if Int.random(in: 0..<m) < n {
diffs.append(i - last)
last = i
n -= 1
}
m -= 1
}
diffs.append(sum - last)
return diffs
}
func getRandomValues(amountOfValues:Int, totalAmount:Int) -> [Int]?{
if amountOfValues < 1{
return nil
}
if totalAmount < 1{
return nil
}
if totalAmount < amountOfValues{
return nil
}
var values:[Int] = []
var valueLeft = totalAmount
for i in 0..<amountOfValues{
if i == amountOfValues - 1{
values.append(valueLeft)
break
}
var value = Int(arc4random_uniform(UInt32(valueLeft - (amountOfValues - i))) + 1)
valueLeft -= value
values.append(value)
}
var shuffledArray:[Int] = []
for i in 0..<values.count {
var rnd = Int(arc4random_uniform(UInt32(values.count)))
shuffledArray.append(values[rnd])
values.removeAtIndex(rnd)
}
return shuffledArray
}
getRandomValues(4, 24)
This is not a final answer, but it should be a (good) starting point.
How it works: it takes 2 parameters, the amount of random values (4 in your case) and the total amount (24 in your case).
It takes a random value between the total amount and 0, stores it in an array, and subtracts it from a variable that keeps track of the amount that is left.
Then it takes a new random value between the amount that is left and 0, stores it in the array, and again subtracts it from the amount that is left.
When only the last number is needed, it adds whatever amount is left to the array.
EDIT:
Adding a +1 to the random value removes the problem of having 0 in your array.
EDIT 2:
Shuffling the array does remove the increased chance of having a high value as the first value.
One solution that is unfortunately non-deterministic but completely random is as follows:
For a total of 24 in 4 numbers:
pick four random numbers between 1 and 21
repeat until the total of the numbers equals 24 and they are not all 6.
This will, on average, loop about 100 times before finding a solution.
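A minimal Python sketch of this rejection approach (generalised to any count/total; the function name and parameters are mine):

import random

def random_split_rejection(total=24, count=4):
    # keep drawing until the numbers hit the target sum and are not all equal
    while True:
        nums = [random.randint(1, total - count + 1) for _ in range(count)]
        if sum(nums) == total and len(set(nums)) > 1:
            return nums

print(random_split_rejection())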
Here's a solution which should have significantly* less bias than some of the other methods. It works by generating the requested number of random floating point numbers, multiplying or dividing all of them until they add up to the target total, and then rounding them into integers. The rounding process changes the total, so we need to correct for that by adding or subtracting from random terms until they add up to the right amount.
func getRandomDoubles(#count: Int, #total: Double) -> [Double] {
var nonNormalized = [Double]()
nonNormalized.reserveCapacity(count)
for i in 0..<count {
nonNormalized.append(Double(arc4random()) / 0xFFFFFFFF)
}
let nonNormalizedSum = reduce(nonNormalized, 0) { $0 + $1 }
let normalized = nonNormalized.map { $0 * total / nonNormalizedSum }
return normalized
}
func getRandomInts(#count: Int, #total: Int) -> [Int] {
let doubles = getRandomDoubles(count: count, total: Double(total))
var ints = [Int]()
ints.reserveCapacity(count)
for double in doubles {
if double < 1 || double % 1 >= 0.5 {
// round up
ints.append(Int(ceil(double)))
} else {
// round down
ints.append(Int(floor(double)))
}
}
let roundingErrors = total - (reduce(ints, 0) { $0 + $1 })
let directionToAdjust: Int = roundingErrors > 0 ? 1 : -1
var corrections = abs(roundingErrors)
while corrections > 0 {
let index = Int(arc4random_uniform(UInt32(count)))
if directionToAdjust == -1 && ints[index] <= 1 { continue }
ints[index] += directionToAdjust
corrections--
}
return ints
}
*EDIT: Martin R has correctly pointed out that this is not nearly as uniform as one might expect, and is in fact highly biased towards numbers in the middle of the 1-24 range. I would not recommend using this solution, but I'm leaving it up so that others can know not to make the same mistake.
As a recursive function the algorithm is very nice:
func getRandomValues(amount: Int, total: Int) -> [Int] {
if amount == 1 { return [total] }
if amount == total { return Array(count: amount, repeatedValue: 1) }
let number = Int(arc4random()) % (total - amount + 1) + 1
return [number] + getRandomValues(amount - 1, total - number)
}
And with safety check:
func getRandomValues(amount: Int, total: Int) -> [Int]? {
if !(1...total ~= amount) { return nil }
if amount == 1 { return [total] }
if amount == total { return Array(count: amount, repeatedValue: 1) }
let number = Int(arc4random()) % (total - amount + 1) + 1
return [number] + getRandomValues(amount - 1, total - number)!
}
As #MartinR pointed out the code above is extremely biased. So in order to have a uniform distribution of the output values you should use this piece of code:
func getRandomValues(amount: Int, total: Int) -> [Int] {
var numberSet = Set<Int>()
// add splitting points to numberSet
for _ in 1...amount - 1 {
var number = Int(arc4random()) % (total - 1) + 1
while numberSet.contains(number) {
number = Int(arc4random()) % (total - 1) + 1
}
numberSet.insert(number)
}
// sort numberSet and return the differences between the splitting points
let sortedArray = (Array(numberSet) + [0, total]).sort()
return sortedArray.enumerate().flatMap{
indexElement in
if indexElement.index == amount { return nil }
return sortedArray[indexElement.index + 1] - indexElement.element
}
}
A JavaScript implementation for those who may be looking for such a case:
const numbersSumTo = (length, value) => {
const fourRandomNumbers = Array.from({ length: length }, () => Math.floor(Math.random() * 6) + 1);
const res = fourRandomNumbers.map(num => (num / fourRandomNumbers.reduce((a, b) => a + b, 0)) * value).map(num => Math.trunc(num));
res[0] += Math.abs(res.reduce((a, b) => a + b, 0) - value);
return res;
}
// Gets an array with 4 items which sum to 100
const res = numbersSumTo(4, 100);
const resSum = res.reduce((a, b) => a + b, 0);
console.log({
res,
resSum
});
Also plenty of different methods of approach can be found here on this question: https://math.stackexchange.com/questions/1276206/method-of-generating-random-numbers-that-sum-to-100-is-this-truly-random

How to calculate the index (lexicographical order) when the combination is given

I know that there is an algorithm that, given a combination of numbers (no repetitions, order doesn't matter), calculates the index of that combination in the lexicographic order.
It would be very useful for my application to speed things up...
For example:
combination(10, 5)
1 - 1 2 3 4 5
2 - 1 2 3 4 6
3 - 1 2 3 4 7
....
251 - 5 7 8 9 10
252 - 6 7 8 9 10
I need that the algorithm returns the index of the given combination.
e.g.: index( 2, 5, 7, 8, 10 ) --> index
EDIT: actually I'm using a Java application that generates all combinations C(53, 5) and inserts them into a TreeMap.
My idea is to create an array that contains all combinations (and related data) that I can index with this algorithm,
all to speed up combination searching.
However, I tried some (not all) of your solutions, and the algorithms you proposed are slower than a get() from the TreeMap.
If it helps: my needs are for combinations of 5 out of 53 numbers, ranging from 0 to 52.
Thank you again to all :-)
Here is a snippet that will do the work.
#include <iostream>

int main()
{
    const int n = 10;
    const int k = 5;
    int combination[k] = {2, 5, 7, 8, 10};
    int index = 0;
    int j = 0;
    for (int i = 0; i != k; ++i)
    {
        for (++j; j != combination[i]; ++j)
        {
            index += c(n - j, k - i - 1);
        }
    }
    std::cout << index + 1 << std::endl;
    return 0;
}
It assumes you have a function
int c(int n, int k);
that will return the number of combinations of choosing k elements out of n elements.
The loop calculates the number of combinations preceding the given combination.
By adding one at the end we get the actual index.
For the given combination there are
c(9, 4) = 126 combinations containing 1 and hence preceding it in lexicographic order.
Of the combinations containing 2 as the smallest number there are
c(7, 3) = 35 combinations having 3 as the second smallest number
c(6, 3) = 20 combinations having 4 as the second smallest number
All of these are preceding the given combination.
Of the combinations containing 2 and 5 as the two smallest numbers there are
c(4, 2) = 6 combinations having 6 as the third smallest number.
All of these are preceding the given combination.
Etc.
If you put a print statement in the inner loop you will get the numbers
126, 35, 20, 6, 1.
Hope that explains the code.
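For comparison, here is the same counting loop translated to Python, using math.comb in place of c(n, k) (the translation and function name are mine):

from math import comb

def combination_rank(combination, n):
    # 1-based lexicographic rank of a sorted combination drawn from 1..n
    k = len(combination)
    index = 0
    j = 0
    for i, value in enumerate(combination):
        j += 1
        while j != value:
            index += comb(n - j, k - i - 1)
            j += 1
    return index + 1

print(combination_rank([2, 5, 7, 8, 10], 10))  # 189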
Convert your number selections to a factorial base number. This number will be the index you want. Technically this calculates the lexicographical index of all permutations, but if you only give it combinations, the indexes will still be well ordered, just with some large gaps for all the permutations that come in between each combination.
Edit: pseudocode removed, it was incorrect, but the method above should work. Too tired to come up with correct pseudocode at the moment.
Edit 2: Here's an example. Say we were choosing a combination of 5 elements from a set of 10 elements, like in your example above. If the combination was 2 3 4 6 8, you would get the related factorial base number like so:
Take the unselected elements and count how many you have to pass by to get to the one you are selecting.
1 2 3 4 5 6 7 8 9 10
2 -> 1
1 3 4 5 6 7 8 9 10
3 -> 1
1 4 5 6 7 8 9 10
4 -> 1
1 5 6 7 8 9 10
6 -> 2
1 5 7 8 9 10
8 -> 3
So the index in factorial base is 1112300000
In decimal base, it's
1*9! + 1*8! + 1*7! + 2*6! + 3*5! = 410040
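The decimal conversion can be checked in a couple of lines of Python:

from math import factorial

digits = [1, 1, 1, 2, 3]   # factorial-base digits found above
places = [9, 8, 7, 6, 5]   # their place values
print(sum(d * factorial(p) for d, p in zip(digits, places)))  # 410040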
This is Algorithm 2.7 kSubsetLexRank on page 44 of Combinatorial Algorithms by Kreher and Stinson.
r = 0
t[0] = 0
for i from 1 to k
    if t[i - 1] + 1 <= t[i] - 1
        for j from t[i - 1] + 1 to t[i] - 1
            r = r + choose(n - j, k - i)
return r
The array t holds your values, for example [5 7 8 9 10]. The function choose(n, k) calculates the number "n choose k". The result value r is the zero-based rank, 250 for the example (i.e. position 251 in the one-based listing above). Other inputs are n and k; for the example they would be 10 and 5.
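A direct Python transcription of that algorithm (zero-based rank, with math.comb playing the role of choose; the wrapper is mine):

from math import comb

def k_subset_lex_rank(t, k, n):
    # t is the sorted k-subset with values in 1..n
    r = 0
    prev = 0  # t[0] = 0 in the book's notation
    for i in range(1, k + 1):
        for j in range(prev + 1, t[i - 1]):
            r += comb(n - j, k - i)
        prev = t[i - 1]
    return r

print(k_subset_lex_rank([5, 7, 8, 9, 10], 5, 10))  # 250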
Zero-based:
# v: array of length k consisting of numbers between 0 and n-1 (ascending)
def index_of_combination(n, k, v):
    idx = 0
    for p in range(k - 1):
        if p == 0:
            arrg = range(1, v[p] + 1)
        else:
            arrg = range(v[p - 1] + 2, v[p] + 1)
        for a in arrg:
            idx += combi[n - a, k - 1 - p]
    idx += v[k - 1] - v[k - 2] - 1
    return idx
Null Set has the right approach. The index corresponds to the factorial-base number of the sequence. You build a factorial-base number just like any other base number, except that the base decreases for each digit.
Now, the value of each digit in the factorial-base number is the number of elements less than it that have not yet been used. So, for combination(10, 5):
(1 2 3 4 5) == 0*9!/5! + 0*8!/5! + 0*7!/5! + 0*6!/5! + 0*5!/5!
== 0*3024 + 0*336 + 0*42 + 0*6 + 0*1
== 0
(10 9 8 7 6) == 9*3024 + 8*336 + 7*42 + 6*6 + 5*1
== 30239
It should be pretty easy to calculate the index incrementally.
If you have a set of non-negative integers 0 <= x_1 < x_2 < ... < x_k, then you could use something called the squashed order:
I = sum(j=1..k) Choose(x_j, j)
The beauty of the squashed order is that it works independently of the largest value in the parent set.
The squashed order is not the order you are looking for, but it is related.
To use the squashed order to get the lexicographic index in the set of k-subsets of {1,...,n}, take
1 <= x_1 < ... < x_k <= n
and compute
0 <= n-x_k < n-x_(k-1) < ... < n-x_1
Then compute the squashed order index of (n-x_k, ..., n-x_1).
Then subtract that squashed order index from Choose(n,k) to get your result, which is the lexicographic index.
If you have relatively small values of n and k, you can cache all the values Choose(a,b) with a <= n and b <= k.
See Anderson, Combinatorics on Finite Sets, pp 112-119
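A small Python sketch of the squashed-order trick described above (my own illustration, checked against the example from the question):

from math import comb

def squashed_rank(xs):
    # xs: strictly increasing non-negative integers x_1 < x_2 < ... < x_k
    return sum(comb(x, j + 1) for j, x in enumerate(xs))

def lex_rank(xs, n):
    # one-based lexicographic rank of a sorted k-subset of {1, ..., n}
    k = len(xs)
    complements = sorted(n - x for x in xs)
    return comb(n, k) - squashed_rank(complements)

print(lex_rank([2, 5, 7, 8, 10], 10))  # 189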
I also needed the same thing for a project of mine, and the fastest solution I found was this (Python):
import math

def nCr(n, r):
    f = math.factorial
    return f(n) // f(r) // f(n - r)

def index(comb, n, k):
    r = nCr(n, k)
    for i in range(k):
        if n - comb[i] < k - i:
            continue
        r = r - nCr(n - comb[i], k - i)
    return r

My input "comb" contains elements in increasing order. You can test the code, for example, with:

import itertools

k = 3
t = [1, 2, 3, 4, 5]
for x in itertools.combinations(t, k):
    print(x, index(x, len(t), k))
It is not hard to prove that if comb=(a1,a2,a3...,ak) (in increasing order) then:
index=[nCk-(n-a1+1)Ck] + [(n-a1)C(k-1)-(n-a2+1)C(k-1)] + ... =
nCk -(n-a1)Ck -(n-a2)C(k-1) - .... -(n-ak)C1
There's another way to do all this. You could generate all possible combinations and write them into a binary file, where each combination is represented by its index starting from zero. Then, when you need to find an index and the combination is given, you apply a binary search on the file. Here's the function. It's written in VB.NET 2010 for my lotto program; it works with the Israeli lottery system, so there's a bonus (7th) number; just ignore it.
Public Function Comb2Index( _
ByVal gAr() As Byte) As UInt32
Dim mxPntr As UInt32 = WHL.AMT.WHL_SYS_00 '(16.273.488)
Dim mdPntr As UInt32 = mxPntr \ 2
Dim eqCntr As Byte
Dim rdAr() As Byte
modBinary.OpenFile(WHL.WHL_SYS_00, _
FileMode.Open, FileAccess.Read)
Do
modBinary.ReadBlock(mdPntr, rdAr)
RP: If eqCntr = 7 Then GoTo EX
If gAr(eqCntr) = rdAr(eqCntr) Then
eqCntr += 1
GoTo RP
ElseIf gAr(eqCntr) < rdAr(eqCntr) Then
If eqCntr > 0 Then eqCntr = 0
mxPntr = mdPntr
mdPntr \= 2
ElseIf gAr(eqCntr) > rdAr(eqCntr) Then
If eqCntr > 0 Then eqCntr = 0
mdPntr += (mxPntr - mdPntr) \ 2
End If
Loop Until eqCntr = 7
EX: modBinary.CloseFile()
Return mdPntr
End Function
P.S. It takes 5 to 10 mins to generate 16 million combs on a Core 2 Duo. To find the index using binary search on file takes 397 milliseconds on a SATA drive.
Assuming the maximum setSize is not too large, you can simply generate a lookup table, where the inputs are encoded this way:
int index(a, b, c, ...)
{
    int key = 0;
    key |= 1 << a;
    key |= 1 << b;
    key |= 1 << c;
    // repeat for all arguments
    return Lookup[key];
}
To generate the lookup table, look at this "banker's order" algorithm. Generate all the combinations, and also store the base index for each nItems. (For the example on p6, this would be [0,1,5,11,15].) Note that by storing the answers in the opposite order from the example (LSBs set first), you will only need one table, sized for the largest possible set.
Populate the lookup table by walking through the combinations doing Lookup[combination[i]]=i-baseIdx[nItems]
EDIT: Never mind. This is completely wrong.
Let your combination be (a_1, a_2, ..., a_{k-1}, a_k) where a_1 < a_2 < ... < a_k. Let choose(a, b) = a!/(b!*(a-b)!) if a >= b and 0 otherwise. Then, the index you are looking for is
choose(a_k - 1, k) + choose(a_{k-1} - 1, k-1) + choose(a_{k-2} - 1, k-2) + ... + choose(a_2 - 1, 2) + choose(a_1 - 1, 1) + 1
The first term counts the number of k-element combinations such that the largest element is less than a_k. The second term counts the number of (k-1)-element combinations such that the largest element is less than a_{k-1}. And so on.
Notice that the size of the universe of elements to be chosen from (10 in your example) does not play a role in the computation of the index. Can you see why?
Sample solution:
class Program
{
static void Main(string[] args)
{
// The input
var n = 5;
var t = new[] { 2, 4, 5 };
// Helping transformations
ComputeDistances(t);
CorrectDistances(t);
// The algorithm
var r = CalculateRank(t, n);
Console.WriteLine("n = 5");
Console.WriteLine("t = {2, 4, 5}");
Console.WriteLine("r = {0}", r);
Console.ReadKey();
}
static void ComputeDistances(int[] t)
{
var k = t.Length;
while (--k >= 0)
t[k] -= (k + 1);
}
static void CorrectDistances(int[] t)
{
var k = t.Length;
while (--k > 0)
t[k] -= t[k - 1];
}
static int CalculateRank(int[] t, int n)
{
int k = t.Length - 1, r = 0;
for (var i = 0; i < t.Length; i++)
{
if (t[i] == 0)
{
n--;
k--;
continue;
}
for (var j = 0; j < t[i]; j++)
{
n--;
r += CalculateBinomialCoefficient(n, k);
}
n--;
k--;
}
return r;
}
static int CalculateBinomialCoefficient(int n, int k)
{
int i, l = 1, m, x, y;
if (n - k < k)
{
x = k;
y = n - k;
}
else
{
x = n - k;
y = k;
}
for (i = x + 1; i <= n; i++)
l *= i;
m = CalculateFactorial(y);
return l/m;
}
static int CalculateFactorial(int n)
{
int i, w = 1;
for (i = 1; i <= n; i++)
w *= i;
return w;
}
}
The idea behind the scenes is to associate a k-subset with the operation of drawing k elements from an n-element set. It is a combination, so the overall count of possibilities is C(n, k). That is a clue that we can look for the solution in Pascal's triangle. After a while of comparing manually written examples with the corresponding numbers from Pascal's triangle, we will find the pattern and hence the algorithm.
I used user515430's answer and converted it to Python 3. This also supports non-contiguous values, so you could pass in [1, 3, 5, 7, 9] as your pool instead of range(1, 11).
from itertools import combinations
from scipy.special import comb
from pandas import Index

debugcombinations = False

class IndexedCombination:
    def __init__(self, _setsize, _poolvalues):
        self.setsize = _setsize
        self.poolvals = Index(_poolvalues)
        self.poolsize = len(self.poolvals)
        self.totalcombinations = 1
        fast_k = min(self.setsize, self.poolsize - self.setsize)
        for i in range(1, fast_k + 1):
            self.totalcombinations = self.totalcombinations * (self.poolsize - fast_k + i) // i
        # fill the nCr cache
        self.choose_cache = {}
        n = self.poolsize
        k = self.setsize
        for i in range(k + 1):
            for j in range(n + 1):
                if n - j >= k - i:
                    self.choose_cache[n - j, k - i] = comb(n - j, k - i, exact=True)
        if debugcombinations:
            print('testnth = ' + str(self.testnth()))

    def get_nth_combination(self, index):
        n = self.poolsize
        r = self.setsize
        c = self.totalcombinations
        # if index < 0 or index >= c:
        #     raise IndexError
        result = []
        while r:
            c, n, r = c*r//n, n-1, r-1
            while index >= c:
                index -= c
                c, n = c*(n-r)//n, n-1
            result.append(self.poolvals[-1 - n])
        return tuple(result)

    def get_n_from_combination(self, someset):
        n = self.poolsize
        k = self.setsize
        index = 0
        j = 0
        for i in range(k):
            setidx = self.poolvals.get_loc(someset[i])
            for j in range(j + 1, setidx + 1):
                index += self.choose_cache[n - j, k - i - 1]
            j += 1
        return index

    # just used to test whether nth_combination from the internet actually works
    def testnth(self):
        n = 0
        _setsize = self.setsize
        mainset = self.poolvals
        for someset in combinations(mainset, _setsize):
            nthset = self.get_nth_combination(n)
            n2 = self.get_n_from_combination(nthset)
            if debugcombinations:
                print(str(n) + ': ' + str(someset) + ' vs ' + str(n2) + ': ' + str(nthset))
            if n != n2:
                return False
            for x in range(_setsize):
                if someset[x] != nthset[x]:
                    return False
            n += 1
        return True

setcombination = IndexedCombination(5, list(range(1, 10 + 1)))
print(str(setcombination.get_n_from_combination([2, 5, 7, 8, 10])))
returns 188
