Creating associative magic squares z3 python - z3py

So Im working with magic squares. I've created conditions for a regular magic square. Now I need to add the constraints to make it associative.
I'm using the z3 python module.
To be associative, the sum of the opposite numbers need to be equal to the same number 't'. the sum of all the rows and columns and diagonals need to be equal to this number t as well. So I'm struggling with accessing the numbers individually from each list in the list of lists.
For example, these are the constraints I have for a regular magic square:
t = Int('t')
Square = [ [ Int("x_%s_%s" % (i+1, j+1)) for j in range(n) ] for i in range(n) ]
distinct = [ Distinct([ Square[i][j] for i in range(n) for j in range(n) ]) ]
cell = [ And(1 <= Square[i][j], Square[i][j] <= n**2) for i in range(n) for j in range(n) ]
row = [(sum(Square[i]) == t) for i in range(n)]
col = [(sum([Square[j][i] for j in range(n)]) == t) for i in range(n)]
firstDiagonal = [ (sum([Square[i][i] for i in range(n)]) == t) ]
secondDiagonal = [ (sum([Square[n-i-1][i] for i in range(n)]) == t) ]
So how do I associative conditions?

I amended your model and added associative constraints:
from z3 import *
def ShowSquare(model, square, rows, cols):
print()
print("Square")
for row in range(rows):
s = ""
for col in range(cols):
s = s + str(model.eval(square[row][col])).ljust(4)
print(s)
s = Solver()
n = 3
t = Int('t')
magicSquare = [ [ Int("x_%s_%s" % (i+1, j+1)) for j in range(n) ] for i in range(n) ]
distinctConstraints = [ Distinct([ magicSquare[i][j] for i in range(n) for j in range(n) ]) ]
cellConstraints = [ And(1 <= magicSquare[i][j], magicSquare[i][j] <= n**2) for i in range(n) for j in range(n) ]
rowConstraints = [(sum(magicSquare[i]) == t) for i in range(n)]
colConstraints = [(sum([magicSquare[j][i] for j in range(n)]) == t) for i in range(n)]
firstDiagConstraints = [ (sum([magicSquare[i][i] for i in range(n)]) == t) ]
secondDiagConstraints = [ (sum([magicSquare[n-i-1][i] for i in range(n)]) == t) ]
AssocSum = Int('AssocSum')
assocRowConstraints = [((magicSquare[0][i] + magicSquare[n-1][n-i-1]) == AssocSum) for i in range(n) ]
assocColConstraints = [((magicSquare[i][0] + magicSquare[n-i-1][n-1]) == AssocSum) for i in range(1, n-1) ]
# performance tweak
# the magic sum is known in advance
s.add(2*t == n*(n*n+1))
s.add(distinctConstraints)
s.add(cellConstraints)
s.add(rowConstraints)
s.add(colConstraints)
s.add(firstDiagConstraints)
s.add(secondDiagConstraints)
s.add(assocRowConstraints)
s.add(assocColConstraints)
print(s.check())
model = s.model()
print("magic sum %s" % model.eval(t))
print("assoc sum %s" % model.eval(AssocSum))
ShowSquare(model, magicSquare, n, n)
The result is a transposed version of the Lo Shu Square linked in the question:
sat
magic sum 15
assoc sum 10
Square
4 3 8
9 5 1
2 7 6
For n=5, a solution is:
magic sum 65
assoc sum 26
Square
8 7 12 15 23
17 20 22 1 5
16 2 13 24 10
21 25 4 6 9
3 11 14 19 18

Related

Min Abs Sum task from codility

There is already a topic about this task, but I'd like to ask about my specific approach.
The task is:
Let A be a non-empty array consisting of N integers.
The abs sum of two for a pair of indices (P, Q) is the absolute value
|A[P] + A[Q]|, for 0 ≤ P ≤ Q < N.
For example, the following array A:
A[0] = 1 A1 = 4 A[2] = -3 has pairs of indices (0, 0), (0,
1), (0, 2), (1, 1), (1, 2), (2, 2). The abs sum of two for the pair
(0, 0) is A[0] + A[0] = |1 + 1| = 2. The abs sum of two for the pair
(0, 1) is A[0] + A1 = |1 + 4| = 5. The abs sum of two for the pair
(0, 2) is A[0] + A[2] = |1 + (−3)| = 2. The abs sum of two for the
pair (1, 1) is A1 + A1 = |4 + 4| = 8. The abs sum of two for the
pair (1, 2) is A1 + A[2] = |4 + (−3)| = 1. The abs sum of two for
the pair (2, 2) is A[2] + A[2] = |(−3) + (−3)| = 6. Write a function:
def solution(A)
that, given a non-empty array A consisting of N integers, returns the
minimal abs sum of two for any pair of indices in this array.
For example, given the following array A:
A[0] = 1 A1 = 4 A[2] = -3 the function should return 1, as
explained above.
Given array A:
A[0] = -8 A1 = 4 A[2] = 5 A[3] =-10 A[4] = 3 the
function should return |(−8) + 5| = 3.
Write an efficient algorithm for the following assumptions:
N is an integer within the range [1..100,000]; each element of array A
is an integer within the range [−1,000,000,000..1,000,000,000].
The official solution is O(N*M^2), but I think it could be solved in O(N).
My approach is to first get rid of duplicates and sort the array. Then we check both ends and sompare the abs sum moving the ends by one towards each other. We try to move the left end, the right one or both. If this doesn't improve the result, our sum is the lowest. My code is:
def solution(A):
A = list(set(A))
n = len(A)
A.sort()
beg = 0
end = n - 1
min_sum = abs(A[beg] + A[end])
while True:
min_left = abs(A[beg+1] + A[end]) if beg+1 < n else float('inf')
min_right = abs(A[beg] + A[end-1]) if end-1 >= 0 else float('inf')
min_both = abs(A[beg+1] + A[end-1]) if beg+1 < n and end-1 >= 0 else float('inf')
min_all = min([min_left, min_right, min_both])
if min_sum <= min_all:
return min_sum
if min_left == min_all:
beg += 1
min_sum = min_left
elif min_right == min_all:
end -= 1
min_sum = min_right
else:
beg += 1
end -= 1
min_sum = min_both
It passes almost all of the tests, but not all. Is there some bug in my code or the approach is wrong?
EDIT:
After the aka.nice answer I was able to fix the code. It scores 100% now.
def solution(A):
A = list(set(A))
n = len(A)
A.sort()
beg = 0
end = n - 1
min_sum = abs(A[beg] + A[end])
while beg <= end:
min_left = abs(A[beg+1] + A[end]) if beg+1 < n else float('inf')
min_right = abs(A[beg] + A[end-1]) if end-1 >= 0 else float('inf')
min_all = min(min_left, min_right)
if min_all < min_sum:
min_sum = min_all
if min_left <= min_all:
beg += 1
else:
end -= 1
return min_sum
Just take this example for array A
-11 -5 -2 5 6 8 12
and execute your algorithm step by step, you get a premature return:
beg=0
end=6
min_sum=1
min_left=7
min_right=3
min_both=3
min_all=3
return min_sum
though there is a better solution abs(5-5)=0.
Hint: you should check the sign of A[beg] and A[end] to decide whether to continue or exit the loop. What to do if both >= 0, if both <= 0, else ?
Note that A.sort() has a non neglectable cost, likely O(N*log(N)), it will dominate the cost of the solution you exhibit.
By the way, what is M in the official cost O(N*M^2)?
And the link you provide is another problem (sum all the elements of A or their opposite).

Add all the values between 100 and 4000000 inclusively that are divisable by 3 or 5 but not both 3 and 5

Add all the values between 100 and 4000000 inclusively that are divisable by 3 or 5 but not both 3 and 5
Can't figure out how to implement second part of that stipulation. Here's what I have so far:
var sum = 0;
for (var i = 100; i < 4000001; i++) {
if (i % 3 || i % 5 === 0) {
sum = sum + i;
}
}
You can compute the sum without any loop, using the formula for the sum of an arithmetic progression: We have
3 + 5 + 6 + 9 + 10 + 12 + 18 + 20 + ...
= 3 + 6 + 9 + 12 + 15 + 18 + ...
+ 5 + 10 + 15 + 20 + ...
- 2*(15 + 30 + 45 + ...)
Note that we add all the multiples of 3 and 5 but then subtract the multiples of 15 twice, because they were counted twice as multiples of both 3 and 5.
Let g(n) be the sum of integers from 1 to n. We have g(n) = n*(n+1)/2.
Let f(n) be the sum of integers between 1 and n that are divisible by 3 or 5, but not both. Then we have
f(n) = 3*g(floor(n / 3)) + 5*g(floor(n/5)) - 30*g(floor(n/15))
And the sum of integers between m and n that are divisible by 3 or 5, but not both is then just f(n) - f(m - 1). This can be computed in O(1).
You simply need to escape only those part which involves division by 15, and other higher numbers(multiple of 15) will be avoided further automatically.
Note that checking divisibility by 15 should be at the top, which on being true will continue further iteration without executing the below codes of divisibility by 3 and 5. If false, then a number can only be divisible by 3 or 5 or none, but not both.
for (var i = 100; i < 4000001; i++) {
if(i % 15 == 0 )
continue;
if (i % 3 == 0) {
sum = sum + i;
}
if (i % 5 == 0) {
sum = sum + i;
}
}
Also, note that you have used === operator which I don't think is a valid operator, probably you want ==. BTW, I am not sure whether any language supports ===, I think Javascript supports that. So, be careful at that step.
You can use != instead of || since this is exactly what you want. Only divisible by 3 or 5 but not by both.
var sum = 0;
for (var i = 100; i < 4000001; i++) {
if ((i % 3 == 0) != (i % 5 == 0)) {
sum = sum + i;
}
}
var sum = 0;
for (var i = 100; i < 4000001; i++) {
if (i % 3 === 0 ^ i % 5 === 0) {
sum = sum + i;
}
}
use the exclusive OR , XOR ^ returns true only when one of the conditions not both is true.

Maximum length of zigzag sequence

A sequence of integers is called zigzag sequence if each of its elements is either strictly less or strictly greater than its neighbors.
Example : The sequence 4 2 3 1 5 3 forms a zigzag, but 7 3 5 5 2 and 3 8 6 4 5 don't.
For a given array of integers we need to find the length of its largest (contiguous) sub-array that forms a zigzag sequence.
Can this be done in O(N) ?
Currently my solution is O(N^2) which is just simply taking every two points and checking each possible sub-array if it satisfies the condition or not.
I claim that the length of overlapping sequence of any 2 zigzag sub-sequences is a most 1
Proof by contradiction:
Assume a_i .. a_j is the longest zigzag sub-sequence, and there is another zigzag sub-sequence b_m...b_n overlapping it.
without losing of generality, let's say the overlapping part is
a_i ... a_k...a_j
--------b_m...b_k'...b_n
a_k = b_m, a_k+1 = b_m+1....a_j = b_k' where k'-m = j-k > 0 (at least 2 elements are overlapping)
Then they can merge to form a longer zig-zag sequence, contradiction.
This means the only case they can be overlapping each other is like
3 5 3 2 3 2 3
3 5 3 and 3 2 3 2 3 is overlapping at 1 element
This can still be solved in O(N) I believe, like just greedily increase the zig-zag length whenever possible. If fails, move iterator 1 element back and treat it as a new zig-zag starting point
Keep record the latest and longest zig-zag length you have found
Walk along the array and see if the current item belongs to (fits a definition of) a zigzag. Remember the las zigzag start, which is either the array's start or the first zigzag element after the most recent non-zigzag element. This and the current item define some zigzag subarray. When it appears longer than the previously found, store the new longest zigzag length. Proceed till the end of array and you should complete the task in O(N).
Sorry I use perl to write this.
#!/usr/bin/perl
#a = ( 5, 4, 2, 3, 1, 5, 3, 7, 3, 5, 5, 2, 3, 8, 6, 4, 5 );
$n = scalar #a;
$best_start = 0;
$best_end = 1;
$best_length = 2;
$start = 0;
$end = 1;
$direction = ($a[0] > $a[1]) ? 1 : ($a[0] < $a[1]) ? -1 : 0;
for($i=2; $i<$n; $i++) {
// a trick here, same value make $new_direction = $direction
$new_direction = ($a[$i-1] > $a[$i]) ? 1 : ($a[$i-1] < $a[$i]) ? -1 : $direction;
print "$a[$i-1] > $a[$i] : direction $new_direction Vs $direction\n";
if ($direction != $new_direction) {
$end = $i;
} else {
$this_length = $end - $start + 1;
if ($this_length > $best_length) {
$best_start = $start;
$best_end = $end;
$best_length = $this_length;
}
$start = $i-1;
$end = $i;
}
$direction = $new_direction;
}
$this_length = $end - $start + 1;
if ($this_length > $best_length) {
$best_start = $start;
$best_end = $end;
$best_length = $this_length;
}
print "BEST $best_start to $best_end length $best_length\n";
for ($i=$best_start; $i <= $best_end; $i++) {
print $a[$i], " ";
}
print "\n";
For each index i, you can find the smallest j such that the subarray with index j,j+1,...,i-1,i is a zigzag. This can be done in two phases:
Find the longest "increasing" zig zag (starts with a[1]>a[0]):
start = 0
increasing[0] = 0
sign = true
for (int i = 1; i < n; i ++)
if ((arr[i] > arr[i-1] && sign) || )arr[i] < arr[i-1] && !sign)) {
increasing[i] = start
sign = !sign
} else if (arr[i-1] < arr[i]) { //increasing and started last element
start = i-1
sign = false
increasing[i] = i-1
} else { //started this element
start = i
sign = true
increasing[i] = i
}
}
Do similarly for "decreasing" zig-zag, and you can find for each index the "earliest" possible start for a zig-zag subarray.
From there, finding the maximal possible zig-zag is easy.
Since all oporations are done in O(n), and you basically do one after the other, this is your complexity.
You can combine the both "increasing" and "decreasing" to one go:
start = 0
maxZigZagStart[0] = 0
sign = true
for (int i = 1; i < n; i ++)
if ((arr[i] > arr[i-1] && sign) || )arr[i] < arr[i-1] && !sign)) {
maxZigZagStart[i] = start
sign = !sign
} else if (arr[i-1] > arr[i]) { //decreasing:
start = i-1
sign = false
maxZigZagStart[i] = i-1
} else if (arr[i-1] < arr[i]) { //increasing:
start = i-1
sign = true
maxZigZagStart[i] = i-1
} else { //equality
start = i
//guess it is increasing, if it is not - will be taken care of next iteration
sign = true
maxZigZagStart[i] = i
}
}
You can see that you can actually even let go of maxZigZagStart aux array and stored local maximal length instead.
A sketch of simple one-pass algorithm. Cmp compares neighbour elements, returning -1, 0, 1 for less, equal and greater cases.
Zigzag ends for cases of Cmp transitions:
0 0
-1 0
1 0
Zigzag ends and new series starts:
0 -1
0 1
-1 -1
1 1
Zigzag series continues for transitions
-1 1
1 -1
Algo:
Start = 0
LastCmp = - Compare(A[i], A[i - 1]) //prepare to use the first element individually
MaxLen = 0
for i = 1 to N - 1 do
Cmp = Compare(A[i], A[i - 1]) //returns -1, 0, 1 for less, equal and greater cases
if Abs(Cmp - LastCmp) <> 2 then
//zigzag condition is violated, series ends, new series starts
MaxLen = Max(MaxLen, i - 1 - Start)
Start = i
//else series continues, nothing to do
LastCmp = Cmp
//check for ending zigzag
if LastCmp <> 0 then
MaxLen = Max(MaxLen, N - Start)
examples of output:
2 6 7 1 7 0 7 3 1 1 7 4
5 (7 1 7 0 7)
8 0 0 3 5 8
1
0 0 7 0
2
1 2 0 7 9
3
8 3 5 2
4
1 3 7 1 6 6
2
1 4 0 6 6 3 4 3 8 0 9 9
5
Lets consider sequence 5 9 3 4 5 4 2 3 6 5 2 1 3 as an example. You have a condition which every internal element of subsequence should satisfy (element is strictly less or strictly greater than its neighbors). Lets compute this condition for every element of the whole sequence:
5 9 3 6 5 7 2 3 6 5 2 1 3
0 1 1 1 1 1 1 0 1 0 0 1 0
The condition is undefined for outermost elements because they have only one neighbor each. But I defined it as 0 for convenience.
The longest subsequence of 1's (9 3 6 5 7 2) is the internal part of the longest zigzag subsequence (5 9 3 6 5 7 2 3). So the algorithm is:
Find the longest subsequence of elements satisfying condition.
Add to it one element to each side.
The first step can be done in O(n) by the following algorithm:
max_length = 0
current_length = 0
for i from 2 to len(a) - 1:
if a[i - 1] < a[i] > a[i + 1] or a[i - 1] > a[i] < a[i + 1]:
current_length += 1
else:
max_length = max(max_length, current_length)
current_length = 0
max_length = max(max_length, current_length)
The only special case is if the sequence total length is 0 or 1. Then the whole sequence would be the longest zigzag subsequence.
#include "iostream"
using namespace std ;
int main(){
int t ; scanf("%d",&t) ;
while(t--){
int n ; scanf("%d",&n) ;
int size1 = 1 , size2 = 1 , seq1 , seq2 , x ;
bool flag1 = true , flag2 = true ;
for(int i=1 ; i<=n ; i++){
scanf("%d",&x) ;
if( i== 1 )seq1 = seq2 = x ;
else {
if( flag1 ){
if( x>seq1){
size1++ ;
seq1 = x ;
flag1 = !flag1 ;
}
else if( x < seq1 )
seq1 = x ;
}
else{
if( x<seq1){
size1++ ;
seq1=x ;
flag1 = !flag1 ;
}
else if( x > seq1 )
seq1 = x ;
}
if( flag2 ){
if( x < seq2 ){
size2++ ;
seq2=x ;
flag2 = !flag2 ;
}
else if( x > seq2 )
seq2 = x ;
}
else {
if( x > seq2 ){
size2++ ;
seq2 = x ;
flag2 = !flag2 ;
}
else if( x < seq2 )
seq2 = x ;
}
}
}
printf("%d\n",max(size1,size2)) ;
}
return 0 ;
}

Caculating total combinations

I don't know how to go about this programming problem.
Given two integers n and m, how many numbers exist such that all numbers have all digits from 0 to n-1 and the difference between two adjacent digits is exactly 1 and the number of digits in the number is atmost 'm'.
What is the best way to solve this problem? Is there a direct mathematical formula?
Edit: The number cannot start with 0.
Example:
for n = 3 and m = 6 there are 18 such numbers (210, 2101, 21012, 210121 ... etc)
Update (some people have encountered an ambiguity):
All digits from 0 to n-1 must be present.
This Python code computes the answer in O(nm) by keeping track of the numbers ending with a particular digit.
Different arrays (A,B,C,D) are used to track numbers that have hit the maximum or minimum of the range.
n=3
m=6
A=[1]*n # Number of ways of being at digit i and never being to min or max
B=[0]*n # number of ways with minimum being observed
C=[0]*n # number of ways with maximum being observed
D=[0]*n # number of ways with both being observed
A[0]=0 # Cannot start with 0
A[n-1]=0 # Have seen max so this 1 moves from A to C
C[n-1]=1 # Have seen max if start with highest digit
t=0
for k in range(m-1):
A2=[0]*n
B2=[0]*n
C2=[0]*n
D2=[0]*n
for i in range(1,n-1):
A2[i]=A[i+1]+A[i-1]
B2[i]=B[i+1]+B[i-1]
C2[i]=C[i+1]+C[i-1]
D2[i]=D[i+1]+D[i-1]
B2[0]=A[1]+B[1]
C2[n-1]=A[n-2]+C[n-2]
D2[0]=C[1]+D[1]
D2[n-1]=B[n-2]+D[n-2]
A=A2
B=B2
C=C2
D=D2
x=sum(d for d in D2)
t+=x
print t
After doing some more research, I think there may actually be a mathematical approach after all, although the math is advanced for me. Douglas S. Stones pointed me in the direction of Joseph Myers' (2008) article, BMO 2008–2009 Round 1 Problem 1—Generalisation, which derives formulas for calculating the number of zig-zag paths across a rectangular board.
As I understand it, in Anirudh's example, our board would have 6 rows of length 3 (I believe this would mean n=3 and r=6 in the article's terms). We can visualize our board so:
0 1 2 example zig-zag path: 0
0 1 2 1
0 1 2 0
0 1 2 1
0 1 2 2
0 1 2 1
Since Myers' formula m(n,r) would generate the number for all the zig-zag paths, that is, the number of all 6-digit numbers where all adjacent digits are consecutive and digits are chosen from (0,1,2), we would still need to determine and subtract those that begin with zero and those that do not include all digits.
If I understand correctly, we may do this in the following way for our example, although generalizing the concept to arbitrary m and n may prove more complicated:
Let m(3,6) equal the number of 6-digit numbers where all adjacent digits
are consecutive and digits are chosen from (0,1,2). According to Myers,
m(3,r) is given by formula and also equals OEIS sequence A029744 at
index r+2, so we have
m(3,6) = 16
How many of these numbers start with zero? Myers describes c(n,r) as the
number of zig-zag paths whose colour is that of the square in the top
right corner of the board. In our case, c(3,6) would include the total
for starting-digit 0 as well as starting-digit 2. He gives c(3,2r) as 2^r,
so we have
c(3,6) = 8. For starting-digit 0 only, we divide by two to get 4.
Now we need to obtain only those numbers that include all the digits in
the range, but how? We can do this be subtracting m(n-1,r) from m(n,r).
In our case, we have all the m(2,6) that would include only 0's and 1's,
and all the m(2,6) that would include 1's and 2's. Myers gives
m(2,anything) as 2, so we have
2*m(2,6) = 2*2 = 4
But we must remember that one of the zero-starting numbers is included
in our total for 2*m(2,6), namely 010101. So all together we have
m(3,6) - c(3,6)/2 - 4 + 1
= 16 - 4 - 4 + 1
= 9
To complete our example, we must follow a similar process for m(3,5),
m(3,4) and m(3,3). Since it's late here, I might follow up tomorrow...
One approach could be to program it recursively, calling the function to add as well as subtract from the last digit.
Haskell code:
import Data.List (sort,nub)
f n m = concatMap (combs n) [n..m]
combs n m = concatMap (\x -> combs' 1 [x]) [1..n - 1] where
combs' count result
| count == m = if test then [concatMap show result] else []
| otherwise = combs' (count + 1) (result ++ [r + 1])
++ combs' (count + 1) (result ++ [r - 1])
where r = last result
test = (nub . sort $ result) == [0..n - 1]
Output:
*Main> f 3 6
["210","1210","1012","2101","12101","10121","21210","21012"
,"21010","121210","121012","121010","101212","101210","101012"
,"212101","210121","210101"]
In response to Anirudh Rayabharam's comment, I hope the following code will be more 'pseudocode' like. When the total number of digits reaches m, the function g outputs 1 if the solution has hashed all [0..n-1], and 0 if not. The function f accumulates the results for g for starting digits [1..n-1] and total number of digits [n..m].
Haskell code:
import qualified Data.Set as S
g :: Int -> Int -> Int -> Int -> (S.Set Int, Int) -> Int
g n m digitCount lastDigit (hash,hashCount)
| digitCount == m = if test then 1 else 0
| otherwise =
if lastDigit == 0
then g n m d' (lastDigit + 1) (hash'',hashCount')
else if lastDigit == n - 1
then g n m d' (lastDigit - 1) (hash'',hashCount')
else g n m d' (lastDigit + 1) (hash'',hashCount')
+ g n m d' (lastDigit - 1) (hash'',hashCount')
where test = hashCount' == n
d' = digitCount + 1
hash'' = if test then S.empty else hash'
(hash',hashCount')
| hashCount == n = (S.empty,hashCount)
| S.member lastDigit hash = (hash,hashCount)
| otherwise = (S.insert lastDigit hash,hashCount + 1)
f n m = foldr forEachNumDigits 0 [n..m] where
forEachNumDigits numDigits accumulator =
accumulator + foldr forEachStartingDigit 0 [1..n - 1] where
forEachStartingDigit startingDigit accumulator' =
accumulator' + g n numDigits 1 startingDigit (S.empty,0)
Output:
*Main> f 3 6
18
(0.01 secs, 571980 bytes)
*Main> f 4 20
62784
(1.23 secs, 97795656 bytes)
*Main> f 4 25
762465
(11.73 secs, 1068373268 bytes)
model your problem as 2 superimposed lattices in 2 dimensions, specifically as pairs (i,j) interconnected with oriented edges ((i0,j0),(i1,j1)) where i1 = i0 + 1, |j1 - j0| = 1, modified as follows:
dropping all pairs (i,j) with j > 9 and its incident edges
dropping all pairs (i,j) with i > m-1 and its incident edges
dropping edge ((0,0), (1,1))
this construction results in a structure like in this diagram:
:
the requested numbers map to paths in the lattice starting at one of the green elements ((0,j), j=1..min(n-1,9)) that contain at least one pink and one red element ((i,0), i=1..m-1, (i,n-1), i=0..m-1 ). to see this, identify the i-th digit j of a given number with point (i,j). including pink and red elements ('extremal digits') guarantee that all available diguts are represented in the number.
Analysis
for convenience, let q1, q2 denote the position-1.
let q1 be the position of a number's first digit being either 0 or min(n-1,9).
let q2 be the position of a number's first 0 if the digit at position q1 is min(n-1,9) and vv.
case 1: first extremal digit is 0
the number of valid prefixes containing no 0 can be expressed as sum_{k=1..min(n-1,9)} (paths_to_0(k,1,q1), the function paths_to_0 being recursively defined as
paths_to_0(0,q1-1,q1) = 0;
paths_to_0(1,q1-1,q1) = 1;
paths_to_0(digit,i,q1) = 0; if q1-i < digit;
paths_to_0(x,_,_) = 0; if x >= min(n-1,9)
// x=min(n-1,9) mustn't occur before position q2,
// x > min(n-1,9) not at all
paths_to_0(x,_,_) = 0; if x <= 0;
// x=0 mustn't occur before position q1,
// x < 0 not at all
and else paths_to_0(digit,i,q1) =
paths_to_0(digit+1,i+1,q1) + paths_to_0(digit-1,i+1,q1);
similarly we have
paths_to_max(min(n-1,9),q2-1,q2) = 0;
paths_to_max(min(n-2,8),q2-1,q2) = 1;
paths_to_max(digit,i,q2) = 0 if q2-i < n-1;
paths_to_max(x,_,_) = 0; if x >= min(n-1,9)
// x=min(n-1,9) mustn't occur before
// position q2,
// x > min(n-1,9) not at all
paths_to_max(x,_,_) = 0; if x < 0;
and else paths_to_max(digit,q1,q2) =
paths_max(digit+1,q1+1,q2) + paths_to_max(digit-1,q1+1,q2);
and finally
paths_suffix(digit,length-1,length) = 2; if digit > 0 and digit < min(n-1,9)
paths_suffix(digit,length-1,length) = 1; if digit = 0 or digit = min(n-1,9)
paths_suffix(digit,k,length) = 0; if length > m-1
or length < q2
or k > length
paths_suffix(digit,k,0) = 1; // the empty path
and else paths_suffix(digit,k,length) =
paths_suffix(digit+1,k+1,length) + paths_suffix(digit-1,k+1,length);
... for a grand total of
number_count_case_1(n, m) =
sum_{first=1..min(n-1,9), q1=1..m-1-(n-1), q2=q1..m-1, l_suffix=0..m-1-q2} (
paths_to_0(first,1,q1)
+ paths_to_max(0,q1,q2)
+ paths_suffix(min(n-1,9),q2,l_suffix+q2)
)
case 2: first extremal digit is min(n-1,9)
case 2.1: initial digit is not min(n-1,9)
this is symmetrical to case 1 with all digits d replaced by min(n,10) - d. as the lattice structure is symmetrical, this means number_count_case_2_1 = number_count_case_1.
case 2.2: initial digit is min(n-1,9)
note that q1 is 1 and the second digit must be min(n-2,8).
thus
number_count_case_2_2 (n, m) =
sum_{q2=1..m-2, l_suffix=0..m-2-q2} (
paths_to_max(1,1,q2)
+ paths_suffix(min(n-1,9),q2,l_suffix+q2)
)
so the grand grand total will be
number_count ( n, m ) = 2 * number_count_case_1 (n, m) + number_count_case_2_2 (n, m);
Code
i don't know whether a closed expression for number_count exists, but the following perl code will compute it (the code is but a proof of concept as it does not use memoization techniques to avoid recomputing results already obtained):
use strict;
use warnings;
my ($n, $m) = ( 5, 7 ); # for example
$n = ($n > 10) ? 10 : $n; # cutoff
sub min
sub paths_to_0 ($$$) {
my (
$d
, $at
, $until
) = #_;
#
if (($d == 0) && ($at == $until - 1)) { return 0; }
if (($d == 1) && ($at == $until - 1)) { return 1; }
if ($until - $at < $d) { return 0; }
if (($d <= 0) || ($d >= $n))) { return 0; }
return paths_to_0($d+1, $at+1, $until) + paths_to_0($d-1, $at+1, $until);
} # paths_to_0
sub paths_to_max ($$$) {
my (
$d
, $at
, $until
) = #_;
#
if (($d == $n-1) && ($at == $until - 1)) { return 0; }
if (($d == $n-2) && ($at == $until - 1)) { return 1; }
if ($until - $at < $n-1) { return 0; }
if (($d < 0) || ($d >= $n-1)) { return 0; }
return paths_to_max($d+1, $at+1, $until) + paths_to_max($d-1, $at+1, $until);
} # paths_to_max
sub paths_suffix ($$$) {
my (
$d
, $at
, $until
) = #_;
#
if (($d < $n-1) && ($d > 0) && ($at == $until - 1)) { return 2; }
if ((($d == $n-1) && ($d == 0)) && ($at == $until - 1)) { return 1; }
if (($until > $m-1) || ($at > $until)) { return 0; }
if ($until == 0) { return 1; }
return paths_suffix($d+1, $at+1, $until) + paths_suffix($d-1, $at+1, $until);
} # paths_suffix
#
# main
#
number_count =
sum_{first=1..min(n-1,9), q1=1..m-1-(n-1), q2=q1..m-1, l_suffix=0..m-1-q2} (
paths_to_0(first,1,q1)
+ paths_to_max(0,q1,q2)
+ paths_suffix(min(n-1,9),q2,l_suffix+q2)
)
my ($number_count, $number_count_2_2) = (0, 0);
my ($first, $q1, i, $l_suffix);
for ($first = 1; $first <= $n-1; $first++) {
for ($q1 = 1; $q1 <= $m-1 - ($n-1); $q1++) {
for ($q2 = $q1; $q2 <= $m-1; $q2++) {
for ($l_suffix = 0; $l_suffix <= $m-1 - $q2; $l_suffix++) {
$number_count =
$number_count
+ paths_to_0($first,1,$q1)
+ paths_to_max(0,$q1,$q2)
+ paths_suffix($n-1,$q2,$l_suffix+$q2)
;
}
}
}
}
#
# case 2.2
#
for ($q2 = 1; $q2 <= $m-2; $q2++) {
for ($l_suffix = 0; $l_suffix <= $m-2 - $q2; $l_suffix++) {
$number_count_2_2 =
$number_count_2_2
+ paths_to_max(1,1,$q2)
+ paths_suffix($n-1,$q2,$l_suffix+$q2)
;
}
}
$number_count = 2 * $number_count + number_count_2_2;

How to calculate the index (lexicographical order) when the combination is given

I know that there is an algorithm that permits, given a combination of number (no repetitions, no order), calculates the index of the lexicographic order.
It would be very useful for my application to speedup things...
For example:
combination(10, 5)
1 - 1 2 3 4 5
2 - 1 2 3 4 6
3 - 1 2 3 4 7
....
251 - 5 7 8 9 10
252 - 6 7 8 9 10
I need that the algorithm returns the index of the given combination.
es: index( 2, 5, 7, 8, 10 ) --> index
EDIT: actually I'm using a java application that generates all combinations C(53, 5) and inserts them into a TreeMap.
My idea is to create an array that contains all combinations (and related data) that I can index with this algorithm.
Everything is to speedup combination searching.
However I tried some (not all) of your solutions and the algorithms that you proposed are slower that a get() from TreeMap.
If it helps: my needs are for a combination of 5 from 53 starting from 0 to 52.
Thank you again to all :-)
Here is a snippet that will do the work.
#include <iostream>
int main()
{
const int n = 10;
const int k = 5;
int combination[k] = {2, 5, 7, 8, 10};
int index = 0;
int j = 0;
for (int i = 0; i != k; ++i)
{
for (++j; j != combination[i]; ++j)
{
index += c(n - j, k - i - 1);
}
}
std::cout << index + 1 << std::endl;
return 0;
}
It assumes you have a function
int c(int n, int k);
that will return the number of combinations of choosing k elements out of n elements.
The loop calculates the number of combinations preceding the given combination.
By adding one at the end we get the actual index.
For the given combination there are
c(9, 4) = 126 combinations containing 1 and hence preceding it in lexicographic order.
Of the combinations containing 2 as the smallest number there are
c(7, 3) = 35 combinations having 3 as the second smallest number
c(6, 3) = 20 combinations having 4 as the second smallest number
All of these are preceding the given combination.
Of the combinations containing 2 and 5 as the two smallest numbers there are
c(4, 2) = 6 combinations having 6 as the third smallest number.
All of these are preceding the given combination.
Etc.
If you put a print statement in the inner loop you will get the numbers
126, 35, 20, 6, 1.
Hope that explains the code.
Convert your number selections to a factorial base number. This number will be the index you want. Technically this calculates the lexicographical index of all permutations, but if you only give it combinations, the indexes will still be well ordered, just with some large gaps for all the permutations that come in between each combination.
Edit: pseudocode removed, it was incorrect, but the method above should work. Too tired to come up with correct pseudocode at the moment.
Edit 2: Here's an example. Say we were choosing a combination of 5 elements from a set of 10 elements, like in your example above. If the combination was 2 3 4 6 8, you would get the related factorial base number like so:
Take the unselected elements and count how many you have to pass by to get to the one you are selecting.
1 2 3 4 5 6 7 8 9 10
2 -> 1
1 3 4 5 6 7 8 9 10
3 -> 1
1 4 5 6 7 8 9 10
4 -> 1
1 5 6 7 8 9 10
6 -> 2
1 5 7 8 9 10
8 -> 3
So the index in factorial base is 1112300000
In decimal base, it's
1*9! + 1*8! + 1*7! + 2*6! + 3*5! = 410040
This is Algorithm 2.7 kSubsetLexRank on page 44 of Combinatorial Algorithms by Kreher and Stinson.
r = 0
t[0] = 0
for i from 1 to k
if t[i - 1] + 1 <= t[i] - 1
for j from t[i - 1] to t[i] - 1
r = r + choose(n - j, k - i)
return r
The array t holds your values, for example [5 7 8 9 10]. The function choose(n, k) calculates the number "n choose k". The result value r will be the index, 251 for the example. Other inputs are n and k, for the example they would be 10 and 5.
zero-base,
# v: array of length k consisting of numbers between 0 and n-1 (ascending)
def index_of_combination(n,k,v):
idx = 0
for p in range(k-1):
if p == 0: arrg = range(1,v[p]+1)
else: arrg = range(v[p-1]+2, v[p]+1)
for a in arrg:
idx += combi[n-a, k-1-p]
idx += v[k-1] - v[k-2] - 1
return idx
Null Set has the right approach. The index corresponds to the factorial-base number of the sequence. You build a factorial-base number just like any other base number, except that the base decreases for each digit.
Now, the value of each digit in the factorial-base number is the number of elements less than it that have not yet been used. So, for combination(10, 5):
(1 2 3 4 5) == 0*9!/5! + 0*8!/5! + 0*7!/5! + 0*6!/5! + 0*5!/5!
== 0*3024 + 0*336 + 0*42 + 0*6 + 0*1
== 0
(10 9 8 7 6) == 9*3024 + 8*336 + 7*42 + 6*6 + 5*1
== 30239
It should be pretty easy to calculate the index incrementally.
If you have a set of positive integers 0<=x_1 < x_2< ... < x_k , then you could use something called the squashed order:
I = sum(j=1..k) Choose(x_j,j)
The beauty of the squashed order is that it works independent of the largest value in the parent set.
The squashed order is not the order you are looking for, but it is related.
To use the squashed order to get the lexicographic order in the set of k-subsets of {1,...,n) is by taking
1 <= x1 < ... < x_k <=n
compute
0 <= n-x_k < n-x_(k-1) ... < n-x_1
Then compute the squashed order index of (n-x_k,...,n-k_1)
Then subtract the squashed order index from Choose(n,k) to get your result, which is the lexicographic index.
If you have relatively small values of n and k, you can cache all the values Choose(a,b) with a
See Anderson, Combinatorics on Finite Sets, pp 112-119
I needed also the same for a project of mine and the fastest solution I found was (Python):
import math
def nCr(n,r):
f = math.factorial
return f(n) / f(r) / f(n-r)
def index(comb,n,k):
r=nCr(n,k)
for i in range(k):
if n-comb[i]<k-i:continue
r=r-nCr(n-comb[i],k-i)
return r
My input "comb" contained elements in increasing order You can test the code with for example:
import itertools
k=3
t=[1,2,3,4,5]
for x in itertools.combinations(t, k):
print x,index(x,len(t),k)
It is not hard to prove that if comb=(a1,a2,a3...,ak) (in increasing order) then:
index=[nCk-(n-a1+1)Ck] + [(n-a1)C(k-1)-(n-a2+1)C(k-1)] + ... =
nCk -(n-a1)Ck -(n-a2)C(k-1) - .... -(n-ak)C1
There's another way to do all this. You could generate all possible combinations and write them into a binary file where each comb is represented by it's index starting from zero. Then, when you need to find an index, and the combination is given, you apply a binary search on the file. Here's the function. It's written in VB.NET 2010 for my lotto program, it works with Israel lottery system so there's a bonus (7th) number; just ignore it.
Public Function Comb2Index( _
ByVal gAr() As Byte) As UInt32
Dim mxPntr As UInt32 = WHL.AMT.WHL_SYS_00 '(16.273.488)
Dim mdPntr As UInt32 = mxPntr \ 2
Dim eqCntr As Byte
Dim rdAr() As Byte
modBinary.OpenFile(WHL.WHL_SYS_00, _
FileMode.Open, FileAccess.Read)
Do
modBinary.ReadBlock(mdPntr, rdAr)
RP: If eqCntr = 7 Then GoTo EX
If gAr(eqCntr) = rdAr(eqCntr) Then
eqCntr += 1
GoTo RP
ElseIf gAr(eqCntr) < rdAr(eqCntr) Then
If eqCntr > 0 Then eqCntr = 0
mxPntr = mdPntr
mdPntr \= 2
ElseIf gAr(eqCntr) > rdAr(eqCntr) Then
If eqCntr > 0 Then eqCntr = 0
mdPntr += (mxPntr - mdPntr) \ 2
End If
Loop Until eqCntr = 7
EX: modBinary.CloseFile()
Return mdPntr
End Function
P.S. It takes 5 to 10 mins to generate 16 million combs on a Core 2 Duo. To find the index using binary search on file takes 397 milliseconds on a SATA drive.
Assuming the maximum setSize is not too large, you can simply generate a lookup table, where the inputs are encoded this way:
int index(a,b,c,...)
{
int key = 0;
key |= 1<<a;
key |= 1<<b;
key |= 1<<c;
//repeat for all arguments
return Lookup[key];
}
To generate the lookup table, look at this "banker's order" algorithm. Generate all the combinations, and also store the base index for each nItems. (For the example on p6, this would be [0,1,5,11,15]). Note that by you storing the answers in the opposite order from the example (LSBs set first) you will only need one table, sized for the largest possible set.
Populate the lookup table by walking through the combinations doing Lookup[combination[i]]=i-baseIdx[nItems]
EDIT: Never mind. This is completely wrong.
Let your combination be (a1, a2, ..., ak-1, ak) where a1 < a2 < ... < ak. Let choose(a,b) = a!/(b!*(a-b)!) if a >= b and 0 otherwise. Then, the index you are looking for is
choose(ak-1, k) + choose(ak-1-1, k-1) + choose(ak-2-1, k-2) + ... + choose (a2-1, 2) + choose (a1-1, 1) + 1
The first term counts the number of k-element combinations such that the largest element is less than ak. The second term counts the number of (k-1)-element combinations such that the largest element is less than ak-1. And, so on.
Notice that the size of the universe of elements to be chosen from (10 in your example) does not play a role in the computation of the index. Can you see why?
Sample solution:
class Program
{
static void Main(string[] args)
{
// The input
var n = 5;
var t = new[] { 2, 4, 5 };
// Helping transformations
ComputeDistances(t);
CorrectDistances(t);
// The algorithm
var r = CalculateRank(t, n);
Console.WriteLine("n = 5");
Console.WriteLine("t = {2, 4, 5}");
Console.WriteLine("r = {0}", r);
Console.ReadKey();
}
static void ComputeDistances(int[] t)
{
var k = t.Length;
while (--k >= 0)
t[k] -= (k + 1);
}
static void CorrectDistances(int[] t)
{
var k = t.Length;
while (--k > 0)
t[k] -= t[k - 1];
}
static int CalculateRank(int[] t, int n)
{
int k = t.Length - 1, r = 0;
for (var i = 0; i < t.Length; i++)
{
if (t[i] == 0)
{
n--;
k--;
continue;
}
for (var j = 0; j < t[i]; j++)
{
n--;
r += CalculateBinomialCoefficient(n, k);
}
n--;
k--;
}
return r;
}
static int CalculateBinomialCoefficient(int n, int k)
{
int i, l = 1, m, x, y;
if (n - k < k)
{
x = k;
y = n - k;
}
else
{
x = n - k;
y = k;
}
for (i = x + 1; i <= n; i++)
l *= i;
m = CalculateFactorial(y);
return l/m;
}
static int CalculateFactorial(int n)
{
int i, w = 1;
for (i = 1; i <= n; i++)
w *= i;
return w;
}
}
The idea behind the scenes is to associate a k-subset with an operation of drawing k-elements from the n-size set. It is a combination, so the overall count of possible items will be (n k). It is a clue that we could seek the solution in Pascal Triangle. After a while of comparing manually written examples with the appropriate numbers from the Pascal Triangle, we will find the pattern and hence the algorithm.
I used user515430's answer and converted to python3. Also this supports non-continuous values so you could pass in [1,3,5,7,9] as your pool instead of range(1,11)
from itertools import combinations
from scipy.special import comb
from pandas import Index
debugcombinations = False
class IndexedCombination:
def __init__(self, _setsize, _poolvalues):
self.setsize = _setsize
self.poolvals = Index(_poolvalues)
self.poolsize = len(self.poolvals)
self.totalcombinations = 1
fast_k = min(self.setsize, self.poolsize - self.setsize)
for i in range(1, fast_k + 1):
self.totalcombinations = self.totalcombinations * (self.poolsize - fast_k + i) // i
#fill the nCr cache
self.choose_cache = {}
n = self.poolsize
k = self.setsize
for i in range(k + 1):
for j in range(n + 1):
if n - j >= k - i:
self.choose_cache[n - j,k - i] = comb(n - j,k - i, exact=True)
if debugcombinations:
print('testnth = ' + str(self.testnth()))
def get_nth_combination(self,index):
n = self.poolsize
r = self.setsize
c = self.totalcombinations
#if index < 0 or index >= c:
# raise IndexError
result = []
while r:
c, n, r = c*r//n, n-1, r-1
while index >= c:
index -= c
c, n = c*(n-r)//n, n-1
result.append(self.poolvals[-1 - n])
return tuple(result)
def get_n_from_combination(self,someset):
n = self.poolsize
k = self.setsize
index = 0
j = 0
for i in range(k):
setidx = self.poolvals.get_loc(someset[i])
for j in range(j + 1, setidx + 1):
index += self.choose_cache[n - j, k - i - 1]
j += 1
return index
#just used to test whether nth_combination from the internet actually works
def testnth(self):
n = 0
_setsize = self.setsize
mainset = self.poolvals
for someset in combinations(mainset, _setsize):
nthset = self.get_nth_combination(n)
n2 = self.get_n_from_combination(nthset)
if debugcombinations:
print(str(n) + ': ' + str(someset) + ' vs ' + str(n2) + ': ' + str(nthset))
if n != n2:
return False
for x in range(_setsize):
if someset[x] != nthset[x]:
return False
n += 1
return True
setcombination = IndexedCombination(5, list(range(1,10+1)))
print( str(setcombination.get_n_from_combination([2,5,7,8,10])))
returns 188

Resources