Repeated Squaring - Matrix Multiplication using NEWMAT - matrix

I'm trying to use the repeated squaring algorithm (using recursion) to perform matrix exponentiation. I've included header files from the NEWMAT library instead of using arrays. The original matrix has elements in the range (-5,5), all numbers being of type float.
# include "C:\User\newmat10\newmat.h"
# include "C:\User\newmat10\newmatio.h"
# include "C:\User\newmat10\newmatap.h"
# include <iostream>
# include <time.h>
# include <ctime>
# include <cstdlib>
# include <iomanip>
using namespace std;
Matrix repeated_squaring(Matrix A, int exponent, int n) //Recursive function
{
A(n,n);
IdentityMatrix I(n);
if (exponent == 0) //Matrix raised to zero returns an Identity Matrix
return I;
else
{
if ( exponent%2 == 1 ) // if exponent is odd
return (A * repeated_squaring (A*A, (exponent-1)/2, n));
else //if exponent is even
return (A * repeated_squaring( A*A, exponent/2, n));
}
}
Matrix direct_squaring(Matrix B, int k, int no) //Brute Force Multiplication
{
B(no,no);
Matrix C = B;
for (int i = 1; i <= k; i++)
C = B*C;
return C;
}
//----Creating a matrix with elements b/w (-5,5)----
float unifRandom()
{
int a = -5;
int b = 5;
float temp = (float)((b-a)*( rand()/RAND_MAX) + a);
return temp;
}
Matrix initialize_mat(Matrix H, int ord)
{
H(ord,ord);
for (int y = 1; y <= ord; y++)
for(int z = 1; z<= ord; z++)
H(y,z) = unifRandom();
return(H);
}
//---------------------------------------------------
void main()
{
int exponent, dimension;
cout<<"Insert exponent:"<<endl;
cin>>exponent;
cout<< "Insert dimension:"<<endl;
cin>>dimension;
cout<<"The number of rows/columns in the square matrix is: "<<dimension<<endl;
cout<<"The exponent is: "<<exponent<<endl;
Matrix A(dimension,dimension),B(dimension,dimension);
Matrix C(dimension,dimension),D(dimension,dimension);
B= initialize_mat(A,dimension);
cout<<"Initial Matrix: "<<endl;
cout<<setw(5)<<setprecision(2)<<B<<endl;
//-----------------------------------------------------------------------------
cout<<"Repeated Squaring Result: "<<endl;
clock_t time_before1 = clock();
C = repeated_squaring (B, exponent , dimension);
cout<< setw(5) <<setprecision(2) <<C;
clock_t time_after1 = clock();
float diff1 = ((float) time_after1 - (float) time_before1);
cout << "It took " << diff1/CLOCKS_PER_SEC << " seconds to complete" << endl<<endl;
//---------------------------------------------------------------------------------
cout<<"Direct Squaring Result:"<<endl;
clock_t time_before2 = clock();
D = direct_squaring (B, exponent , dimension);
cout<<setw(5)<<setprecision(2)<<D;
clock_t time_after2 = clock();
float diff2 = ((float) time_after2 - (float) time_before2);
cout << "It took " << diff2/CLOCKS_PER_SEC << " seconds to complete" << endl<<endl;
}
I face the following problems:
The random number generator returns only "-5" as each element in the output.
The Matrix multiplication yield different results with brute force multiplication and using the repeated squaring algorithm.
I'm timing the execution time of my code to compare the times taken by brute force multiplication and by repeated squaring.
Could someone please find out what's wrong with the recursion and with the matrix initialization?
NOTE: While compiling this program, make sure you've imported the NEWMAT library.
Thanks in advance!

rand() returns an int so rand()/RAND_MAX will truncate to an integer = 0. Try your
repeated square algorithm by hand with n = 1, 2 and 3 and you'll find a surplus A *
and a gross inefficiency.

Final Working code has the following improvements:
Matrix repeated_squaring(Matrix A, int exponent, int n) //Recursive function
{
A(n,n);
IdentityMatrix I(n);
if (exponent == 0) //Matrix raised to zero returns an Identity Matrix
return I;
if (exponent == 1)
return A;
{
if (exponent % 2 == 1) // if exponent is odd
return (A*repeated_squaring (A*A, (exponent-1)/2, n));
else //if exponent is even
return (repeated_squaring(A*A, exponent/2, n));
}
}
Matrix direct_squaring(Matrix B, int k, int no) //Brute Force Multiplication
{
B(no,no);
Matrix C(no,no);
C=B;
for (int i = 0; i < k-1; i++)
C = B*C;
return C;
}
//----Creating a matrix with elements b/w (-5,5)----
float unifRandom()
{
int a = -5;
int b = 5;
float temp = (float) ((b-a)*((float) rand()/RAND_MAX) + a);
return temp;
}

Related

Convert a number m to n using minimum number of given operations

Question:
Given 2 integers N and M. Convert a number N to M using minimum number of given operations.
The operations are:
Square N (N = N^2)
Divide N by a prime integer P if N is divisible by P (N = N / P and N % P == 0)
Contrants:
N, M <= 10^9
Example:
N = 12, M = 18
The minimum operations are:
N /= 2 -> N = 6
N = N^2 -> N = 36
N /= 2 -> N = 18
My take:
I'm trying to use BFS to solve this problem. For each number, the available edges to other numberers are the operations. But it got Time Limit Exceeded. Is there any better way to solve this?
Here is my BFS code:
queue<pair<int,int> > q;
vector<long long> pr;
ll m,n;
bool prime[MAXN+1];
void solve()
{
while (!q.empty())
{
pii x=q.front();
q.pop();
if (x.first==m)
{
cout << x.second;
return;
}
if (x.first==1) continue;
for(ll k:pr)
{
if (k>x.first) break;
if (x.first%k==0) q.push({x.first/k,x.second+1});
}
q.push({x.first*x.first,x.second+1});
}
}
The algorithm uses the decomposition on N and M in prime factors, keeping trace of the corresponding exponents.
If M has a prime factor that N does not have, there is no solution (the code returns -1).
If N has some prime factors that M doesn't have, then the first step is to divide N by these primes.
The corresponding number of operations is the sum of the corresponding exponents.
At this stage, we get two arrays A and B corresponding to the exponents of the common prime factors, for N and M.
It is worth noting that at this stage, the values of the primes involved is not relevant anymore, only the exponents matter.
Then one must determine the minimum number of squares (= multiplications by 2 of the exponents).
The is the smallest k such that A[i] >= 2^k B[i] for all indices i.
The number of multiplications is added to the number of operations only once, as all exponents are multiplied by 2 at the same time.
Last step is to determine, for each pair (a, b) = (A[i], B[i]), the number of subtractions needed to go from a to b, while implementing exactly k multiplications by 2. This is performed with the following rules:
- if (k == 0) f(a, b, k) = a-b
- Else:
- if ((a-1)*2^k >= b: f(a, b, k) = 1 + f(a-1, b, k)
- else: f(a, b, k) = f(2*a, b, k-1)
The complexity is dominated by the decomposition in primes factors: O(sqrt(n))
Code:
This code is rather long, but a great part consists if helper routines needed for debugging and analysis.
#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
void print (const std::vector<int> &v, const std::string s = "") {
std::cout << s;
for (auto &x: v) {
std::cout << x << " ";
}
std::cout << std::endl;
}
void print_decomp (int n, const std::vector<int> &primes, const std::vector<int> &mult) {
std::cout << n << " = ";
int k = primes.size();
for (int i = 0; i < k; ++i) {
std::cout << primes[i];
if (mult[i] > 1) std::cout << "^" << mult[i];
std::cout << " ";
}
std::cout << "\n";
}
void prime_decomp (int nn, std::vector<int> &primes, std::vector<int> &mult) {
int n = nn;
if (n <= 1) return;
if (n % 2 == 0) {
primes.push_back(2);
int cpt = 1;
n/= 2;
while (n%2 == 0) {n /= 2; cpt++;}
mult.push_back (cpt);
}
int max_prime = sqrt(n);
int p = 3;
while (p <= max_prime) {
if (n % p == 0) {
primes.push_back(p);
int cpt = 1;
n/= p;
while (n%p == 0) {n /= p; cpt++;}
mult.push_back (cpt);
max_prime = sqrt(n);
}
p += 2;
}
if (n != 1) {
primes.push_back(n);
mult.push_back (1);
}
print_decomp (nn, primes, mult);
}
// Determine the number of subtractions to go from a to b, with exactly k multiplications by 2
int n_sub (int a, int b, int k, int power2) {
if (k == 0){
if (b > a) exit(1);
return a - b;
}
//if (a == 1) return n_sub (2*a, b, k-1, power2/2);
if ((a-1)*power2 >= b) {
return 1 + n_sub(a-1, b, k, power2);
} else {
return n_sub (2*a, b, k-1, power2/2);
}
return 0;
}
// A return of -1 means no possibility
int n_operations (int N, int M) {
int count = 0;
if (N == M) return 0;
if (N == 1) return -1;
std::vector<int> primes_N, primes_M, expon_N, expon_M;
// Prime decomposition
prime_decomp(N, primes_N, expon_N);
prime_decomp (M, primes_M, expon_M);
// Compare decomposition, check if a solution can exist, set up two exponent arrays
std::vector<int> A, B;
int index_A = 0, index_B = 0;
int nA = primes_N.size();
int nB = primes_M.size();
while (true) {
if ((index_A == nA) && (index_B == nB)) {
break;
}
if ((index_A < nA) && (index_B < nB)) {
if (primes_N[index_A] == primes_M[index_B]) {
A.push_back(expon_N[index_A]);
B.push_back(expon_M[index_B]);
index_A++; index_B++;
continue;
}
if (primes_N[index_A] < primes_M[index_B]) {
count += expon_N[index_A];
index_A++;
continue;
}
return -1; // M has a prime that N doesn't have: impossibility to go to M
}
if (index_B != nB) { // impossibility
return -1;
}
for (int i = index_A; i < nA; ++i) {
count += expon_N[i]; // suppression of primes in N not in M
}
break;
}
std::cout << "1st step, count = " << count << "\n";
print (A, "exponents of N: ");
print (B, "exponents of M: ");
// Determination of the number of multiplications by two of the exponents (= number of squares)
int n = A.size();
int n_mult2 = 0;
int power2 = 1;
for (int i = 0; i < n; ++i) {
while (power2*A[i] < B[i]) {
power2 *= 2;
n_mult2++;
}
}
count += n_mult2;
std::cout << "number of squares = " << n_mult2 << " -> " << power2 << "\n";
// For each pair of exponent, determine the number of subtractions,
// with a fixed number of multiplication by 2
for (int i = 0; i < n; ++i) {
count += n_sub (A[i], B[i], n_mult2, power2);
}
return count;
}
int main() {
int N, M;
std::cin >> N >> M;
auto ans = n_operations (N, M);
std::cout << ans << "\n";
return 0;
}

Smallest Multiple of given number With digits only 0 and 1

You are given an integer N. You have to find smallest multiple of N which consists of digits 0 and 1 only. Since this multiple could be large, return it in form of a string.
Returned string should not contain leading zeroes.
For example,
For N = 55, 110 is smallest multiple consisting of digits 0 and 1.
For N = 2, 10 is the answer.
I saw several related problems, but I could not find the problem with my code.
Here is my code giving TLE on some cases even after using map instead of set.
#define ll long long
int getMod(string s, int A)
{
int res=0;
for(int i=0;i<s.length();i++)
{
res=res*10+(s[i]-'0');
res%=A;
}
return res;
}
string Solution::multiple(int A) {
if(A<=1)
return to_string(A);
queue<string>q;
q.push("1");
set<int>st;
string s="1";
while(!q.empty())
{
s=q.front();
q.pop();
int mod=getMod(s,A);
if(mod==0)
{
return s;
}
else if(st.find(mod)==st.end())
{
st.insert(mod);
q.push(s+"0");
q.push(s+"1");
}
}
}
Here is an implementation in Raku.
my $n = 55;
(1 .. Inf).map( *.base(2) ).first( * %% $n );
(1 .. Inf) is a lazy list from one to infinity. The "whatever star" * establishes a closure and stands for the current element in the map.
base is a method of Rakus Num type which returns a string representation of a given number in the wanted base, here a binary string.
first returns the current element when the "whatever star" closure holds true for it.
The %% is the divisible by operator, it implicitly casts its left side to Int.
Oh, and to top it off. It's easy to parallelize this, so your code can use multiple cpu cores:
(1 .. Inf).race( :batch(1000), :degree(4) ).map( *.base(2) ).first( * %% $n );
As mentioned in the "math" reference, the result is related to the congruence of the power of 10 modulo A.
If
n = sum_i a[i] 10^i
then
n modulo A = sum_i a[i] b[i]
Where the a[i] are equal to 0 or 1, and the b[i] = (10^i) modulo A
Then the problem is to find the minimum a[i] sequence, such that the sum is equal to 0 modulo A.
From a graph a point of view, we have to find the shortest path to zero modulo A.
A BFS is generally well adapted to find such a path. The issue is the possible exponential increase of the number of nodes to visit. Here, were are sure to get a number of nodes less than A, by rejecting the nodes, the sum of which (modulo A) has already been obtained (see vector used in the program). Note that this rejection is needed in order to get the minimum number at the end.
Here is a program in C++. The solution being quite simple, it should be easy to understand even by those no familiar with C++.
#include <iostream>
#include <string>
#include <vector>
struct node {
int sum = 0;
std::string s;
};
std::string multiple (int A) {
std::vector<std::vector<node>> nodes (2);
std::vector<bool> used (A, false);
int range = 0;
int ten = 10 % A;
int pow_ten = 1;
if (A == 0) return "0";
if (A == 1) return "1";
nodes[range].push_back (node{0, "0"});
nodes[range].push_back (node{1, "1"});
used[1] = true;
while (1) {
int range_new = (range + 1) % 2;
nodes[range_new].resize(0);
pow_ten = (pow_ten * ten) % A;
for (node &x: nodes[range]) {
node y = x;
y.s = "0" + y.s;
nodes[range_new].push_back(y);
y = x;
y.sum = (y.sum + pow_ten) % A;
if (used[y.sum]) continue;
used[y.sum] = true;
y.s = "1" + y.s;
if (y.sum == 0) return y.s;
nodes[range_new].push_back(y);
}
range = range_new;
}
}
int main() {
std::cout << "input number: ";
int n;
std::cin >> n;
std::cout << "Result = " << multiple(n) << "\n";
return 0;
}
EDIT
The above program is using a kind of memoization in order to speed up the process but for large inputs memory becomes too large.
As indicated in a comment for example, it cannot handle the case N = 60000007.
I improved the speed and the range a little bit with the following modifications:
A function (reduction) was created to simplify the search when the input number is divisible by 2 or 5
For the memorization of the nodes (nodes array), only one array is used now instead of two
A kind of meet-in-the middle procedure is used: in a first step, a function mem_gen memorizes all relevant 01 sequences up to N_DIGIT_MEM (=20) digits. Then the main procedure multiple2 generates valid 01 sequences "after the 20 first digits" and then in the memory looks for a "complementary sequence" such that the concatenation of both is a valid sequence
With this new program the case N = 60000007 provides the good result (100101000001001010011110111, 27 digits) in about 600ms on my PC.
EDIT 2
Instead of limiting the number of digits for the memorization in the first step, I now use a threshold on the size of the memory, as this size does not depent only on the number of digits but also of the input number. Note that the optimal value of this threshold would depend of the input number. Here, I selected a thresholf of 50k as a compromise. With a threshold of 20k, for 60000007, I obtain the good result in 36 ms. Besides, with a threshold of 100k, the worst case 99999999 is solved in 5s.
I made different tests with values less than 10^9. In about all tested cases, the result is provided in less that 1s. However, I met a corner case N=99999999, for which the result consists in 72 consecutive "1". In this particular case, the program takes about 6.7s. For 60000007, the good result is obtained in 69ms.
Here is the new program:
#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <unordered_map>
#include <chrono>
#include <cmath>
#include <algorithm>
std::string reverse (std::string s) {
std::string res {s.rbegin(), s.rend()};
return res;
}
struct node {
int sum = 0;
std::string s;
node (int sum_ = 0, std::string s_ = ""): sum(sum_), s(s_) {};
};
// This function simplifies the search when the input number is divisible by 2 or 5
node reduction (int &X, long long &pow_ten) {
node init {0, ""};
while (1) {
int digit = X % 10;
if (digit == 1 || digit == 3 || digit == 7 || digit == 9) break;
switch (digit) {
case(0):
X /= 10;
break;
case(2):
case(4):
case(6):
case(8):
X = (5*X)/10;
break;
case(5):
X = (2*X)/10;
break;
}
init.s.push_back('0');
pow_ten = (pow_ten * 10) % X;
}
return init;
}
const int N_DIGIT_MEM = 30; // 20
const int threshold_size_mem = 50000;
// This function memorizes all relevant 01 sequences up to N_DIGIT_MEM digits
bool gene_mem (int X, long long &pow_ten, int index_max, std::map<int, std::string> &mem, node &result) {
std::vector<node> nodes;
std::vector<bool> used (X, false);
bool start = true;
for (int index = 0; index < index_max; ++index){
if (start) {
node x = {int(pow_ten), "1"};
nodes.push_back (x);
} else {
for (node &x: nodes) {
x.s.push_back('0');
}
int n = nodes.size();
for (int i = 0; i < n; ++i) {
node y = nodes[i];
y.sum = (y.sum + pow_ten) % X;
y.s.back() = '1';
if (used[y.sum]) continue;
used[y.sum] = true;
if (y.sum == 0) {
result = y;
return true;
}
nodes.push_back(y);
}
}
pow_ten = (10 * pow_ten) % X;
start = false;
int n_mem = nodes.size();
if (n_mem > threshold_size_mem) {
break;
}
}
for (auto &x: nodes) {
mem[x.sum] = x.s;
}
//std::cout << "size mem = " << mem.size() << "\n";
return false;
}
// This function generates valid 01 sequences "after the 20 first digits" and then in the memory
// looks for a "complementary sequence" such that the concatenation of both is a valid sequence
std::string multiple2 (int A) {
std::vector<node> nodes;
std::map<int, std::string> mem;
int ten = 10 % A;
long long pow_ten = 1;
int digit;
if (A == 0) return "0";
int X = A;
node init = reduction (X, pow_ten);
if (X != A) ten = ten % X;
if (X == 1) {
init.s.push_back('1');
return reverse(init.s);
}
std::vector<bool> used (X, false);
node result;
int index_max = N_DIGIT_MEM;
if (gene_mem (X, pow_ten, index_max, mem, result)) {
return reverse(init.s + result.s);
}
node init2 {0, ""};
nodes.push_back(init2);
while (1) {
for (node &x: nodes) {
x.s.push_back('0');
}
int n = nodes.size();
for (int i = 0; i < n; ++i) {
node y = nodes[i];
y.sum = (y.sum + pow_ten) % X;
if (used[y.sum]) continue;
used[y.sum] = true;
y.s.back() = '1';
if (y.sum != 0) {
int target = X - y.sum;
auto search = mem.find(target);
if (search != mem.end()) {
//std::cout << "mem size 2nd step = " << nodes.size() << "\n";
return reverse(init.s + search->second + y.s);
}
}
nodes.push_back(y);
}
pow_ten = (pow_ten * ten) % X;
}
}
int main() {
std::cout << "input number: ";
int n;
std::cin >> n;
std::string res;
auto t1 = std::chrono::high_resolution_clock::now();
res = multiple2(n),
std::cout << "Result = " << res << " ndigit = " << res.size() << std::endl;
auto t2 = std::chrono::high_resolution_clock::now();
auto duration2 = std::chrono::duration_cast<std::chrono::microseconds>( t2 - t1 ).count();
std::cout << "time = " << duration2/1000 << " ms" << std::endl;
return 0;
}
For people more familiar with Python, here is a converted version of #Damien's code. Damien's important insight is to strongly reduce the search tree, taking advantage of the fact that each partial sum only needs to be investigated once, namely the first time it is encountered.
The problem is also described at Mathpuzzle, but there they mostly fix on the necessary existence of a solution. There's also code mentioned at the online encyclopedia of integer sequences. The sage version seems to be somewhat similar.
I made a few changes:
Starting with an empty list helps to correctly solve A=1 while simplifying the code. The multiplication by 10 is moved to the end of the loop. Doing the same for 0 seems to be hard, as log10(0) is minus infinity.
Instead of alternating between nodes[range] and nodes[new_range], two different lists are used.
As Python supports integers of arbitrary precision, the partial results could be stored as decimal or binary numbers instead of as strings. This is not yet done in the code below.
from collections import namedtuple
node = namedtuple('node', 'sum str')
def find_multiple_ones_zeros(A):
nodes = [node(0, "")]
used = set()
pow_ten = 1
while True:
new_nodes = []
for x in nodes:
y = node(x.sum, "0" + x.str)
new_nodes.append(y)
next_sum = (x.sum + pow_ten) % A
y = node((x.sum + pow_ten) % A, x.str)
if next_sum in used:
continue
used.add(next_sum)
y = node(next_sum, "1" + x.str)
if next_sum == 0:
return y.str
new_nodes.append(y)
pow_ten = (pow_ten * 10) % A
nodes = new_nodes

How to expand the product of a sequence of binomials efficiently?

The product of the sequence of binomials reads
where {a_i} and {b_i} are coefficients in binomials.
I need to expand it to a polynomial
and use all coefficients {c_k} in the polynomial afterwards.
How to expand it efficiently? The speed has priority over the memory occupation because the expansion will be used many times.
What I tried
At present I just come up with an update scheme, which expands the polynomial right after absorbing one binomial.
This scheme needs two arrays — one for results up to i-1 and the other for results up to i.
Here is the C++ code for my naive scheme, but I think this question is irrelevant to what language is used.
#include <iostream>
#include <vector>
int main()
{
using namespace std;
// just an example, the coefficients are actually real numbers in [0,1]
unsigned d = 3;
vector<double> a;
vector<double> b;
a.resize(d, 1); b.resize(d, 1);
// given two arrays, a[] and b[], of length d
vector< vector<double> > coefficients(2);
coefficients[0].resize(d + 1);
coefficients[1].resize(d + 1);
if (d > 0) {
auto &coeff = coefficients[0]; // i = 0
coeff[0] = a[0];
coeff[1] = b[0];
for (unsigned i = 1; i < d; ++i) {// i : [1, d-1]
const auto ai = a[i];
const auto bi = b[i];
const auto &oldCoeff = coefficients[(i-1)%2];
auto &coeff = coefficients[i%2];
coeff[0] = oldCoeff[0] * ai; // j = 0
for (unsigned j = 1; j <= i; ++j) { // j : [1, i]
coeff[j] = oldCoeff[j] * ai + oldCoeff[j-1] * bi;
}
coeff[i+1] = oldCoeff[i] * bi; // j = i
}
}
const auto &coeff = coefficients[(d-1)%2];
for (unsigned i = 0; i < d; ++i) {
cout << coeff[i] << "\t";
}
cout << coeff[d] << '\n';
}

Algorithm to sum a triple?

We have an array A with m positive integer numbers, what's an algorithm that will
return true if there's a triple (x,y,z) in A
such that A[x] + A[y] + A[z] = 200
Otherwise return false. Numbers in array are distinct and running time must be O(n).
I came up with O(n^3). Any ideas on how to achieve this with O(n)?
Since elements are unique, this boils down to pre processing the array in O(n) to filter redundant elements - which are larger than 200 (none of them will be in the triplet).
Than, you have an array which its size is no larger than 200.
Checking all triplets in this array is O(200^3)=O(1) (it can be done more efficiently in terms of constants though).
So, this will be O(n) U O(200^3) = O(n)
I think you can solve this problem with bit operation. Such as bitset in C++ STL.
Using 3 bitsets, the first bitset cache all numbers you can get it by add 1 number, the second bitset cache all numbers you can get it by add 2 numbers, the third bitset cache all numbers you can get it by add 3 numbers. Then if a new number is coming, you can maintain the bitsets by simple bit operation.
Here is a sample C++ code:
bitset<256> bs[4];
for (int i = 0; i < 4; ++i)
bs[i].reset();
int N, number;
cin >> N;
while (N--)
{
cin >> number;
bs[3] |= (bs[2] << number);
bs[2] |= (bs[1] << number);
if (number <= 200)
bs[1].set(number);
//cout << "1: " << bs[1] << endl;
//cout << "2: " << bs[2] << endl;
//cout << "3: " << bs[3] << endl;
}
cout << bs[3][200] << endl;
The algorithm complexity is O(n). Because bit operation is quickly, each 64-bit long type can cache 64 number, so if you don't want to use bitset, you can use 4 long type(64 * 4 = 256) to replace it.
I agree with #amit's solution, but there is an question: How can we make it better, in our case just faster.
Here is my solution and it's almost based on amit' idea, but the asymptotic complexity == O(n + sum*(sum+1)/2), where n is a length of input array.
Firstly, we need n steps to filter the input array and put each value, that less the sum into the new array, where index of the value is equal to the value. At the end of this step we have the array, which size is equal to sum and we are able to access any value in O(1).
Finally, to find x,y,z we only need sum*(sum+1)/2 steps.
typedef struct SumATripleResult
{
unsigned int x;
unsigned int y;
unsigned int z;
} SumATripleResult;
SumATripleResult sumATriple(unsigned int totalSum, unsigned int *inputArray, unsigned int n)
{
SumATripleResult result;
unsigned int array[totalSum];
//Filter the input array and put each value into 'array' where array[value] = value
for (size_t i = 0; i<n; i++)
{
unsigned int value = inputArray[i];
if(value<totalSum)
{
array[value] = value;
}
}
unsigned int x;
unsigned int y;
unsigned int z;
for (size_t i = 0; i<totalSum; i++)
{
x = array[i];
for (size_t j = i+1; x>0 && j<totalSum; j++)
{
y = array[j];
if( y==0 || x + y >= totalSum) continue;
unsigned int zIdx = totalSum - (x + y);
if(zIdx == x || zIdx == y) continue;
z = array[zIdx];
if( z != 0)
{
result.x = x;
result.y = y;
result.z = z;
return result;
}
}
}
//nothing found
return result;
}
//Test
unsigned int array[] = {1, 21, 30, 12, 15, 10, 3, 5, 6, 11, 17, 31};
SumATripleResult r = sumATriple(52, array, 12);
printf("result = %d %d %d", r.x, r.y, r.y);
r = sumATriple(49, array, 12);
printf("result = %d %d %d", r.x, r.y, r.y);
r = sumATriple(32, array, 12);
printf("result = %d %d %d", r.x, r.y, r.y);
This is known as 3SUM problem and has no linear solution yet. I am providing a pseudo code running with O(n^2) using binary search algorithm:
sumTriple(А[1...n]: array of integers,sum: integer): bool
sort(A)
for i ← 1 to n-2
j ← i+1
k ← n
while k > j
if A[i]+A[j]+A[k] = sum
print i,j,k
return true
else if A[i]+A[j]+A[k] > sum
k ← k-1
else // A[i]+A[j]+A[k] < sum
j ← j+1
return false
More information and further details for the problem you can find here.

Counting tilings of a rectangle

I am trying to solve this problem but I can't find a solution:
A board consisting of squares arranged into N rows and M columns is given. A tiling of this board is a pattern of tiles that covers it. A tiling is interesting if:
only tiles of size 1x1 and/or 2x2 are used;
each tile of size 1x1 covers exactly one whole square;
each tile of size 2x2 covers exactly four whole squares;
each square of the board is covered by exactly one tile.
For example, the following images show a few interesting tilings of a board of size 4 rows and 3 columns:
http://dabi.altervista.org/images/task.img.4x3_tilings_example.gif
Two interesting tilings of a board are different if there exists at least one square on the board that is covered with a tile of size 1x1 in one tiling and with a tile of size 2x2 in the other. For example, all tilings shown in the images above are different.
Write a function
int count_tilings(int N, int M);
that, given two integers N and M, returns the remainder modulo 10,000,007 of the number of different interesting tilings of a board of size N rows and M columns.
Assume that:
N is an integer within the range [1..1,000,000];
M is an integer within the range [1..7].
For example, given N = 4 and M = 3, the function should return 11, because there are 11 different interesting tilings of a board of size 4 rows and 3 columns:
http://dabi.altervista.org/images/task.img.4x3_tilings_all.gif
for (4,3) the result is 11, for (6,5) the result is 1213.
I tried the following but it doesn't work:
static public int count_tilings ( int N,int M ) {
int result=1;
if ((N==1)||(M==1)) return 1;
result=result+(N-1)*(M-1);
int max_tiling= (int) ((int)(Math.ceil(N/2))*(Math.ceil(M/2)));
System.out.println(max_tiling);
for (int i=2; i<=(max_tiling);i++){
if (N>=2*i){
int n=i+(N-i);
int k=i;
//System.out.println("M-1->"+(M-1) +"i->"+i);
System.out.println("(M-1)^i)->"+(Math.pow((M-1),i)));
System.out.println( "n="+n+ " k="+k);
System.out.println(combinations(n, k));
if (N-i*2>0){
result+= Math.pow((M-1),i)*combinations(n, k);
}else{
result+= Math.pow((M-1),i);
}
}
if (M>=2*i){
int n=i+(M-i);
int k=i;
System.out.println("(N-1)^i)->"+(Math.pow((N-1),i)));
System.out.println( "n="+n+ " k="+k);
System.out.println(combinations(n, k));
if (M-i*2>0){
result+= Math.pow((N-1),i)*combinations(n, k);
}else{
result+= Math.pow((N-1),i);
}
}
}
return result;
}
static long combinations(int n, int k) {
/*binomial coefficient*/
long coeff = 1;
for (int i = n - k + 1; i <= n; i++) {
coeff *= i;
}
for (int i = 1; i <= k; i++) {
coeff /= i;
}
return coeff;
}
Since this is homework I won't give a full solution, but I'll give you some hints.
First here's a recursive solution:
class Program
{
// Important note:
// The value of masks given here is hard-coded for m == 5.
// In a complete solution, you need to calculate the masks for the
// actual value of m given. See explanation in answer for more details.
int[] masks = { 0, 3, 6, 12, 15, 24, 27, 30 };
int CountTilings(int n, int m, int s = 0)
{
if (n == 1) { return 1; }
int result = 0;
foreach (int mask in masks)
{
if ((mask & s) == 0)
{
result += CountTilings(n - 1, m, mask);
}
}
return result;
}
public static void Main()
{
Program p = new Program();
int result = p.CountTilings(6, 5);
Console.WriteLine(result);
}
}
See it working online: ideone
Note that I've added an extra parameter s. This stores the contents of the first column. If the first column is empty, s = 0. If the first column contains some filled squares the corresponding bits in s are set. Initially s = 0, but when a 2 x 2 tile is placed, this fills up some squares in the next column, and that will mean that s will be non-zero in the recursive call.
The masks variable is hard-coded but in a complete solution it needs to be calculated based on the actual value of m. The values stored in masks make more sense if you look at their binary representations:
00000
00011
00110
01100
01111
11000
11011
11110
In other words, it's all the ways of setting pairs of bits in a binary number with m bits. You can write some code to generate all these possiblities. Or since there are only 7 possible values of m, you could also just hard-code all seven possibilities for masks.
There are however two serious problems with the recursive solution.
It will overflow the stack for large values of N.
It requires exponential time to calculate. It is incredibly slow even for small values of N
Both these problems can be solved by rewriting the algorithm to be iterative. Keep m constant and initalize the result for n = 1 for all possible values of s to be 1. This is because if you only have one column you must use only 1x1 tiles, and there is only one way to do this.
Now you can calculate n = 2 for all possible values of s by using the results from n = 1. This can be repeated until you reach n = N. This algorithm completes in linear time with respect to N, and requires constant space.
Here is a recursive solution:
// time used : 27 min
#include <set>
#include <vector>
#include <iostream>
using namespace std;
void placement(int n, set< vector <int> > & p){
for (int i = 0; i < n -1 ; i ++){
for (set<vector<int> > :: iterator j = p.begin(); j != p.end(); j ++){
vector <int> temp = *j;
if (temp[i] == 1 || temp[i+1] == 1) continue;
temp[i] = 1; temp[i+1] = 1;
p.insert(temp);
}
}
}
vector<vector<int> > placement( int n){
if (n > 7) throw "error";
set <vector <int> > p;
vector <int> temp (n,0);
p.insert (temp);
for (int i = 0; i < 3; i ++) placement(n, p);
vector <vector <int> > s;
s.assign (p.begin(), p.end());
return s;
}
bool tryput(vector <vector <int> > &board, int current, vector<int> & comb){
for (int i = 0; i < comb.size(); i ++){
if ((board[current][i] == 1 || board[current+1][i]) && comb[i] == 1) return false;
}
return true;
}
void put(vector <vector <int> > &board, int current, vector<int> & comb){
for (int i = 0; i < comb.size(); i ++){
if (comb[i] == 1){
board[current][i] = 1;
board[current+1][i] = 1;
}
}
return;
}
void undo(vector <vector <int> > &board, int current, vector<int> & comb){
for (int i = 0; i < comb.size(); i ++){
if (comb[i] == 1){
board[current][i] = 0;
board[current+1][i] = 0;
}
}
return;
}
int place (vector <vector <int> > &board, int current, vector < vector <int> > & all_comb){
int m = board.size();
if (current >= m) throw "error";
if (current == m - 1) return 1;
int count = 0;
for (int i = 0; i < all_comb.size(); i ++){
if (tryput(board, current, all_comb[i])){
put(board, current, all_comb[i]);
count += place(board, current+1, all_comb) % 10000007;
undo(board, current, all_comb[i]);
}
}
return count;
}
int place (int m, int n){
if (m == 0) return 0;
if (m == 1) return 1;
vector < vector <int> > all_comb = placement(n);
vector <vector <int> > board(m, vector<int>(n, 0));
return place (board, 0, all_comb);
}
int main(){
cout << place(3, 4) << endl;
return 0;
}
time complexity O(n^3 * exp(m))
to reduce the space usage try bit vector.
to reduce the time complexity to O(m*(n^3)), try dynamic programming.
to reduce the time complexity to O(log(m) * n^3) try divide and conquer + dynamic programming.
good luck

Resources