Getting random integer without 3 set bits in a row [closed] - algorithm

Closed. This question is opinion-based. It is not currently accepting answers.
Want to improve this question? Update the question so it can be answered with facts and citations by editing this post.
Closed last month.
Improve this question
Is there a performant way to generate an unbiased 64b random integer without 3 set bits in a row, assuming a fast-and-unbiased input PRNG? I don't care about 'wasting bits' of the input source.
That is, something better than the naive rejection-sampling approach:
uint64_t r;
do {
r = get_rand_64();
} while (r & (r >> 1) & (r >> 2));
...which "works", but is very slow. It looks like it's iterating ~187x on average or so.
One possibility I've explored is roughly:
bool p2 = get_rand_bit();
bool p1 = get_rand_bit();
uint64_t r = (p1 << 1) | p2;
for (int i = 2; i < 64; i++) {
bool p0 = (p1 && p2) ? false : get_rand_bit();
r |= p0 << i;
p2 = p1;
p1 = p0;
}
...however, this is still slow. Mainly because using this approach the entire calculation is bit-serial. EDIT: and it's also biased. Easiest to see with a 3-bit integer - 0b011 occurs 1/8th of the time, which is wrong (should be 1/7th).
I've tried doing various parallel fixups, but haven't been able to come up with anything unbiased. It's useful to play around with 4-bit integers first - e.g. setting all bits involved in a conflict to random values ends up biased, and drawing out the Markov chain for 4 bits makes that obvious
Is there a better way to do this?

I optimized the lexicographic decoder, resulting in a four-fold speedup relative to my previous answer. There are two new ideas:
Use the one-to-one correspondence implied by the recurrence T(n) = T(k−1) T(n−k) + T(k−2) T(n−k−1) + T(k−2) T(n−k−2) + T(k−3) T(n−k−1) to avoid working one bit at a time;
Cache the small words without 111 in addition to the recurrence values, incurring an L1 cache hit to save a number of arithmetic operations.
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
enum { kTribonacci14 = 5768 };
static uint64_t g_tribonacci[65];
static void InitTribonacci(void) {
for (unsigned i = 0; i < 65; i++) {
g_tribonacci[i] =
i < 3 ? 1 << i
: g_tribonacci[i - 1] + g_tribonacci[i - 2] + g_tribonacci[i - 3];
}
assert(g_tribonacci[14] == kTribonacci14);
}
static uint16_t g_words_no_111[kTribonacci14];
static void InitCachedWordsNo111(void) {
unsigned i = 0;
for (unsigned word = 0; word < ((unsigned)1 << 14); word++) {
if ((word & (word >> 1) & (word >> 2)) == 0) {
assert(i < kTribonacci14);
g_words_no_111[i++] = (uint16_t)word;
}
}
assert(i == kTribonacci14);
}
static bool CaseNo111(uint64_t *restrict result, unsigned *restrict n,
uint64_t *restrict index, unsigned left_n,
unsigned right_n) {
uint64_t left_count = g_tribonacci[left_n];
uint64_t right_count = g_tribonacci[right_n];
uint64_t product = left_count * right_count;
if (*index >= product) {
*index -= product;
return false;
}
*result = (*result << left_n) + g_words_no_111[*index / right_count];
*n = right_n;
*index %= right_count;
return true;
}
static void Append(uint64_t *result, uint64_t bit) {
*result = (*result << 1) + bit;
}
static uint64_t DecodeNo111(unsigned n, uint64_t index) {
assert(0 <= n && n <= 64);
assert(index < g_tribonacci[n]);
uint64_t result = 0;
while (n > 14) {
assert(g_tribonacci[n] == g_tribonacci[12] * g_tribonacci[n - 13] +
g_tribonacci[11] * g_tribonacci[n - 14] +
g_tribonacci[11] * g_tribonacci[n - 15] +
g_tribonacci[10] * g_tribonacci[n - 14]);
if (CaseNo111(&result, &n, &index, 12, n - 13)) {
Append(&result, 0);
} else if (CaseNo111(&result, &n, &index, 11, n - 14)) {
Append(&result, 0);
Append(&result, 1);
Append(&result, 0);
} else if (CaseNo111(&result, &n, &index, 11, n - 15)) {
Append(&result, 0);
Append(&result, 1);
Append(&result, 1);
Append(&result, 0);
} else if (CaseNo111(&result, &n, &index, 10, n - 14)) {
Append(&result, 0);
Append(&result, 1);
Append(&result, 1);
Append(&result, 0);
} else {
assert(false);
}
}
return (result << n) + g_words_no_111[index];
}
static void PrintWord(unsigned n, uint64_t word) {
assert(0 <= n && n <= 64);
while (n-- > 0) {
putchar('0' + ((word >> n) & 1));
}
putchar('\n');
}
int main(void) {
InitTribonacci();
InitCachedWordsNo111();
if ((false)) {
enum { kN = 20 };
for (uint64_t i = 0; i < g_tribonacci[kN]; i++) {
PrintWord(kN, DecodeNo111(kN, i));
}
}
uint64_t sum = 0;
uint64_t index = 0;
for (uint32_t i = 0; i < 10000000; i++) {
sum += DecodeNo111(64, index % g_tribonacci[64]);
index = (index * 2862933555777941757) + 3037000493;
}
return sum & 127;
}

From #John Coleman's comment, here's the start of an approach based on Tribonacci numbers. Basic idea:
Generate an unbiased number in the range [0..T(bits)), where T(0) = 1, T(1) = 2, T(2) = 4, T(n) = T(n-1) + T(n-2) + T(n-3).
Convert to Tribonacci representation.
You're done.
A minimal example is as follows:
// 1, 2, 4, TRIBO[n-3]+TRIBO[n-2]+TRIBO[n-1]
// possible minor perf optimization: reverse TRIBO
static const uint64_t TRIBO[65] = {1, 2, 4, 7, 13, 24, 44, 81, 149, 274, 504, 927, 1705, 3136, 5768, 10609, 19513, 35890, 66012, 121415, 223317, 410744, 755476, 1389537, 2555757, 4700770, 8646064, 15902591, 29249425, 53798080, 98950096, 181997601, 334745777, 615693474, 1132436852, 2082876103, 3831006429, 7046319384, 12960201916, 23837527729, 43844049029, 80641778674, 148323355432, 272809183135, 501774317241, 922906855808, 1697490356184, 3122171529233, 5742568741225, 10562230626642, 19426970897100, 35731770264967, 65720971788709, 120879712950776, 222332455004452, 408933139743937, 752145307699165, 1383410902447554, 2544489349890656, 4680045560037375, 8607945812375585, 15832480722303616, 29120472094716576, 53560898629395777, 98513851446415969];
// exclusive of max
extern uint64_t get_rand_64_range(uint64_t max);
uint64_t get_rand_no111(void) {
uint64_t idx = get_rand_64_range(TRIBO[64]);
uint64_t ret = 0;
for (int i = 63; i >= 0; i--) {
if (idx >= TRIBO[i]) {
ret |= ((uint64_t) 1) << i;
idx -= TRIBO[i];
}
// optional: if (idx == 0) {break;}
}
return ret;
}
(Warning: retyped from Python code. I suggest testing.)
This satisfies the 'unbiased' portion, and is indeed faster than the naive rejection-sampling approach, but unfortunately is still pretty slow, because it's looping ~64 times.

The idea behind the code below is to generate the upper 32 bits with the proper (non-uniform!) distribution, then generate the lower 32 conditional on the upper. On my laptop, it’s significantly faster than the baseline, and slightly faster than lexicographic decoding.
You can see the logic behind the non-uniform upper distribution with 4-bit outputs: 00 and 10 have four 2-bit lowers, 01 has three lowers, and 11 has two lowers.
#include <cstdint>
#include <random>
namespace {
using Generator = std::mt19937_64;
template <int bits> std::uint64_t GenerateUniform(Generator &gen) {
static_assert(0 <= bits && bits <= 63);
return gen() & ((std::uint64_t{1} << bits) - 1);
}
template <> std::uint64_t GenerateUniform<64>(Generator &gen) { return gen(); }
template <int bits> std::uint64_t GenerateNo111Baseline(Generator &gen) {
std::uint64_t r;
do {
r = GenerateUniform<bits>(gen);
} while (r & (r >> 1) & (r >> 2));
return r;
}
template <int bits> struct Tribonacci {
static constexpr std::uint64_t value = Tribonacci<bits - 1>::value +
Tribonacci<bits - 2>::value +
Tribonacci<bits - 3>::value;
};
template <> struct Tribonacci<0> { static constexpr std::uint64_t value = 1; };
template <> struct Tribonacci<-1> { static constexpr std::uint64_t value = 1; };
template <> struct Tribonacci<-2> { static constexpr std::uint64_t value = 0; };
template <int bits> std::uint64_t GenerateNo111(Generator &gen) {
constexpr int upper_bits = 16;
constexpr int lower_bits = bits - upper_bits;
const std::uint64_t upper = GenerateNo111Baseline<upper_bits>(gen);
for (;;) {
if ((upper & 1) == 0) {
return (upper << lower_bits) + GenerateNo111<lower_bits>(gen);
}
std::uint64_t outcome = std::uniform_int_distribution<std::uint64_t>{
0, Tribonacci<upper_bits>::value - 1}(gen);
if ((upper & 2) == 0) {
if (outcome < Tribonacci<upper_bits - 2>::value) {
return (upper << lower_bits) + (std::uint64_t{1} << (lower_bits - 1)) +
GenerateNo111<lower_bits - 2>(gen);
}
outcome -= Tribonacci<upper_bits - 2>::value;
}
if (outcome < Tribonacci<lower_bits - 1>::value) {
return (upper << lower_bits) + GenerateNo111<lower_bits - 1>(gen);
}
}
}
#define BASELINE(bits) \
template <> std::uint64_t GenerateNo111<bits>(Generator & gen) { \
return GenerateNo111Baseline<bits>(gen); \
}
BASELINE(0)
BASELINE(1)
BASELINE(2)
BASELINE(3)
BASELINE(4)
BASELINE(5)
BASELINE(6)
BASELINE(7)
BASELINE(8)
BASELINE(9)
BASELINE(10)
BASELINE(11)
BASELINE(12)
BASELINE(13)
BASELINE(14)
BASELINE(15)
BASELINE(16)
#undef BASELINE
static const std::uint64_t TRIBO[65] = {1,
2,
4,
7,
13,
24,
44,
81,
149,
274,
504,
927,
1705,
3136,
5768,
10609,
19513,
35890,
66012,
121415,
223317,
410744,
755476,
1389537,
2555757,
4700770,
8646064,
15902591,
29249425,
53798080,
98950096,
181997601,
334745777,
615693474,
1132436852,
2082876103,
3831006429,
7046319384,
12960201916,
23837527729,
43844049029,
80641778674,
148323355432,
272809183135,
501774317241,
922906855808,
1697490356184,
3122171529233,
5742568741225,
10562230626642,
19426970897100,
35731770264967,
65720971788709,
120879712950776,
222332455004452,
408933139743937,
752145307699165,
1383410902447554,
2544489349890656,
4680045560037375,
8607945812375585,
15832480722303616,
29120472094716576,
53560898629395777,
98513851446415969};
std::uint64_t get_rand_no111(Generator &gen) {
std::uint64_t idx =
std::uniform_int_distribution<std::uint64_t>{0, TRIBO[64] - 1}(gen);
std::uint64_t ret = 0;
for (int i = 63; i >= 0; --i) {
if (idx >= TRIBO[i]) {
ret |= std::uint64_t{1} << i;
idx -= TRIBO[i];
}
}
return ret;
}
} // namespace
int main() {
Generator gen{std::random_device{}()};
std::uint64_t sum = 0;
for (std::int32_t i = 0; i < 10000000; i++) {
if constexpr (true) {
sum += GenerateNo111<64>(gen);
} else {
sum += get_rand_no111(gen);
}
}
return sum & 127;
}

What about following simple idea:
Generate random r.
Find within this r window(s)-mask, contains 3 or more sequenced 1s.
If mask is 0 (no 3 or more sequenced bits) - return the r.
Substitute "incorrect" bits under that mask to new random ones.
Goto 2
Code sample (did not tested, compiled only):
uint64_t rand_no3() {
uint64_t r, mask;
for(r = get_rand_64() ; ; ) {
mask = r & (r >> 1) & (r >> 2);
mask |= (mask << 1) | (mask << 2);
if(mask == 0)
return r;
r ^= mask & get_rand_64();
}
}
Another variant of same code, with just single call get_rand_64():
uint64_t rand_no3() {
uint64_t r, mask = ~0ULL;
do {
r ^= mask & get_rand_64();
mask = r & (r >> 1) & (r >> 2);
mask |= (mask << 1) | (mask << 2);
} while(mask != 0);
return r;
}
I know, the last code does not init the r, but it is not matter, because of this variable will be overwritten in 1st loop iteration.

You could generate the number one bit at a time, keeping track of the number of consecutive set bits. Whenever you have two consecutive set bits, you insert an unset bit and set the count back to 0.

Related

Integer Iterator is less than Returned size of Vector (C++)

Currently writing a function to return a Pascal Triangle represented in Vectors. When writing the nested for loop within the function, I noticed that the function was returning empty vectors. Going through the debugger, I realized that the inner for loop never runs.
The code is as follows:
vector<vector<int>> generate(int numRows) {
vector<vector<int>> res = { {1} };
int k;
for (int i = 0; i < numRows; i++)
{
vector<int> c = {};
cout << res[i].size() << endl;
for (k = -1; k < res[i].size(); k++)
{
if (k == -1 || k == res[i].size() - 1)
{
c.push_back(1);
}
else
{
c.push_back(res[i][k] + res[i][k + 1]);
}
}
res.push_back(c);
}
return res;
}
I had changed the iterator variable name multiple times, and have switched the iterator type to size. However the for still does not run.
I tried printing out the iterator k (revealed to be -1) and the size of the first element in the res vector (revealed to be 1). However, when running:
cout << (k < res[i].size() << endl;
the output was 0.
Take extra care when mixing signed and unsigned types.
As explained by #YahavBoneh in the comment above, signed types are converted to unsigned when they are both used in a comparison. In this case, a k value of -1, when converted to unsigned, turns into quite a big number (demo).
If possible, let your compiler warn you about it (e.g. in gcc, using -Wall -Wextra; demo).
Since you seem to be working only with signed types, a good way to avoid introducing unsigned types into the play, is to use std::ssize (since C++20).
[Demo]
#include <fmt/ranges.h>
#include <iostream>
#include <vector>
std::vector<std::vector<int>> generate(int numRows) {
std::vector<std::vector<int>> res{{1}};
for (auto i = 0; i < numRows; i++) {
std::vector<int> c{};
auto width{ std::ssize(res[i]) };
for (auto k = -1; k < width; k++) {
if (k == -1 || k == width - 1) {
c.push_back(1);
} else {
c.push_back(res[i][k] + res[i][k + 1]);
}
}
res.push_back(c);
}
return res;
}
int main() {
fmt::print("{}", fmt::join(generate(3), "\n"));
}
// Outputs:
//
// [1]
// [1, 1]
// [1, 2, 1]
// [1, 3, 3, 1]

How to expand the product of a sequence of binomials efficiently?

The product of the sequence of binomials reads
where {a_i} and {b_i} are coefficients in binomials.
I need to expand it to a polynomial
and use all coefficients {c_k} in the polynomial afterwards.
How to expand it efficiently? The speed has priority over the memory occupation because the expansion will be used many times.
What I tried
At present I just come up with an update scheme, which expands the polynomial right after absorbing one binomial.
This scheme needs two arrays — one for results up to i-1 and the other for results up to i.
Here is the C++ code for my naive scheme, but I think this question is irrelevant to what language is used.
#include <iostream>
#include <vector>
int main()
{
using namespace std;
// just an example, the coefficients are actually real numbers in [0,1]
unsigned d = 3;
vector<double> a;
vector<double> b;
a.resize(d, 1); b.resize(d, 1);
// given two arrays, a[] and b[], of length d
vector< vector<double> > coefficients(2);
coefficients[0].resize(d + 1);
coefficients[1].resize(d + 1);
if (d > 0) {
auto &coeff = coefficients[0]; // i = 0
coeff[0] = a[0];
coeff[1] = b[0];
for (unsigned i = 1; i < d; ++i) {// i : [1, d-1]
const auto ai = a[i];
const auto bi = b[i];
const auto &oldCoeff = coefficients[(i-1)%2];
auto &coeff = coefficients[i%2];
coeff[0] = oldCoeff[0] * ai; // j = 0
for (unsigned j = 1; j <= i; ++j) { // j : [1, i]
coeff[j] = oldCoeff[j] * ai + oldCoeff[j-1] * bi;
}
coeff[i+1] = oldCoeff[i] * bi; // j = i
}
}
const auto &coeff = coefficients[(d-1)%2];
for (unsigned i = 0; i < d; ++i) {
cout << coeff[i] << "\t";
}
cout << coeff[d] << '\n';
}

load vector from large vector with simd based on mask

I hope someone can help here.
I have a large byte vector from which i create a small byte vector ( based on a mask ) which I then process with simd.
Currently the mask is an array of baseOffset + submask (byte[256]) , optimized for storage as there are > 10^8 . I create a maxsize subvector , then loop through the mask array multiply the baseOffssetby 256 and for each bit offset in the mask load from the large vector and put the values in a smaller vector sequentially . The smaller vector is then processed via a number of VPMADDUBSW and accumulated . I can change this structure. eg walk the bits once to use a 8K bit array buffer and then create the small vector.
Is there a faster way i can create the subarray ?
I pulled the code out of the app into a test program but the original is in a state of flux ( moving to AVX2 and pulling more out of C# )
#include "stdafx.h"
#include<stdio.h>
#include <mmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>
#include <smmintrin.h>
#include <immintrin.h>
//from
char N[4096] = { 9, 5, 5, 5, 9, 5, 5, 5, 5, 5 };
//W
char W[4096] = { 1, 2, -3, 5, 5, 5, 5, 5, 5, 5 };
char buffer[4096] ;
__declspec(align(2))
struct packed_destination{
char blockOffset;
__int8 bitMask[32];
};
__m128i sum = _mm_setzero_si128();
packed_destination packed_destinations[10];
void process128(__m128i u, __m128i s)
{
__m128i calc = _mm_maddubs_epi16(u, s); // pmaddubsw
__m128i loints = _mm_cvtepi16_epi32(calc);
__m128i hiints = _mm_cvtepi16_epi32(_mm_shuffle_epi32(calc, 0x4e));
sum = _mm_add_epi32(_mm_add_epi32(loints, hiints), sum);
}
void process_array(char n[], char w[], int length)
{
sum = _mm_setzero_si128();
int length128th = length >> 7;
for (int i = 0; i < length128th; i++)
{
__m128i u = _mm_load_si128((__m128i*)&n[i * 128]);
__m128i s = _mm_load_si128((__m128i*)&w[i * 128]);
process128(u, s);
}
}
void populate_buffer_from_vector(packed_destination packed_destinations[], char n[] , int dest_length)
{
int buffer_dest_index = 0;
for (int i = 0; i < dest_length; i++)
{
int blockOffset = packed_destinations[i].blockOffset <<8 ;
// go through mask and copy to buffer
for (int j = 0; j < 32; j++)
{
int joffset = blockOffset + j << 3;
int mask = packed_destinations[i].bitMask[j];
if (mask & 1 << 0)
buffer[buffer_dest_index++] = n[joffset + 1<<0 ];
if (mask & 1 << 1)
buffer[buffer_dest_index++] = n[joffset + 1<<1];
if (mask & 1 << 2)
buffer[buffer_dest_index++] = n[joffset + 1<<2];
if (mask & 1 << 3)
buffer[buffer_dest_index++] = n[joffset + 1<<3];
if (mask & 1 << 4)
buffer[buffer_dest_index++] = n[joffset + 1<<4];
if (mask & 1 << 5)
buffer[buffer_dest_index++] = n[joffset + 1<<5];
if (mask & 1 << 6)
buffer[buffer_dest_index++] = n[joffset + 1<<6];
if (mask & 1 << 7)
buffer[buffer_dest_index++] = n[joffset + 1<<7];
};
}
}
int _tmain(int argc, _TCHAR* argv[])
{
for (int i = 0; i < 32; ++i)
{
packed_destinations[0].bitMask[i] = 0x0f;
packed_destinations[1].bitMask[i] = 0x04;
}
packed_destinations[1].blockOffset = 1;
populate_buffer_from_vector(packed_destinations, N, 1);
process_array(buffer, W, 256);
int val = sum.m128i_i32[0] +
sum.m128i_i32[1] +
sum.m128i_i32[2] +
sum.m128i_i32[3];
printf("sum is %d" , val);
printf("Press Any Key to Continue\n");
getchar();
return 0;
}
Normally mask usage would be 5-15% for some work loads it would be 25-100% .
MASKMOVDQU is close but then we would have to re pack /swl according to the mask before saving..
A couple of optimisations for your existing code:
If your data is sparse then it would probably be a good idea to add an additional test of each 8 bit mask value prior to testing the additional bits, i.e.
int mask = packed_destinations[i].bitMask[j];
if (mask != 0)
{
if (mask & 1 << 0)
buffer[buffer_dest_index++] = n[joffset + 1<<0 ];
if (mask & 1 << 1)
buffer[buffer_dest_index++] = n[joffset + 1<<1];
...
Secondly your process128 function can be optimised considerably:
inline __m128i process128(const __m128i u, const __m128i s, const __m128i sum)
{
const __m128i vk1 = _mm_set1_epi16(1);
__m128i calc = _mm_maddubs_epi16(u, s);
calc = _mm_madd_epi16(v, vk1);
return _mm_add_epi32(sum, calc);
}
Note that as well as reducing the SSE instruction count from 6 to 3, I've also made sum a parameter, to get away from any dependency on global variables (it's always a good idea to avoid globals, not only for good software engineering but also because they can inhibit certain compiler optimisations).
It would be interesting to see a profile of your code (using a decent sampling profiler, not via instrumentation), since this would help to prioritise any further optimisation efforts.

Dynamic Programming Altogorithm

I'm trying to construct an algorithm that runs at O(nb) time with the following input/question:
input: an array A[1..n] of n different integers and an integer b (i am assuming that the numbers in A are sequential, starting at 1 ending at n, i.e. for n=4 A[1,2,3,4].
question: in how many ways can b be written as the sum of elements of the array when elements in A[] can only be used once?
I've kind of hit a wall on this one. I'm looking for some kind of recursive solution, but I don't see how to avoid using repeat numbers. Like, for instance, if we started at 1 and stored all the ways to make one (just 1) then 2 (just 2) then three (3 or 2+1) etc, it shouldn't be hard to see how many ways we can make larger numbers. But if, for instance, we take 5, we will see that it can be broken into 4+1, and 4 can be further broken down into 3+1, so then we would see 2 solutions (4+1, and 3+1+1), but one of those has a repeat of a number. Am I missing something obvious? Thanks so much!
Recursive and dynamic solutions in C:
#include <stddef.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
typedef unsigned char uchar;
typedef unsigned int uint;
typedef struct tAddend
{
struct tAddend* pPrev;
uint Value;
} tAddend;
void findRecursiveSolution(uint n, uint maxAddend, tAddend* pPrevAddend)
{
uint i;
for (i = maxAddend; ; i--)
{
if (n == 0)
{
while (pPrevAddend != NULL)
{
printf("+%u", pPrevAddend->Value);
pPrevAddend = pPrevAddend->pPrev;
}
printf("\n");
return;
}
if (n >= i && i > 0)
{
tAddend a;
a.pPrev = pPrevAddend;
a.Value = i;
findRecursiveSolution(n - i, i - 1, &a);
}
if (i <= 1)
{
break;
}
}
}
void printDynamicSolution(uchar** pTable, uint n, uint idx, uint sum, tAddend* pPrevAddend)
{
uchar el = pTable[idx][sum];
assert((el != 0) && (el != 5) && (el != 7));
if (el & 2) // 2,3,6 - other(s)
{
printDynamicSolution(pTable,
n,
idx - 1,
sum,
pPrevAddend);
}
if (el & 4) // self + other(s)
{
tAddend a;
a.pPrev = pPrevAddend;
a.Value = idx + 1;
printDynamicSolution(pTable,
n,
idx - 1,
sum - (idx + 1),
&a);
}
if (el & 1) // self, found a solution
{
tAddend a;
a.pPrev = pPrevAddend;
a.Value = idx + 1;
pPrevAddend = &a;
while (pPrevAddend != NULL)
{
printf("+%u", pPrevAddend->Value);
pPrevAddend = pPrevAddend->pPrev;
}
printf("\n");
}
}
void findDynamicSolution(uint n)
{
uchar** table;
uint i, j;
if (n == 0)
{
return;
}
// Allocate the DP table
table = malloc(sizeof(uchar*) * n);
if (table == NULL)
{
printf("not enough memory\n");
return;
}
for (i = 0; i < n; i++)
{
table[i] = malloc(n + 1);
if (table[i] == NULL)
{
while (i > 0)
{
free(table[--i]);
}
free(table);
printf("not enough memory\n");
return;
}
}
// Fill in the DP table
for (i = 0; i < n; i++)
{
for (j = 0; j <= n; j++)
{
if (i == 0)
{
table[i][j] = (i + 1 == j); // self
}
else
{
table[i][j] = (i + 1 == j) + // self
2 * (table[i - 1][j] != 0) + // other(s)
4 * ((j >= i + 1) && (table[i - 1][j - (i + 1)] != 0)); // self + other(s)
}
}
}
printDynamicSolution(table, n, n - 1, n, NULL);
for (i = 0; i < n; i++)
{
free(table[i]);
}
free(table);
}
int main(int argc, char** argv)
{
uint n;
if (argc != 2 || sscanf(argv[1], "%u", &n) != 1)
{
n = 10;
}
printf("Recursive Solution:\n");
findRecursiveSolution(n, n, NULL);
printf("\nDynamic Solution:\n");
findDynamicSolution(n);
return 0;
}
Output:
for 10:
Recursive Solution:
+10
+1+9
+2+8
+3+7
+1+2+7
+4+6
+1+3+6
+1+4+5
+2+3+5
+1+2+3+4
Dynamic Solution:
+1+2+3+4
+2+3+5
+1+4+5
+1+3+6
+4+6
+1+2+7
+3+7
+2+8
+1+9
+10
See also on ideone.
Let F(x,i) be the number of ways elements of A[1:i] can be summed to get x.
F(x,i+1) = F(x-A[i+1],i) + F(x,i)
That is it!
This is not a dynamic programming solution though. Non-recursive.
Assumption that arr is sorted in your case like [i....j] where a[i] <= a[j]
That's easy enough
void summer(int[] arr, int n , int b)
{
int lowerbound = 0;
int upperbound = n-1;
while (lowerbound < upperbound)
{
if(arr[lowerbound]+arr[upperbound] == b)
{
// print arr[lowerbound] and arr[upperbound]
lowerbound++; upperbound--;
}
else if(arr[lowerbound]+arr[upperbound] < b)
lowerbound++;
else
upperbound--;
}
}
The above program is easily modifiable to a recursive you need to only change the function definition by passing lowerbound and upperbound.
Case for termination is still lowerbound < upperbound
Base case is if arr[lowerbound] +arr[upperbound] == b
Edited based on comments
You will need to use a modified version of integer knapsack problem. The values of [i,j] both need to be modified accordingly. You are having the problem because you are not most probably modifying your i carefully, Increase your i accordingly then their will not be repetition like the one you are having.

Find longest non-decreasing sequence

Given the following question,
Given an array of integers A of length n, find the longest sequence {i_1, ..., i_k} such that i_j < i_(j+1) and A[i_j] <= A[i_(j+1)] for any j in [1, k-1].
Here is my solution, is this correct?
max_start = 0; // store the final result
max_end = 0;
try_start = 0; // store the initial result
try_end = 0;
FOR i=0; i<(A.length-1); i++ DO
if A[i] <= A[i+1]
try_end = i+1; // satisfy the condition so move the ending point
else // now the condition is broken
if (try_end - try_start) > (max_end - max_start) // keep it if it is the maximum
max_end = try_end;
max_start = try_start;
endif
try_start = i+1; // reset the search
try_end = i+1;
endif
ENDFOR
// Checking the boundary conditions based on comments by Jason
if (try_end - try_start) > (max_end - max_start)
max_end = try_end;
max_start = try_start;
endif
Somehow, I don't think this is a correct solution but I cannot find a counter-example that disapprove this solution.
anyone can help?
Thank you
I don't see any backtracking in your algorithm, and it seems to be suited for contiguous blocks of non-decreasing numbers. If I understand correctly, for the following input:
1 2 3 4 10 5 6 7
your algorithm would return 1 2 3 4 10 instead of 1 2 3 4 5 6 7.
Try to find a solution using dynamic programming.
You're missing the case where the condition is not broken at its last iteration:
1, 3, 5, 2, 4, 6, 8, 10
You'll never promote try_start and try_end to max_start and max_end unless your condition is broken. You need to perform the same check at the end of the loop.
Well, it looks like you're finding the start and the end of the sequence, which may be correct but it wasn't what was asked. I'd start by reading http://en.wikipedia.org/wiki/Longest_increasing_subsequence - I believe this is the question that was asked and it's a fairly well-known problem. In general cannot be solved in linear time, and will also require some form of dynamic programming. (There's an easier n^2 variant of the algorithm on Wikipedia as well - just do a linear sweep instead of the binary search.)
#include <algorithm>
#include <vector>
#include <stdio.h>
#include <string.h>
#include <assert.h>
template<class RandIter>
class CompM {
const RandIter X;
typedef typename std::iterator_traits<RandIter>::value_type value_type;
struct elem {
value_type c; // char type
explicit elem(value_type c) : c(c) {}
};
public:
elem operator()(value_type c) const { return elem(c); }
bool operator()(int a, int b) const { return X[a] < X[b]; } // for is_sorted
bool operator()(int a, elem b) const { return X[a] < b.c; } // for find
bool operator()(elem a, int b) const { return a.c < X[b]; } // for find
explicit CompM(const RandIter X) : X(X) {}
};
template<class RandContainer, class Key, class Compare>
int upper(const RandContainer& a, int n, const Key& k, const Compare& comp) {
return std::upper_bound(a.begin(), a.begin() + n, k, comp) - a.begin();
}
template<class RandIter>
std::pair<int,int> lis2(RandIter X, std::vector<int>& P)
{
int n = P.size(); assert(n > 0);
std::vector<int> M(n);
CompM<RandIter> comp(X);
int L = 0;
for (int i = 0; i < n; ++i) {
int j = upper(M, L, comp(X[i]), comp);
P[i] = (j > 0) ? M[j-1] : -1;
if (j == L) L++;
M[j] = i;
}
return std::pair<int,int>(L, M[L-1]);
}
int main(int argc, char** argv)
{
if (argc < 2) {
fprintf(stderr, "usage: %s string\n", argv[0]);
return 3;
}
const char* X = argv[1];
int n = strlen(X);
if (n == 0) {
fprintf(stderr, "param string must not empty\n");
return 3;
}
std::vector<int> P(n), S(n), F(n);
std::pair<int,int> lt = lis2(X, P); // L and tail
int L = lt.first;
printf("Longest_increasing_subsequence:L=%d\n", L);
for (int i = lt.second; i >= 0; --i) {
if (!F[i]) {
int j, k = 0;
for (j = i; j != -1; j = P[j], ++k) {
S[k] = j;
F[j] = 1;
}
std::reverse(S.begin(), S.begin()+k);
for (j = 0; j < k; ++j)
printf("%c", X[S[j]]);
printf("\n");
}
}
return 0;
}

Resources