Efficient and Exact Floating-Point Binary Search

Consider the following binary search for a value greater than lo, but less than or equal to hi:
function find(lo: number, hi: number, isTooLow: (testVal: number) => boolean) {
  for (;;) {
    const testVal = between(lo, hi);
    if (testVal <= lo || testVal >= hi) {
      break;
    }
    if (isTooLow(testVal)) {
      lo = testVal;
    } else {
      hi = testVal;
    }
  }
  return hi;
}
Note that a number here is a 64-bit float.
The search will always terminate, and if the between function is very carefully written to choose the median available 64-bit float between lo and hi, if it exists, then also:
The search will terminate within 64 iterations; and
It will exactly find the smallest value hi such that isTooLow(hi) == false
But such a between function is tricky and complicated, and it depends on the fine details of the floating point representation.
Can anyone suggest an implementation for between that is simpler and that does not depend on any specifics of the floating point representation, except that there is a fixed-width mantissa, a fixed-width exponent, and a sign?
It will need to be implemented in JavaScript, and it only needs to be almost as good, such that:
The search will always terminate within 200 iterations or so; and
It will very nearly (within 3 or 4 possible values) find the smallest value hi such that isTooLow(hi) == false
Extra points for avoiding transcendental functions and sqrt.
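For a sense of scale, here is a small untested sketch of why the plain arithmetic mean alone is not good enough: each halving removes only about one bit of the exponent, so a worst-case search over the whole positive range takes a couple of thousand probes rather than 200:
let lo = 0, hi = Number.MAX_VALUE, n = 0;
for (;;) {
  const mid = lo + (hi - lo) / 2;
  if (mid <= lo || mid >= hi) break;
  n++;
  hi = mid; // treat every probe as "not too low", the worst case for bisection
}
console.log(n); // roughly 2100 halvings from ~2^1024 down to the smallest subnormal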
RESOLUTION
In the end, I really liked David's stateful guesser, but I hoisted the state up into the call stack, and the result essentially does a search for the exponent first, without any knowledge of the representation.
I haven't tested/debugged this yet:
function find(lo: number, hi: number, isTooLow: (testVal: number) => boolean) {
  [lo, hi] = getLinearRange(lo, hi, isTooLow);
  for (;;) {
    const testVal = lo + (hi - lo) * 0.5;
    if (testVal <= lo || testVal >= hi) {
      break;
    }
    if (isTooLow(testVal)) {
      lo = testVal;
    } else {
      hi = testVal;
    }
  }
  return hi;
}
/**
 * Reduce a floating-point range to a size where a conventional binary
 * search is appropriate.
 * @returns [newlow, newhigh]
 */
function getLinearRange(
    low: number, high: number,
    isTooLow: (n: number) => boolean): [number, number] {
  let negRange: [number, number] | undefined;
  if (low < 0) {
    if (high > 0) {
      if (isTooLow(0)) {
        return scaleRange(0, high, 0.25, isTooLow);
      } else {
        const isTooHigh = (n: number) => !isTooLow(n);
        negRange = scaleRange(0, -low, 0.25, isTooHigh);
      }
    } else {
      const isTooHigh = (n: number) => !isTooLow(n);
      negRange = scaleRange(-high, -low, 0.25, isTooHigh);
    }
  } else {
    return scaleRange(low, high, 0.25, isTooLow);
  }
  // we have to negate the range
  low = -negRange[1];
  negRange[1] = -negRange[0];
  negRange[0] = low;
  return negRange;
}
/**
 * Reduce a positive range until low/high >= minScale
 * @returns [newlow, newhigh]
 */
function scaleRange(
    low: number, high: number, minScale: number,
    isTooLow: (n: number) => boolean): [number, number] {
  if (!(minScale > 0 && low < high * minScale)) {
    return [low, high];
  }
  const range = scaleRange(low, high, minScale * minScale, isTooLow);
  [low, high] = range;
  const test = high * minScale;
  if (test > low && test < high) {
    if (isTooLow(test)) {
      range[0] = test;
    } else {
      range[1] = test;
    }
  }
  return range;
}

We could also do some precomputation (code below works for nonnegative finite ranges, add branches ad lib to handle the other cases). We approximate the smallest useful fraction, increase it by square roots to effect binary search on the exponent, and then finish off with good old arithmetic mean to nail down the significand. I think the worst case is 65 queries, certainly not much more, though many inputs will take longer than the bit-munging algorithm.
const Guesser = {
  fractions: null,
  between(low, high) {
    if (this.fractions === null) {
      this.fractions = [];
      let f = 0.25;
      while (low + f * (high - low) > low) {
        this.fractions.push(f);
        f *= f;
      }
    }
    return low + (this.fractions.pop() || 0.5) * (high - low);
  },
};
for (let i = 0; i <= 101; ++i) {
  let n = 0;
  let g = Object.create(Guesser);
  let low = 0;
  let high = 1.7976931348623157e308;
  for (;;) {
    ++n;
    let mid = g.between(low, high);
    if (mid <= low || high <= mid) break;
    if (100 * Math.random() < i) low = mid;
    else high = mid;
  }
  console.log(n);
}

Here’s an idea that I think meets your specs on IEEE doubles using primitive operations only (but seems probably worse than using square root assuming that it’s hardware-accelerated). Find the sign (if needed), find the top 7 bits of the 11-bit exponent using linear search (≈ 128 queries, plus 4 for subnormals, ugh), then switch to the arithmetic mean (≈ 53 + 2^(11−7) = 69 queries), for a total of about 200 queries if I’m counting right. Untested JavaScript below.
const multiplicative_stride = 1 / 2 ** (2 ** (11 - 7));

function between(low, high) {
  if (high <= 0) return -between(-high, -low);
  if (low < 0) return 0;
  const mid = multiplicative_stride * high;
  return mid <= low ? low + 0.5 * (high - low) : mid;
}
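For illustration, plugging that between into the question's find loop might look like the following sketch (the findWith wrapper and the square-root-of-2 predicate are only for demonstration):
function findWith(lo, hi, isTooLow, between) {
  for (;;) {
    const testVal = between(lo, hi);
    if (testVal <= lo || testVal >= hi) break;
    if (isTooLow(testVal)) lo = testVal;
    else hi = testVal;
  }
  return hi;
}
console.log(findWith(0, 1e9, x => x * x < 2, between)); // ~1.4142135623730951, well within the 200-probe budget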

Using 64 floating-point iterations is not a good idea, because you forgot that 64-bit floats (double) are represented as 3 separate parts:
1 bit sign
11 bit exponent
52 bit mantissa (plus an implicit leading bit, giving 53 bits of precision)
and if you do not know the real range (where you can start) of the solution, you might be off by quite a few iterations, as the range of such a number is much more than 2^64 ...
However, if you do this on the binary bits directly then it's OK (but you have to handle special cases like NaN, Inf and maybe also denormalized numbers).
So instead of using *0.5 you use binary operations on individual bits like x<<=1, x|=1, x^=1 ...
Here is a simple C++ example:
typedef unsigned int DWORD;             // assumed: 32-bit unsigned int (Windows-style alias used below)

double f64_sqrt(double x)
{
    // IEEE 754 double MSW masks
    const DWORD _f64_sig    =0x80000000; // sign
    const DWORD _f64_exp    =0x7FF00000; // exponent
    const DWORD _f64_exp_sig=0x40000000; // exponent sign
    const DWORD _f64_exp_bia=0x3FF00000; // exponent bias
    const DWORD _f64_exp_lsb=0x00100000; // exponent LSB
    const DWORD _f64_man    =0x000FFFFF; // mantissa
    const DWORD _f64_man_msb=0x00080000; // mantissa MSB
    const int h=1;                       // may be platform dependent MSB/LSB order
    const int l=0;
    DWORD b;                             // bit mask
    union                                // semi result
    {
        double f;                        // 64bit floating point
        DWORD u[2];                      // 2x32 bit uint
    } y;
    // fabs
    y.f=x; y.u[h]&=_f64_exp|_f64_man; x=y.f;
    // set safe exponent (~ abs half) destroys mantissa, sign
    b=(y.u[h]&_f64_exp)-_f64_exp_bia;
    y.u[h]=((b>>1)|(b&_f64_exp_sig))+_f64_exp_bia;
    // sign=`+` mantissa=0
    y.u[h]&=_f64_exp;
    // correct exponent if needed
    if (y.f*y.f>x) y.u[h]=(y.u[h]-_f64_exp_lsb)&_f64_exp;
    // binary search
    for (b=_f64_man_msb;b;b>>=1) { y.u[h]|=b; if (y.f*y.f>x) y.u[h]^=b; }
    for (b=0x80000000 ;b;b>>=1) { y.u[l]|=b; if (y.f*y.f>x) y.u[l]^=b; }
    return y.f;
}
using "estimation" of solution exponent (exploiting the fact that result has ~ half of integer bits for sqrt) and binary access binary search of mantissa only (53 iterations)
In case you do not know the exponent range, you have to binary-search it too (starting from the highest bit, or the one after it if the sign is known) ...
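For completeness, here is roughly what the bit-level idea looks like in JavaScript with a typed-array view instead of the C++ union above (an untested sketch; it assumes finite, non-negative lo < hi, where IEEE-754 bit patterns are ordered the same way as the values):
const buf = new ArrayBuffer(8);
const f64 = new Float64Array(buf);
const u64 = new BigUint64Array(buf);
function toBits(x) { f64[0] = x; return u64[0]; }
function fromBits(b) { u64[0] = b; return f64[0]; }
function betweenBits(lo, hi) {
  // the midpoint of the integer encodings is (close to) the median representable double,
  // so the outer search finishes in at most ~64 steps
  return fromBits((toBits(lo) + toBits(hi)) / 2n);
}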


This is a follow on from a previously posted question:
How to generate a random number in C?
I wish to be able to generate a random number from within a particular range, such as 1 to 6 to mimic the sides of a die.
How would I go about doing this?
All the answers so far are mathematically wrong. Returning rand() % N does not uniformly give a number in the range [0, N) unless N divides the length of the interval into which rand() returns (i.e. is a power of 2). Furthermore, one has no idea whether the moduli of rand() are independent: it's possible that they go 0, 1, 2, ..., which is uniform but not very random. The only assumption it seems reasonable to make is that rand() puts out a Poisson distribution: any two nonoverlapping subintervals of the same size are equally likely and independent. For a finite set of values, this implies a uniform distribution and also ensures that the values of rand() are nicely scattered.
This means that the only correct way of changing the range of rand() is to divide it into boxes; for example, if RAND_MAX == 11 and you want a range of 1..6, you should assign {0,1} to 1, {2,3} to 2, and so on. These are disjoint, equally-sized intervals and thus are uniformly and independently distributed.
The suggestion to use floating-point division is mathematically plausible but suffers from rounding issues in principle. Perhaps double is high-enough precision to make it work; perhaps not. I don't know and I don't want to have to figure it out; in any case, the answer is system-dependent.
The correct way is to use integer arithmetic. That is, you want something like the following:
#include <stdlib.h> // For random(), RAND_MAX
// Assumes 0 <= max <= RAND_MAX
// Returns in the closed interval [0, max]
long random_at_most(long max) {
unsigned long
// max <= RAND_MAX < ULONG_MAX, so this is okay.
num_bins = (unsigned long) max + 1,
num_rand = (unsigned long) RAND_MAX + 1,
bin_size = num_rand / num_bins,
defect = num_rand % num_bins;
long x;
do {
x = random();
}
// This is carefully written not to overflow
while (num_rand - defect <= (unsigned long)x);
// Truncated division is intentional
return x/bin_size;
}
The loop is necessary to get a perfectly uniform distribution. For example, if you are given random numbers from 0 to 2 and you want only ones from 0 to 1, you just keep pulling until you don't get a 2; it's not hard to check that this gives 0 or 1 with equal probability. This method is also described in the link that nos gave in their answer, though coded differently. I'm using random() rather than rand() as it has a better distribution (as noted by the man page for rand()).
If you want to get random values outside the default range [0, RAND_MAX], then you have to do something tricky. Perhaps the most expedient is to define a function random_extended() that pulls n bits (using random_at_most()) and returns in [0, 2**n), and then apply random_at_most() with random_extended() in place of random() (and 2**n - 1 in place of RAND_MAX) to pull a random value less than 2**n, assuming you have a numerical type that can hold such a value. Finally, of course, you can get values in [min, max] using min + random_at_most(max - min), including negative values.
Following on from #Ryan Reich's answer, I thought I'd offer my cleaned up version. The first bounds check isn't required given the second bounds check, and I've made it iterative rather than recursive. It returns values in the range [min, max], where max >= min and 1+max-min < RAND_MAX.
unsigned int rand_interval(unsigned int min, unsigned int max)
{
    int r;
    const unsigned int range = 1 + max - min;
    const unsigned int buckets = RAND_MAX / range;
    const unsigned int limit = buckets * range;

    /* Create equal size buckets all in a row, then fire randomly towards
     * the buckets until you land in one of them. All buckets are equally
     * likely. If you land off the end of the line of buckets, try again. */
    do
    {
        r = rand();
    } while (r >= limit);

    return min + (r / buckets);
}
Here is a formula if you know the max and min values of a range, and you want to generate numbers inclusive within that range:
r = (rand() % (max + 1 - min)) + min
unsigned int
randr(unsigned int min, unsigned int max)
{
double scaled = (double)rand()/RAND_MAX;
return (max - min +1)*scaled + min;
}
See here for other options.
Wouldn't you just do:
srand(time(NULL));
int r = ( rand() % 6 ) + 1;
% is the modulus operator. Essentially it will just divide by 6 and return the remainder... from 0 - 5
For those who understand the bias problem but can't stand the unpredictable run-time of rejection-based methods, this series produces a progressively less biased random integer in the [0, n-1] interval:
r = n / 2;
r = (rand() * n + r) / (RAND_MAX + 1);
r = (rand() * n + r) / (RAND_MAX + 1);
r = (rand() * n + r) / (RAND_MAX + 1);
...
It does so by synthesising a high-precision fixed-point random number of i * log_2(RAND_MAX + 1) bits (where i is the number of iterations) and performing a long multiplication by n.
When the number of bits is sufficiently large compared to n, the bias becomes immeasurably small.
It does not matter if RAND_MAX + 1 is less than n (as in this question), or if it is not a power of two, but care must be taken to avoid integer overflow if RAND_MAX * n is large.
Here is a slightly simpler algorithm than Ryan Reich's solution:
/// Begin and end are *inclusive*; => [begin, end]
uint32_t getRandInterval(uint32_t begin, uint32_t end) {
    uint32_t range = (end - begin) + 1;
    uint32_t limit = ((uint64_t)RAND_MAX + 1) - (((uint64_t)RAND_MAX + 1) % range);

    /* Imagine range-sized buckets all in a row, then fire randomly towards
     * the buckets until you land in one of them. All buckets are equally
     * likely. If you land off the end of the line of buckets, try again. */
    uint32_t randVal = rand();
    while (randVal >= limit) randVal = rand();

    /// Return the position you hit in the bucket + begin as random number
    return (randVal % range) + begin;
}
Example (RAND_MAX := 16, begin := 2, end := 7)
=> range := 6 (1 + end - begin)
=> limit := 12 (RAND_MAX + 1) - ((RAND_MAX + 1) % range)
The limit is always a multiple of the range,
so we can split it into range-sized buckets:
Possible-rand-output: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
Buckets: [0, 1, 2, 3, 4, 5][0, 1, 2, 3, 4, 5][X, X, X, X, X]
Buckets + begin: [2, 3, 4, 5, 6, 7][2, 3, 4, 5, 6, 7][X, X, X, X, X]
1st call to rand() => 13
→ 13 is not in the bucket-range anymore (>= limit), while-condition is true
→ retry...
2nd call to rand() => 7
→ 7 is in the bucket-range (< limit), while-condition is false
→ Get the corresponding bucket-value 1 (randVal % range) and add begin
=> 3
In order to avoid the modulo bias (suggested in other answers) you can always use:
arc4random_uniform(MAX-MIN)+MIN
Where "MAX" is the upper bound and "MIN" is lower bound. For example, for numbers between 10 and 20:
arc4random_uniform(20-10)+10
arc4random_uniform(10)+10
Simple solution and better than using "rand() % N".
While Ryan is correct, the solution can be much simpler based on what is known about the source of the randomness. To re-state the problem:
There is a source of randomness, outputting integer numbers in range [0, MAX) with uniform distribution.
The goal is to produce uniformly distributed random integer numbers in range [rmin, rmax] where 0 <= rmin < rmax < MAX.
In my experience, if the number of bins (or "boxes") is significantly smaller than the range of the original numbers, and the original source is cryptographically strong - there is no need to go through all that rigamarole, and simple modulo division would suffice (like output = rnd.next() % (rmax+1), if rmin == 0), and produce random numbers that are distributed uniformly "enough", and without any loss of speed. The key factor is the randomness source (i.e., kids, don't try this at home with rand()).
Here's an example/proof of how it works in practice. I wanted to generate random numbers from 1 to 22, having a cryptographically strong source that produced random bytes (based on Intel RDRAND). The results are:
Rnd distribution test (22 boxes, numbers of entries in each box):
1: 409443 4.55%
2: 408736 4.54%
3: 408557 4.54%
4: 409125 4.55%
5: 408812 4.54%
6: 409418 4.55%
7: 408365 4.54%
8: 407992 4.53%
9: 409262 4.55%
10: 408112 4.53%
11: 409995 4.56%
12: 409810 4.55%
13: 409638 4.55%
14: 408905 4.54%
15: 408484 4.54%
16: 408211 4.54%
17: 409773 4.55%
18: 409597 4.55%
19: 409727 4.55%
20: 409062 4.55%
21: 409634 4.55%
22: 409342 4.55%
total: 100.00%
This is as close to uniform as I need for my purpose (fair dice throw, generating cryptographically strong codebooks for WWII cipher machines such as http://users.telenet.be/d.rijmenants/en/kl-7sim.htm, etc). The output does not show any appreciable bias.
Here's the source of cryptographically strong (true) random number generator:
Intel Digital Random Number Generator
and a sample code that produces 64-bit (unsigned) random numbers.
int rdrand64_step(unsigned long long int *therand)
{
unsigned long long int foo;
int cf_error_status;
asm("rdrand %%rax; \
mov $1,%%edx; \
cmovae %%rax,%%rdx; \
mov %%edx,%1; \
mov %%rax, %0;":"=r"(foo),"=r"(cf_error_status)::"%rax","%rdx");
*therand = foo;
return cf_error_status;
}
I compiled it on Mac OS X with clang-6.0.1 (straight), and with gcc-4.8.3 using "-Wa,q" flag (because GAS does not support these new instructions).
As said before, modulo isn't sufficient because it skews the distribution. Here's my code which masks off bits and uses them to ensure the distribution isn't skewed.
static uint32_t randomInRange(uint32_t a, uint32_t b) {
    uint32_t v;
    uint32_t range;
    uint32_t upper;
    uint32_t lower;
    uint32_t mask;

    if (a == b) {
        return a;
    }
    if (a > b) {
        upper = a;
        lower = b;
    } else {
        upper = b;
        lower = a;
    }
    range = upper - lower;

    mask = 0;
    // XXX calculate range with log and mask? nah, too lazy :).
    while (1) {
        if (mask >= range) {
            break;
        }
        mask = (mask << 1) | 1;
    }

    while (1) {
        v = rand() & mask;
        if (v <= range) {
            return lower + v;
        }
    }
}
The following simple code lets you look at the distribution:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int main() {
    unsigned long long int i;
    unsigned int n = 10;
    unsigned int numbers[n];

    for (i = 0; i < n; i++) {
        numbers[i] = 0;
    }
    for (i = 0; i < 10000000; i++) {
        uint32_t rand = randomInRange(0, n - 1);
        if (rand >= n) {
            printf("bug: rand out of range %u\n", (unsigned int)rand);
            return 1;
        }
        numbers[rand] += 1;
    }
    for (i = 0; i < n; i++) {
        printf("%llu: %u\n", i, numbers[i]);
    }
    return 0;
}
Will return a floating point number in the range [0,1]:
#define rand01() (((double)random())/((double)(RAND_MAX)))

Algorithm for calculating trigonometry, logarithms or something like that. ONLY addition-subtraction

I am restoring the Ascota 170 antique mechanical programmable computer. It is already working.
Now I’m looking for an algorithm to demonstrate its capabilities — like calculating trigonometric or logarithmic tables. Or something like that.
Unfortunately, of the mathematical operations, the computer is only capable of adding and subtracting integers (55 registers from -1E12 to 1E12). There is not even a shift-by-one-digit operation, so multiplication can only be implemented programmatically, and in practice only by very small numbers.
But its logical operations are very well developed.
Could you advise me any suitable algorithm?
So what you're doing is really kinda awesome. And as it happens, I can explain quite a bit about how to implement fractional logarithms using only integer addition and subtraction! This post is going to be long, but there's lots of detail included, and a working implementation at the end, and it should be enough for you to do some fun things with your weird mechanical computer.
Implementing Comparisons
You're going to need to be able to compare numbers. While you said you can perform comparisons == 0 and > 0, that's not really quite enough for most of the interesting algorithms you'll want to implement. You need relative comparisons, which can be determined via subtraction:
isLessThan(a, b):
diff = b - a
if diff > 0 then return true
else return false
isGreaterThan(a, b):
diff = a - b
if diff > 0 then return true
else return false
isLessThanOrEqual(a, b):
diff = a - b
if diff > 0 then return false
else return true
isGreaterThanOrEqual(a, b):
diff = b - a
if diff > 0 then return false
else return true
For the rest of this post, I'm just going to write the simpler form of a > b, but if you can't do that directly, you can substitute in one of the operations above.
Implementing Shifts
Now, since you don't have digit-shifting hardware, you'll have to create "routines" to implement it. A left-shift is easy: add the number to itself (giving 2x), add that result to itself (4x), then add the original number (5x), and finally add that result to itself one more time (10x); that's the equivalent of shifting left by one digit.
So shift left by one digit, or multiply-by-ten:
shiftLeft(value):
value2 = value + value
value4 = value2 + value2
value5 = value4 + value
return value5 + value5
Shifting by many digits is just repeated invocation of shiftLeft():
shl(value, count):
  repeat:
    if count <= 0 then goto done
    value = shiftLeft(value)
    count = count - 1
    goto repeat
  done:
    return value
Shifting right by one digit is a little harder: We need to do this with repeated subtraction and addition, as in the pseudocode below:
shr(value, count):
    if count == 0 then return value
    index = 11
    shifted = 0
  repeat1:
    if index < 0 then goto done
    adder = shl(1, index - count)
    subtractor = shl(adder, count)
  repeat2:
    if value <= subtractor then goto next
    value = value - subtractor
    shifted = shifted + adder
    goto repeat2
  next:
    index = index - 1
    goto repeat1
  done:
    return shifted
Conveniently, since it's hard to shift right in the first place, the algorithm lets us directly choose how many digits to shift by.
Multiplication
It looks like your hardware might have multiplication? But if it doesn't, you can implement multiplication using repeated addition and shifting. Binary multiplication is the easiest form to implement that's actually efficient, and that requires us to first implement multiplyByTwo() and divideByTwo(), using the same basic techniques that we used to implement shiftLeft() and shr().
Once you have those implemented, multiplication involves repeatedly slicing off the last bit of one of the numbers, and if that bit is a 1, then adding a growing version of the other number to the running total:
multiply(a, b):
    product = 0
  repeat:
    if b <= 0 then goto done
    nextB = divideByTwo(b)
    bit = b - multiplyByTwo(nextB)
    if bit == 0 then goto skip
    product = product + a
  skip:
    a = a + a
    b = nextB
    goto repeat
  done:
    return product
A full implementation of this is included below, if you need it.
Integer Logarithms
We can use our ability to shift right by a digit to calculate the integer part of the base-10 logarithm of a number — this is really just how many times you can shift the number right before you reach a number too small to shift.
integerLogarithm(value):
    count = 0
  repeat:
    if value <= 9 then goto done
    value = shiftRight(value)
    count = count + 1
    goto repeat
  done:
    return count
So for 0-9, this returns 0; for 10-99, this returns 1; for 100-999 this returns 2, and so on.
Integer Exponents
The opposite of the above algorithm is pretty trivial: To calculate 10 raised to an integer power, we just shift the digits left by the power.
integerExponent(count):
value = shl(1, count)
return value
So for 0, this returns 1; for 1, this return 10; for 2, this returns 100; for 3, this returns 1000; and so on.
Splitting the Integer and Fraction
Now that we can handle integer powers and logarithms, we're almost ready to handle the fractional part. But before we can really talk about how to compute the fractional part of the logarithm, we have to talk about how to divide up the problem so we can compute the fractional part separately from the integer part. Ideally, we only want to deal with computing logarithms for numbers in a fixed range — say, from 1 to 10, rather than from 1 to infinity.
We can use our integer logarithm and exponent routines to slice up the full logarithm problem so that we're always dealing with a value in the range of [1, 10), no matter what the input number was.
First, we calculate the integer logarithm, and then the integer exponent, and then we subtract that from the original number. Whatever is left over is the fractional part that we need to calculate: And then the only remaining exercise is to shift that fractional part so that it's always in a consistent range.
normalize(value):
    intLog = integerLogarithm(value)   // from 0 to 12 (meaningful digits)
    if intLog <= 5 then goto lessThan
    value = shr(value, intLog - 5)
    goto done
  lessThan:
    value = shl(value, 5 - intLog)
  done:
    return value
You can convince yourself with relatively little effort that no matter what the original value was, its highest nonzero digit will be moved to column 7: So "12345" will become "000000123450" (i.e., "0000001.23450"). This allows us to pretend that there's always an invisible decimal point a little more than halfway down the number, so that now we only need to solve the problem of calculating logarithms of values in the range of [1, 10).
(Why "more than halfway"? We will need the upper half of the value to always be zero, and you'll see why in a moment.)
Fractional Logarithms
Knuth explains how to do this in The Art of Computer Programming, section 1.2.2. Our goal will be to calculate log10(x) so that for some values of b1, b2, b3 ... , where n is already 0 (because we split out the integer portion above):
log10(x) = n + b1/2 + b2/4 + b3/8 + b4/16 + ...
Knuth says that we can obtain b1, b2, b3 ... like this:
To obtain b1, b2, ..., we now set x0 = x / 10^n and, for k >= 1,
b[k] = 0, x[k] = x[k-1] ^ 2, if x[k-1] ^ 2 < 10;
b[k] = 1, x[k] = x[k-1] ^ 2 / 10, if x[k-1] ^ 2 >= 10.
That is to say, each step uses pseudocode loop something like this:
fractionalLogarithm(x):
    for i = 1 to numberOfBinaryDigitsOfPrecision:
        nextX = x * x
        if nextX < 10 then:
            b[i] = 0
        else:
            b[i] = 1
            nextX = nextX / 10
        x = nextX
In order for this to work using the fixed-point numbers we have above, we have to implement x * x using a shift to move the decimal point back into place, which will lose some digits. This will cause error to propagate, as Knuth says, but it will give enough accuracy that it's good enough for demonstration purposes.
So given a fractional value generated by normalize(value), we can compute its fractional binary logarithm like this:
fractionalLogarithm(value):
    for i = 1 to 20:
        value = shr(value * value, 5)
        if value < 1000000 then:
            b[i] = 0
        else:
            b[i] = 1
            value = shr(value, 1)
But a binary fractional logarithm — individual bits! — isn't especially useful, especially since we computed a decimal version of the integer part of the logarithm in the earlier step. So we'll modify this one more time, to calculate a decimal fractional logarithm, to six places, instead of calculating an array of bits; for that, we'll need a table of 20 values that represent the conversions of each of those bits to decimal, and we'll store them as fixed-point as well:
table[1] = 1/(2^1) = 1/2 = 500000
table[2] = 1/(2^2) = 1/4 = 250000
table[3] = 1/(2^3) = 1/8 = 125000
table[4] = 1/(2^4) = 1/16 = 062500
table[5] = 1/(2^5) = 1/32 = 031250
table[6] = 1/(2^6) = 1/64 = 015625
...
table[17] = 1/(2^17) = 1/131072 = 000008
table[18] = 1/(2^18) = 1/262144 = 000004
table[19] = 1/(2^19) = 1/524288 = 000002
table[20] = 1/(2^20) = 1/1048576 = 000001
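As an aside, that table can be generated mechanically; a one-line sketch using the same 10^6 fixed-point scale:
for (let k = 1; k <= 20; k++) console.log(k, Math.round(1e6 / 2 ** k));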
So now with that table, we can produce the whole fractional logarithm, using pure integer math:
fractionalLogarithm(value):
    log = 0
    for i = 1 to 20:
        value = shr(value * value, 5)
        if value >= 1000000 then:
            log = log + table[i]
            value = shr(value, 1)
    return log
Putting It All Together
Finally, for a complete logarithm of any integer your machine can represent, this is the whole thing, which will compute the logarithm with six digits of precision, in the form "0000XX.XXXXXX":
log(value):
intPart = integerLogarithm(value)
value = normalize(value)
fracPart = fractionalLogarithm(value)
result = shl(intPart, 6) + fracPart
return result
Demonstration
To show that the math works — and that it works pretty well! — below is a JavaScript implementation of the above algorithm. It uses pure integer math: Only addition, subtraction, and relative comparison. Functions are used to organize the code, but they behave like subroutines: They're not recursive, and don't nest very deeply.
You can try it out live (click the 'Run' button and type 12345 in the input field). Compare the result to the standard Math.log() function, and you'll see how close the pure-integer version gets:
function shiftLeft(value) {
var value2 = value + value;
var value4 = value2 + value2;
var value5 = value4 + value;
return value5 + value5;
}
function shl(value, count) {
while (count > 0) {
value = shiftLeft(value);
count = count - 1;
}
return value;
}
function shr(value, count) {
if (count == 0) return value;
var index = 11;
var shifted = 0;
while (index >= 0) {
var adder = shl(1, index - count);
var subtractor = shl(adder, count);
while (value > subtractor) {
value = value - subtractor;
shifted = shifted + adder;
}
index = index - 1;
}
return shifted;
}
//-----------------------------------
function multiplyByTwo(value) {
return value + value;
}
function multiplyByPowerOfTwo(value, count) {
while (count > 0) {
value = value + value;
count = count - 1;
}
return value;
}
function divideByPowerOfTwo(value, count) {
if (count == 0) return value;
var index = 39; // lg(floor(pow(10, 12)))
var shifted = 0;
while (index >= 0) {
var adder = multiplyByPowerOfTwo(1, index - count);
var subtractor = multiplyByPowerOfTwo(adder, count);
while (value >= subtractor) {
value = value - subtractor;
shifted = shifted + adder;
}
index = index - 1;
}
return shifted;
}
function divideByTwo(value) {
return divideByPowerOfTwo(value, 1);
}
function multiply(a, b) {
var product = 0;
while (b > 0) {
nextB = divideByTwo(b);
bit = b - multiplyByTwo(nextB);
if (bit != 0) {
product += a;
}
a = a + a;
b = nextB;
}
return product;
}
//-----------------------------------
var logTable = {
"1": 500000,
"2": 250000,
"3": 125000,
"4": 62500,
"5": 31250,
"6": 15625,
"7": 7813,
"8": 3906,
"9": 1953,
"10": 977,
"11": 488,
"12": 244,
"13": 122,
"14": 61,
"15": 31,
"16": 15,
"17": 8,
"18": 4,
"19": 2,
"20": 1,
};
//-----------------------------------
function integerLogarithm(value) {
var count = 0;
while (value > 9) {
value = shr(value, 1);
count = count + 1;
}
return count;
}
function normalize(value) {
var intLog = integerLogarithm(value);
if (intLog > 5)
value = shr(value, intLog - 5);
else
value = shl(value, 5 - intLog);
return value;
}
function fractionalLogarithm(value) {
var log = 0;
for (var i = 1; i <= 20; i++) {
var squaredValue = multiply(value, value);
value = shr(squaredValue, 5);
if (value >= 1000000) {
log = log + logTable[i];
value = shr(value, 1);
}
}
return log;
}
function log(value) {
var intPart = integerLogarithm(value);
value = normalize(value);
var fracPart = fractionalLogarithm(value);
var result = shl(intPart, 6) + fracPart;
return result;
}
//-----------------------------------
// Just a little jQuery event handling to wrap a UI around the above functions.
$("#InputValue").on("keydown keyup keypress focus blur", function(e) {
var inputValue = Number(this.value.replace(/[^0-9]+/g, ''));
var outputValue = log(inputValue);
$("#OutputValue").text(outputValue / 1000000);
var trueResult = Math.floor((Math.log(inputValue) / Math.log(10)) * 1000000 + 0.5) / 1000000
$("#TrueResult").text(trueResult);
});
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
Input integer: <input type="text" id="InputValue" /><br /><br />
Result using integer algorithm: <span id="OutputValue"></span><br /><br />
True logarithm: <span id="TrueResult"></span><br />
As I mentioned under your original question on SE/RC, for pow, sqrt, n-th root, log, exp see:
Power by squaring for negative exponents
and all the sub-links in there.
Once you have working *, /, <<, >> (which the other answer covers well), and maybe fixed point instead of floating point, you can also start computing goniometric functions. For that the best choice is a Chebyshev series, but as I lack the math behind them I can only use already precomputed ones ... Taylor series are common knowledge, so computing those should be easy. Here is what I coded for my arithmetics template to cover math for arbitrary data types (bignums):
// Taylor goniometric https://en.wikipedia.org/wiki/Taylor_series
friend T sin (const T &x) // = sin(x)
{
int i; T z,dz,x2,a,b;
x2=x/(pi+pi); x2-=::integer(x2); x2*=pi+pi;
for (z=x2,a=x2,b=1,x2*=x2,i=2;;)
{
a*=x2; b*=i; i++; b*=i; i++; dz=a/b; z-=dz;
a*=x2; b*=i; i++; b*=i; i++; dz=a/b; z+=dz;
if (::abs(dz)<zero) break;
}
return z;
}
friend T cos (const T &x) // = cos(x)
{
int i; T z,dz,x2,a,b;
x2=x/(pi+pi); x2-=::integer(x2); x2*=pi+pi;
for (z=1,a=1,b=1,x2*=x2,i=1;;)
{
a*=x2; b*=i; i++; b*=i; i++; dz=a/b; z-=dz;
a*=x2; b*=i; i++; b*=i; i++; dz=a/b; z+=dz;
if (::abs(dz)<zero) break;
}
return z;
}
friend T tan (const T &x) // = tan(x)
{
int i; T z0,z1,dz,x1,x2,a,b;
x1=x/pi; x1-=::integer(x1); x1*=pi; x2=x1*x1;
for (z0=1,z1=1,a=1,b=1,i=2;;)
{
a*=x2; b*=i; i++; dz=a/b; z0-=dz; // z0=cos(x)
b*=i; i++; dz=a/b; z1-=dz; // z1=sin(x)/x
a*=x2; b*=i; i++; dz=a/b; z0+=dz;
b*=i; i++; dz=a/b; z1+=dz;
if (::abs(dz)<zero) break;
}
return (x1*z1)/z0;
}
friend T ctg (const T &x) // = cotan(x)
{
int i; T z0,z1,dz,x1,x2,a,b;
x1=x/pi; x1-=::integer(x1); x1*=pi; x2=x1*x1;
for (z0=1,z1=1,a=1,b=1,i=2;;)
{
a*=x2; b*=i; i++; dz=a/b; z0-=dz; // z0=cos(x)
b*=i; i++; dz=a/b; z1-=dz; // z1=sin(x)/x
a*=x2; b*=i; i++; dz=a/b; z0+=dz;
b*=i; i++; dz=a/b; z1+=dz;
if (::abs(dz)<zero) break;
}
return z0/(x1*z1);
}
friend T asin (const T &x) // = asin(x)
{
if (x<=-1.0) return -0.5*pi;
if (x>=+1.0) return +0.5*pi;
return ::atan(x/::sqrt(1.0-(x*x)));
}
friend T acos (const T &x){ T z; z=0.5*pi-::asin(x); return z; } // = acos(x)
friend T atan (const T &x) // = atan(x)
{
bool _shift=false;
bool _invert=false;
bool _negative=false;
T z,dz,x1,x2,a,b; x1=x;
if (x1<0.0) { _negative=true; x1=-x1; }
if (x1>1.0) { _invert=true; x1=1.0/x1; }
if (x1>0.7) { _shift=true; b=::sqrt(3.0)/3.0; x1=(x1-b)/(1.0+(x1*b)); }
for (x2=x1*x1,z=x1,a=x1,b=1;;) // if x1>0.8 convergence is slow
{
a*=x2; b+=2; dz=a/b; z-=dz;
a*=x2; b+=2; dz=a/b; z+=dz;
if (::abs(dz)<zero) break;
}
if (_shift) z+=pi/6.0;
if (_invert) z=0.5*pi-z;
if (_negative) z=-z;
return z;
}
friend T actg (const T &x){ T z; z=::atan(1.0/x); return z; } // = acotan(x)
friend T atan2 (const T &y,const T &x){ return atanxy(x,y); } // = atan(y/x)
friend T atanxy (const T &x,const T &y) // = atan(y/x)
{
int sx,sy; T a;
T _zero=1.0e-30;
sx=0; if (x<-_zero) sx=-1; if (x>+_zero) sx=+1;
sy=0; if (y<-_zero) sy=-1; if (y>+_zero) sy=+1;
if ((sy==0)&&(sx==0)) return 0.0;
if ((sx==0)&&(sy> 0)) return 0.5*x.pi;
if ((sx==0)&&(sy< 0)) return 1.5*x.pi;
if ((sy==0)&&(sx> 0)) return 0.0;
if ((sy==0)&&(sx< 0)) return x.pi;
a=y/x; if (a<0) a=-a;
a=::atan(a);
if ((sx>0)&&(sy>0)) a=a;
if ((sx<0)&&(sy>0)) a=x.pi-a;
if ((sx<0)&&(sy<0)) a=x.pi+a;
if ((sx>0)&&(sy<0)) a=x.pi+x.pi-a;
return a;
}
As I mentioned you need to use floating or fixed point for this as the results are not integers !!!
But as I mentioned before, CORDIC is better suited for computing on integers (if you search, there were some Q&As here on SE/SO with C++ code for this).
IIRC it exploits an (arc)tan angle-summation identity that leads to a delta angle that is nicely computable on integers, something like sqrt(1+x*x). With binary search or approximation/iteration you can compute the tan of any angle, and using goniometric identities you can compute any cotan, sin and cos ... But I might be wrong, as I do not use CORDIC and read about it a long time ago.
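For reference, the core of rotation-mode CORDIC looks roughly like the sketch below (the angle table and gain constant are precomputed once; in a true integer CORDIC the multiplications by 2^-i become right shifts, so each step needs only shifts, adds and subtracts):
const ITER = 30;
const angles = Array.from({ length: ITER }, (_, i) => Math.atan(2 ** -i)); // precomputed table
const K = angles.reduce((k, _, i) => k / Math.sqrt(1 + 2 ** (-2 * i)), 1); // inverse CORDIC gain, ~0.6073

function cordicSinCos(theta) { // theta in [-pi/2, pi/2]
  let x = K, y = 0, z = theta;
  for (let i = 0; i < ITER; i++) {
    const d = z >= 0 ? 1 : -1;      // rotate toward z = 0
    const nx = x - d * y * 2 ** -i; // these products would be shifts in integer CORDIC
    const ny = y + d * x * 2 ** -i;
    z -= d * angles[i];
    x = nx; y = ny;
  }
  return { sin: y, cos: x };
}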
Anyway, once you have some function, its inverse can usually be computed with binary search.
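A minimal sketch of that last point, assuming a monotonically increasing f (the exp/log pair is only an example):
function invertMonotonic(f, target, lo, hi, iters = 60) {
  for (let i = 0; i < iters; i++) {
    const mid = lo + (hi - lo) / 2;
    if (f(mid) < target) lo = mid; else hi = mid;
  }
  return hi;
}
// invertMonotonic(Math.exp, 2, 0, 10) is approximately Math.log(2)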

Can this function be refactored to be O(1)

I have this function which is used to calculate a value with diminishing returns. It counts how often an ever increasing value can be subtracted from the input value and returns the number of subtractions. It is currently implemented iteratively with an infinite loop:
// inputValue is our parameter. It is manipulated in the method body.
// step counts how many subtractions have been performed so far. It is also our returned value.
// loss is the value that is being subtracted from the inputValue at each step. It grows polynomially with each step.
public int calculate(int inputValue) {
    for (int step = 1; true; step++) { // infinite loop
        int loss = (int) (1 + 0.0006 * step * step + 0.2 * step);
        if (inputValue > loss) {
            inputValue -= loss;
        } else {
            return step;
        }
    }
}
This function is used in various places within the larger application and sometimes in performance critical code. I would prefer it to be refactored in a way which does not require the loop anymore.
I am fairly sure that it is possible to somehow calculate the result more directly. But my mathematical skills seem to be insufficient to do this.
Can anybody show me a function which produces identical results without the need for a loop or recursion? It is OK if the refactored code may produce different results for extreme values and corner cases. Negative inputs need not be considered.
Thank you all in advance.
I don't think you can make the code faster preserving the exact logic. Particularly you have some hard to emulate rounding at
int loss = (int) (1 + 0.0006 * step*step + 0.2 * step);
If this is a requirement of your business logic rather than a bug, I don't think you can do significantly better. On the other hand if what you really want is something like (from the syntax I assumed you use Java):
public static int calculate_double(int inputValue) {
    double value = inputValue;
    for (int step = 1; true; step++) { // infinite loop
        double loss = (1 + 0.0006 * step * step + 0.2 * step); // no rounding!
        if (value > loss) {
            value -= loss;
        } else {
            return step;
        }
    }
}
I.e. the same logic but without a rounding at every step, then there are some hopes.
Note: unfortunately this rounding does make a difference. For example, according to my test the outputs of calculate and calculate_double differ slightly for every inputValue in the range [4, 46465] (sometimes even by more than +1; for example, for inputValue = 1000 it is calculate = 90 vs calculate_double = 88). For bigger inputValue the results are more consistent; for example, for the result of 519/520 the range of difference is only [55294, 55547]. Still, for every result there is some range of inputs with differing results.
First of all, the sum of loss in the case of no rounding for a given max step (let's call it n) has a closed formula:
sum(n) = n + 0.0006*n*(n+1)*(2n+1)/6 + 0.2*n*(n+1)/2
So theoretically, finding such an n that sum(n) < inputValue < sum(n+1) can be done by solving the cubic equation sum(x) = inputValue, which has a closed formula, and then checking values like floor(x) and ceil(x). However, the math behind this is a bit complicated, so I didn't go that route.
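For reference, expanding that closed formula gives the cubic such a direct approach would have to solve for x (taking the positive root and then checking floor(x) and ceil(x)):
sum(x) = 0.0002*x^3 + 0.1003*x^2 + 1.1001*x = inputValue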
Please also note that since int has a limited range, theoretically even your implementation of the algorithm is O(1) (because it will never take more steps than to compute calculate(Integer.MAX_VALUE) which is a constant). So probably what you really want is just a significant speed up.
Unfortunately the coefficients 0.0006 and 0.2 are small enough to make different summands the dominant part of the sum for different n. Still you can use binary search for a much better performance:
static int sum(int step) {
    // n + 0.2 * n*(n+1)/2 + 0.0006 * n*(n+1)*(2n+1)/6
    // ((0.0001*(2n+1) + 0.1) * (n+1) + 1) * n
    double s = ((0.0001 * (2 * step + 1) + 0.1) * (step + 1) + 1) * step;
    return (int) s;
}

static int calc_bin_search2(int inputValue) {
    int left = 0;
    // inputValue / 2 is a safe estimate, the answer for 100 is 27 or 28
    int right = inputValue < 100 ? inputValue : inputValue / 2;
    // for big inputValue reduce right more aggressively before starting the binary search
    if (inputValue > 1000) {
        while (true) {
            int test = right / 8;
            int tv = sum(test);
            if (tv > inputValue)
                right = test;
            else {
                left = test;
                break;
            }
        }
    }
    // just usual binary search
    while (true) {
        int mid = (left + right) / 2;
        int mv = sum(mid);
        if (mv == inputValue)
            return mid;
        else if (mid == left) {
            return mid + 1;
        } else if (mv < inputValue)
            left = mid;
        else
            right = mid;
    }
}
Note: the return mid + 1 is the copy of your original logic that returns one step after the last loss was subtracted.
In my tests this implementation matches the output of calculate_double and has roughly the same performance for inputValue under 1000, is x50 faster for values around 1_000_000, and x200 faster for values around 1_000_000_000

Finding the number of digits of an integer

What is the best method to find the number of digits of a positive integer?
I have found these 3 basic methods:
conversion to string
String s = new Integer(t).toString();
int len = s.length();
for loop
for(long long int temp = number; temp >= 1;)
{
temp/=10;
decimalPlaces++;
}
logarithmic calculation
digits = floor( log10( number ) ) + 1;
where you can calculate log10(x) = ln(x) / ln(10) in most languages.
First I thought the string method is the dirtiest one but the more I think about it the more I think it's the fastest way. Or is it?
There's always this method:
n = 1;
if ( i >= 100000000 ) { n += 8; i /= 100000000; }
if ( i >= 10000 ) { n += 4; i /= 10000; }
if ( i >= 100 ) { n += 2; i /= 100; }
if ( i >= 10 ) { n += 1; }
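For example, for i = 54321 only the i >= 10000 branch fires (n becomes 5 and i becomes 5), so the chain correctly reports 5 digits.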
Well, the correct answer would be to measure it, but you should be able to make a guess about the number of CPU steps involved in converting strings and going through them looking for an end marker.
Then think how many FPU operations/s your processor can do and how easy it is to calculate a single log.
edit: wasting some more time on a monday morning :-)
String s = new Integer(t).toString();
int len = s.length();
One of the problems with high level languages is guessing how much work the system is doing behind the scenes of an apparently simple statement. Mandatory Joel link
This statement involves allocating memory for a string, and possibly a couple of temporary copies of a string. It must parse the integer and copy the digits of it into a string, possibly having to reallocate and move the existing memory if the number is large. It might have to check a bunch of locale settings to decide if your country uses "," or ".", it might have to do a bunch of unicode conversions.
Then finding the length has to scan the entire string, again considering Unicode and any locale-specific settings such as: are you in a right-to-left language?
Alternatively:
digits = floor( log10( number ) ) + 1;
Just because this would be harder for you to do on paper doesn't mean it's hard for a computer! In fact a good rule in high performance computing seems to have been - if something is hard for a human (fluid dynamics, 3d rendering) it's easy for a computer, and if it's easy for a human (face recognition, detecting a voice in a noisy room) it's hard for a computer!
You can generally assume that the builtin maths functions log/sin/cos etc - have been an important part of computer design for 50years. So even if they don't map directly into a hardware function in the FPU you can bet that the alternative implementation is pretty efficient.
I don't know, and the answer may well be different depending on how your individual language is implemented.
So, stress test it! Implement all three solutions. Run them on 1 through 1,000,000 (or some other huge set of numbers that's representative of the numbers the solution will be running against) and time how long each of them takes.
Pit your solutions against one another and let them fight it out. Like intellectual gladiators. Three algorithms enter! One algorithm leaves!
Test conditions
Decimal numeral system
Positive integers
Up to 10 digits
Language: ActionScript 3
Results
digits: [1,10],
no. of runs: 1,000,000
random sample: 8777509,40442298,477894,329950,513,91751410,313,3159,131309,2
result: 7,8,6,6,3,8,3,4,6,1
CONVERSION TO STRING: 724ms
LOGARITMIC CALCULATION: 349ms
DIV 10 ITERATION: 229ms
MANUAL CONDITIONING: 136ms
Note: Author refrains from making any conclusions for numbers with more than 10 digits.
Script
package {
import flash.display.MovieClip;
import flash.utils.getTimer;
/**
* #author Daniel
*/
public class Digits extends MovieClip {
private const NUMBERS : uint = 1000000;
private const DIGITS : uint = 10;
private var numbers : Array;
private var digits : Array;
public function Digits() {
// ************* NUMBERS *************
numbers = [];
for (var i : int = 0; i < NUMBERS; i++) {
var number : Number = Math.floor(Math.pow(10, Math.random()*DIGITS));
numbers.push(number);
}
trace('Max digits: ' + DIGITS + ', count of numbers: ' + NUMBERS);
trace('sample: ' + numbers.slice(0, 10));
// ************* CONVERSION TO STRING *************
digits = [];
var time : Number = getTimer();
for (var i : int = 0; i < numbers.length; i++) {
digits.push(String(numbers[i]).length);
}
trace('\nCONVERSION TO STRING - time: ' + (getTimer() - time));
trace('sample: ' + digits.slice(0, 10));
// ************* LOGARITMIC CALCULATION *************
digits = [];
time = getTimer();
for (var i : int = 0; i < numbers.length; i++) {
digits.push(Math.floor( Math.log( numbers[i] ) / Math.log(10) ) + 1);
}
trace('\nLOGARITMIC CALCULATION - time: ' + (getTimer() - time));
trace('sample: ' + digits.slice(0, 10));
// ************* DIV 10 ITERATION *************
digits = [];
time = getTimer();
var digit : uint = 0;
for (var i : int = 0; i < numbers.length; i++) {
digit = 0;
for(var temp : Number = numbers[i]; temp >= 1;)
{
temp/=10;
digit++;
}
digits.push(digit);
}
trace('\nDIV 10 ITERATION - time: ' + (getTimer() - time));
trace('sample: ' + digits.slice(0, 10));
// ************* MANUAL CONDITIONING *************
digits = [];
time = getTimer();
var digit : uint;
for (var i : int = 0; i < numbers.length; i++) {
var number : Number = numbers[i];
if (number < 10) digit = 1;
else if (number < 100) digit = 2;
else if (number < 1000) digit = 3;
else if (number < 10000) digit = 4;
else if (number < 100000) digit = 5;
else if (number < 1000000) digit = 6;
else if (number < 10000000) digit = 7;
else if (number < 100000000) digit = 8;
else if (number < 1000000000) digit = 9;
else if (number < 10000000000) digit = 10;
digits.push(digit);
}
trace('\nMANUAL CONDITIONING: ' + (getTimer() - time));
trace('sample: ' + digits.slice(0, 10));
}
}
}
This algorithm might be good also, assuming that:
Number is integer and binary encoded (<< operation is cheap)
We don't know the number's boundaries
var num = 123456789L;
var len = 0;
var tmp = 1L;
while (tmp <= num)   // <= so that 1 and exact powers of ten are counted correctly
{
    len++;
    tmp = (tmp << 3) + (tmp << 1);   // tmp * 10 using shifts and an add
}
This algorithm should have speed comparable to the for-loop (2) above, but a bit faster, since it uses two bit-shifts and an addition instead of a division.
As for the log10 algorithm, it will give you only an approximate answer (close to the real one, but still approximate), since the analytic series for the log function is infinite and can't be evaluated exactly (see Wiki).
Use the simplest solution in whatever programming language you're using. I can't think of a case where counting digits in an integer would be the bottleneck in any (useful) program.
C, C++:
char buffer[32];
int length = sprintf(buffer, "%ld", (long)123456789);
Haskell:
len = (length . show) 123456789
JavaScript:
length = String(123456789).length;
PHP:
$length = strlen(123456789);
Visual Basic (untested):
length = Len(str(123456789)) - 1
conversion to string: This will have to iterate through each digit, find the character that maps to the current digit, add a character to a collection of characters. Then get the length of the resulting String object. Will run in O(n) for n=#digits.
for-loop: will perform 2 mathematical operation: dividing the number by 10 and incrementing a counter. Will run in O(n) for n=#digits.
logarithmic: Will call log10 and floor, and add 1. Looks like O(1) but I'm not really sure how fast the log10 or floor functions are. My knowledge of this sort of things has atrophied with lack of use so there could be hidden complexity in these functions.
So I guess it comes down to: is looking up digit mappings faster than multiple mathematical operations or whatever is happening in log10? The answer will probably vary. There could be platforms where the character mapping is faster, and others where doing the calculations is faster. Also keep in mind that the first method creates a new String object that only exists for the purpose of getting the length. This will probably use more memory than the other two methods, but it may or may not matter.
You can obviously eliminate method 1 from the competition, because the atoi/toString algorithm it uses would be similar to method 2.
Method 3's speed depends on whether the code is being compiled for a system whose instruction set includes log base 10.
For very large integers, the log method is much faster. For instance, with a 2491327 digit number (the 11920928th Fibonacci number, if you care), Python takes several minutes to execute the divide-by-10 algorithm, and milliseconds to execute 1+floor(log(n,10)).
import math

def numdigits(n):
    return int(math.floor(math.log10(n))) + 1
Regarding the three methods you propose for "determining the number of digits necessary to represent a given number in a given base", I don't like any of them, actually; I prefer the method I give below instead.
Re your method #1 (strings): Anything involving converting back-and-forth between strings and numbers is usually very slow.
Re your method #2 (temp/=10): This is fatally flawed because it assumes that x/10 always means "x divided by 10". But in many programming languages (eg: C, C++), if "x" is an integer type, then "x/10" means "integer division", which isn't the same thing as floating-point division, and it introduces round-off errors at every iteration, and they accumulate in a recursive formula such as your solution #2 uses.
Re your method #3 (logs): it's buggy for large numbers (at least in C, and probably other languages as well), because floating-point data types tend not to be as precise as 64-bit integers.
Hence I dislike all 3 of those methods: #1 works but is slow, #2 is broken, and #3 is buggy for large numbers. Instead, I prefer this, which works for numbers from 0 up to about 18.44 quintillion:
unsigned NumberOfDigits (uint64_t Number, unsigned Base)
{
    unsigned Digits = 1;
    uint64_t Power  = 1;
    while ( Number / Power >= Base )
    {
        ++Digits;
        Power *= Base;
    }
    return Digits;
}
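For example, NumberOfDigits(999, 10) returns 3 and NumberOfDigits(1000, 10) returns 4, and passing a different Base counts digits in that base instead.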
Keep it simple:
long long int a = 223452355415634664;
int x;
for (x = 1; a >= 10; x++)
{
a = a / 10;
}
printf("%d", x);
You can use a recursive solution instead of a loop, but somehow similar:
import scala.annotation.tailrec

@tailrec
def digits(i: Long, carry: Int = 1): Int = if (i < 10) carry else digits(i / 10, carry + 1)

digits(8345012978643L)
With longs, the picture might change - measure small and long numbers independently against different algorithms, and pick the appropriate one, depending on your typical input. :)
Of course nothing beats a switch:
switch (x) {
case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8: case 9: return 1;
case 10: case 11: // ...
case 99: return 2;
case 100: // you get the point :)
default: return 10; // switch only over int
}
except a plain-o-array:
int [] size = {1,1,1,1,1,1,1,1,1,2,2,2,2,2,... };
int x = 234561798;
return size [x];
Some people will tell you to optimize the code-size, but yaknow, premature optimization ...
log(x,n)-mod(log(x,n),1)+1
Where x is the base and n is the number.
Here is the measurement in Swift 4.
Algorithms code:
extension Int {
var numberOfDigits0: Int {
var currentNumber = self
var n = 1
if (currentNumber >= 100000000) {
n += 8
currentNumber /= 100000000
}
if (currentNumber >= 10000) {
n += 4
currentNumber /= 10000
}
if (currentNumber >= 100) {
n += 2
currentNumber /= 100
}
if (currentNumber >= 10) {
n += 1
}
return n
}
var numberOfDigits1: Int {
return String(self).count
}
var numberOfDigits2: Int {
var n = 1
var currentNumber = self
while currentNumber > 9 {
n += 1
currentNumber /= 10
}
return n
}
}
Measurement code:
var timeInterval0 = Date()
for i in 0...10000 {
i.numberOfDigits0
}
print("timeInterval0: \(Date().timeIntervalSince(timeInterval0))")
var timeInterval1 = Date()
for i in 0...10000 {
i.numberOfDigits1
}
print("timeInterval1: \(Date().timeIntervalSince(timeInterval1))")
var timeInterval2 = Date()
for i in 0...10000 {
i.numberOfDigits2
}
print("timeInterval2: \(Date().timeIntervalSince(timeInterval2))")
Output
timeInterval0: 1.92149806022644
timeInterval1: 0.557608008384705
timeInterval2: 2.83262193202972
On this measurement basis String conversion is the best option for the Swift language.
I was curious after seeing @daniel.sedlacek's results, so I did some testing using Swift for numbers having more than 10 digits. I ran the following script in the playground.
let base = [Double(100090000000), Double(100050000), Double(100050000), Double(100000200)]
var rar = [Double]()
for i in 1...10 {
for d in base {
let v = d*Double(arc4random_uniform(UInt32(1000000000)))
rar.append(v*Double(arc4random_uniform(UInt32(1000000000))))
rar.append(Double(1)*pow(1,Double(i)))
}
}
print(rar)
var timeInterval = NSDate().timeIntervalSince1970
for d in rar {
floor(log10(d))
}
var newTimeInterval = NSDate().timeIntervalSince1970
print(newTimeInterval-timeInterval)
timeInterval = NSDate().timeIntervalSince1970
for d in rar {
var c = d
while c > 10 {
c = c/10
}
}
newTimeInterval = NSDate().timeIntervalSince1970
print(newTimeInterval-timeInterval)
Results of 80 elements
0.105069875717163 for floor(log10(x))
0.867973804473877 for div 10 iterations
Adding one more approach to many of the already mentioned approaches.
The idea is to use binarySearch on an array containing the range of integers based on the digits of the int data type.
The signature of Java Arrays class binarySearch is :
binarySearch(dataType[] array, dataType key) which returns the index of the search key, if it is contained in the array; otherwise, (-(insertion point) - 1).
The insertion point is defined as the point at which the key would be inserted into the array.
Below is the implementation:
static int[] digits = {9, 99, 999, 9999, 99999, 999999, 9999999, 99999999, 999999999, Integer.MAX_VALUE};

static int digitsCounter(int N)
{
    int digitCount = Arrays.binarySearch(digits, N < 0 ? -N : N);
    return 1 + (digitCount < 0 ? ~digitCount : digitCount);
}
Please note that the above approach only works for : Integer.MIN_VALUE <= N <= Integer.MAX_VALUE, but can be easily extended for Long data type by adding more values to the digits array.
For example,
I) for N = 555, digitCount = Arrays.binarySearch(digits, 555) returns -3 (-(2)-1), as 555 is not present in the array but would be inserted at point 2, between 99 & 999, like [9, 99, 555, 999, ...].
As the index we got is negative, we need to take the bitwise complement of the result.
Finally, we need to add 1 to the result to get the actual number of digits in the number N.
In Swift 5.x, you can get the number of digits in an integer as below:
Convert to string and then count number of character in string
let nums = [1, 7892, 78, 92, 90]
for i in nums {
let ch = String(describing: i)
print(ch.count)
}
Calculating the number of digits in integer using loop
for i in nums {
    var digitCount = 0
    var tmp = i
    while tmp >= 1 {
        tmp /= 10
        digitCount += 1
    }
    print(digitCount)
}
let numDigits num =
    let num = abs(num)
    let rec numDigitsInner num =
        match num with
        | num when num < 10 -> 1
        | _ -> 1 + numDigitsInner (num / 10)
    numDigitsInner num
F# version, without casting to a string.

Multiplication of very long integers

Is there an algorithm for accurately multiplying two arbitrarily long integers together? The language I am working with is limited to 64-bit unsigned integer length (maximum integer size of 18446744073709551615). Realistically, I would like to be able to do this by breaking up each number, processing them somehow using the unsigned 64-bit integers, and then being able to put them back together in to a string (which would solve the issue of multiplied result storage).
Any ideas?
Most languages have functions or libraries that do this, usually called a Bignum library (GMP is a good one.)
If you want to do it yourself, I would do it the same way that people do long multiplication on paper. To do this you could either work with strings containing the number, or do it in binary using bitwise operations.
Example:
   45
  x67
 ----
  315
+270
 ----
 3015
Or in binary:
   101
  x101
 -----
   101
  000
+101
 -----
 11001
Edit: After doing it in binary I realized that it would be much simpler (and faster of course) to code using bitwise operations instead of strings containing the base-10 numbers. I've edited my binary multiplying example to show a pattern: for each 1-bit in the bottom number, add the top number, shifted left by the position of that 1-bit, to an accumulator variable. At the end, that variable will contain the product.
To store the product, you'll have to have two 64-bit numbers and imagine one of them being the first 64 bits and the other one the second 64 bits of the product. You'll have to write code that carries the addition from bit 63 of the second number to bit 0 of the first number.
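Written out as a small sketch, the shift-and-add pattern from the paragraphs above looks like this (BigInt is used purely so the example is not capped at 64 bits; on a 64-bit-only system the same loop would run over the two-word representation):
function shiftAddMultiply(top, bottom) { // top, bottom: BigInt
  let product = 0n;
  let shift = 0n;
  while (bottom > 0n) {
    if (bottom & 1n) {            // for each 1-bit in the bottom number...
      product += top << shift;    // ...add the correspondingly shifted top number
    }
    bottom >>= 1n;
    shift += 1n;
  }
  return product;
}
// shiftAddMultiply(45n, 67n) === 3015n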
If you can't use an existing bignum library like GMP, check out Wikipedia's article on binary multiplication with computers. There are a number of good, efficient algorithms for this.
The simplest way would be to use the schoolbook mechanism, splitting your arbitrarily sized numbers into chunks of 32-bit each.
Given A B C D * E F G H (each chunk 32-bit, for a total 128 bit)
You need an output array 9 dwords wide.
Set Out[0..8] to 0
You'd start by doing: H * D + out[8] => 64 bit result.
Store the low 32-bits in out[8] and take the high 32-bits as carry
Next: (H * C) + out[7] + carry
Again, store low 32-bit in out[7], use the high 32-bits as carry
after doing H*A + out[5] + carry, you need to continue propagating the carry (into out[4], out[3], ...) until you have no carry.
Then repeat with G, F, E.
For G, you'd start at out[7] instead of out[8], and so forth.
Finally, walk through and convert the large integer into digits (which will require a "divide large number by a single word" routine)
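A compact sketch of that schoolbook loop, using 16-bit limbs (least significant first) so every partial product fits exactly in a JavaScript number; the 32-bit-chunk version described above is the same idea with 64-bit intermediates:
function mulLimbs(a, b) { // a, b: arrays of base-65536 limbs, least significant first
  const out = new Array(a.length + b.length).fill(0);
  for (let i = 0; i < a.length; i++) {
    let carry = 0;
    for (let j = 0; j < b.length; j++) {
      const t = a[i] * b[j] + out[i + j] + carry; // always below 2^53, so exact
      out[i + j] = t % 0x10000;                   // low 16 bits stay in this limb
      carry = Math.floor(t / 0x10000);            // high bits carry into the next limb
    }
    out[i + b.length] += carry;
  }
  return out; // still needs a "divide large number by a single word" pass to print in decimal
}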
Yes, you do it using a datatype that is effectively a string of digits (just like a normal 'string' is a string of characters). How you do this is highly language-dependent. For instance, Java uses BigDecimal. What language are you using?
This is often given as a homework assignment. The algorithm you learned in grade school will work. Use a library (several are mentioned in other posts) if you need this for a real application.
Here is my code piece in C. Good old multiply method
#include <stdlib.h>
#include <string.h>

/* small helpers used below: digit <-> int, and in-place string reverse
   (strrev is not standard C, so a minimal version is defined here) */
static int get_int(char c) { return c - '0'; }

static void strrev(char s[]) {
    for (int i = 0, j = strlen(s) - 1; i < j; i++, j--) {
        char t = s[i]; s[i] = s[j]; s[j] = t;
    }
}

char *multiply(char s1[], char s2[]) {
    int l1 = strlen(s1);
    int l2 = strlen(s2);
    int i, j, k = 0, c = 0;
    char *r = (char *) malloc(l1 + l2 + 1); // add one byte for the zero terminating string
    int temp;

    strrev(s1);
    strrev(s2);
    for (i = 0; i < l1 + l2; i++) {
        r[i] = 0 + '0';
    }
    for (i = 0; i < l1; i++) {
        c = 0; k = i;
        for (j = 0; j < l2; j++) {
            temp = get_int(s1[i]) * get_int(s2[j]);
            temp = temp + c + get_int(r[k]);
            c = temp / 10;
            r[k] = temp % 10 + '0';
            k++;
        }
        if (c != 0) {
            r[k] = c + '0';
            k++;
        }
    }
    r[k] = '\0';
    strrev(r);
    return r;
}
// Here is a JavaScript version of the Karatsuba algorithm, which runs in less time than the usual schoolbook multiplication method
function range(start, stop, step) {
  if (typeof stop == 'undefined') {
    // one param defined
    stop = start;
    start = 0;
  }
  if (typeof step == 'undefined') {
    step = 1;
  }
  if ((step > 0 && start >= stop) || (step < 0 && start <= stop)) {
    return [];
  }
  var result = [];
  for (var i = start; step > 0 ? i < stop : i > stop; i += step) {
    result.push(i);
  }
  return result;
};

function zeroPad(numberString, zeros, left = true) {
  // Return the string with zeros added to the left or right.
  for (var i in range(zeros)) {
    if (left)
      numberString = '0' + numberString;
    else
      numberString = numberString + '0';
  }
  return numberString;
}
function largeMultiplication(x, y) {
  x = x.toString();
  y = y.toString();
  if (x.length == 1 && y.length == 1)
    return parseInt(x) * parseInt(y);
  if (x.length < y.length)
    x = zeroPad(x, y.length - x.length);
  else
    y = zeroPad(y, x.length - y.length);
  var n = x.length;
  var j = Math.floor(n / 2);
  // for odd digit integers
  if (n % 2 != 0)
    j += 1;
  var BZeroPadding = n - j;
  var AZeroPadding = BZeroPadding * 2;
  // declared with var so recursive calls don't clobber them
  var a = parseInt(x.substring(0, j));
  var b = parseInt(x.substring(j));
  var c = parseInt(y.substring(0, j));
  var d = parseInt(y.substring(j));
  // recursively calculate
  var ac = largeMultiplication(a, c);
  var bd = largeMultiplication(b, d);
  var k = largeMultiplication(a + b, c + d);
  var A = parseInt(zeroPad(ac.toString(), AZeroPadding, false));
  var B = parseInt(zeroPad((k - ac - bd).toString(), BZeroPadding, false));
  return A + B + bd;
}
//testing the function here
example = largeMultiplication(12, 34)
console.log(example)
