Making human readable representations of an Integer - algorithm

Here's a coding problem for those that like this kind of thing. Let's see your implementations (in your language of choice, of course) of a function which returns a human readable String representation of a specified Integer. For example:
humanReadable(1) returns "one".
humanReadable(53) returns "fifty-three".
humanReadable(723603) returns "seven hundred and twenty-three thousand, six hundred and three".
humanReadable(1456376562) returns "one billion, four hundred and fifty-six million, three hundred and seventy-six thousand, five hundred and sixty-two".
Bonus points for particularly clever/elegant solutions!
It might seem like a pointless exercise, but there are number of real world applications for this kind of algorithm (although supporting numbers as high as a billion may be overkill :-)

There was already a question about this:
Convert integers to written numbers
The answer is for C#, but I think you can figure it out.

import math
def encodeOnesDigit(num):
return ['', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine'][num]
def encodeTensDigit(num):
return ['twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety'][num-2]
def encodeTeens(num):
if num < 10:
return encodeOnesDigit(num)
else:
return ['ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen'][num-10]
def encodeTriplet(num):
if num == 0: return ''
str = ''
if num >= 100:
str = encodeOnesDigit(num / 100) + ' hundred'
tens = num % 100
if tens >= 20:
if str != '': str += ' '
str += encodeTensDigit(tens / 10)
if tens % 10 > 0:
str += '-' + encodeOnesDigit(tens % 10)
elif tens != 0:
if str != '': str += ' '
str += encodeTeens(tens)
return str
def zipNumbers(numList):
if len(numList) == 1:
return numList[0]
strList = ['', ' thousand', ' million', ' billion'] # Add more as needed
strList = strList[:len(numList)]
strList.reverse()
joinedList = zip(numList, strList)
joinedList = [item for item in joinedList if item[0] != '']
return ', '.join(''.join(item) for item in joinedList)
def humanReadable(num):
if num == 0: return 'zero'
negative = False
if num < 0:
num *= -1
negative = True
numString = str(num)
tripletCount = int(math.ceil(len(numString) / 3.0))
numString = numString.zfill(tripletCount * 3)
tripletList = [int(numString[i*3:i*3+3]) for i in range(tripletCount)]
readableList = [encodeTriplet(num) for num in tripletList]
readableStr = zipNumbers(readableList)
return 'negative ' + readableStr if negative else readableStr

Supports up to 999 million, but no negative numbers:
String humanReadable(int inputNumber) {
if (inputNumber == -1) {
return "";
}
int remainder;
int quotient;
quotient = inputNumber / 1000000;
remainder = inputNumber % 1000000;
if (quotient > 0) {
return humanReadable(quotient) + " million, " + humanReadable(remainder);
}
quotient = inputNumber / 1000;
remainder = inputNumber % 1000;
if (quotient > 0) {
return humanReadable(quotient) + " thousand, " + humanReadable(remainder);
}
quotient = inputNumber / 100;
remainder = inputNumber % 100;
if (quotient > 0) {
return humanReadable(quotient) + " hundred, " + humanReadable(remainder);
}
quotient = inputNumber / 10;
remainder = inputNumber % 10;
if (remainder == 0) {
//hackish way to flag the algorithm to not output something like "twenty zero"
remainder = -1;
}
if (quotient == 1) {
switch(inputNumber) {
case 10:
return "ten";
case 11:
return "eleven";
case 12:
return "twelve";
case 13:
return "thirteen";
case 14:
return "fourteen";
case 15:
return "fifteen";
case 16:
return "sixteen";
case 17:
return "seventeen";
case 18:
return "eighteen";
case 19:
return "nineteen";
}
}
switch(quotient) {
case 2:
return "twenty " + humanReadable(remainder);
case 3:
return "thirty " + humanReadable(remainder);
case 4:
return "forty " + humanReadable(remainder);
case 5:
return "fifty " + humanReadable(remainder);
case 6:
return "sixty " + humanReadable(remainder);
case 7:
return "seventy " + humanReadable(remainder);
case 8:
return "eighty " + humanReadable(remainder);
case 9:
return "ninety " + humanReadable(remainder);
}
switch(inputNumber) {
case 0:
return "zero";
case 1:
return "one";
case 2:
return "two";
case 3:
return "three";
case 4:
return "four";
case 5:
return "five";
case 6:
return "six";
case 7:
return "seven";
case 8:
return "eight";
case 9:
return "nine";
}
}

using System;
namespace HumanReadable
{
public static class HumanReadableExt
{
private static readonly string[] _digits = {
"", "one", "two", "three", "four", "five",
"six", "seven", "eight", "nine", "eleven", "twelve",
"thirteen", "fourteen", "fifteen", "sixteen", "seventeen",
"eighteen", "nineteen"
};
private static readonly string[] _teens = {
"", "", "twenty", "thirty", "forty", "fifty",
"sixty", "seventy", "eighty", "ninety"
};
private static readonly string[] _illions = {
"", "thousand", "million", "billion", "trillion"
};
private static string Seg(int number)
{
var work = string.Empty;
if (number >= 100)
work += _digits[number / 100] + " hundred ";
if ((number % 100) < 20)
work += _digits[number % 100];
else
work += _teens[(number % 100) / 10] + "-" + _digits[number % 10];
return work;
}
public static string HumanReadable(this int number)
{
if (number == 0)
return "zero";
var work = string.Empty;
var parts = new string[_illions.Length];
for (var ind = 0; ind < parts.Length; ind++)
parts[ind] = Seg((int) (number % Math.Pow(1000, ind + 1) / Math.Pow(1000, ind)));
for (var ind = 0; ind < parts.Length; ind++)
if (!string.IsNullOrEmpty(parts[ind]))
work = parts[ind] + " " + _illions[ind] + ", " + work;
work = work.TrimEnd(',', ' ');
var lastSpace = work.LastIndexOf(' ');
if (lastSpace >= 0)
work = work.Substring(0, lastSpace) + " and" + work.Substring(lastSpace);
return work;
}
}
class Program
{
static void Main(string[] args)
{
Console.WriteLine(1.HumanReadable());
Console.WriteLine(53.HumanReadable());
Console.WriteLine(723603.HumanReadable());
Console.WriteLine(1456376562.HumanReadable());
Console.ReadLine();
}
}
}

There's one huge problem about this function implementation. It is it's future localization. That function, written by english native speaker, most probably wouldn't work right for any other language than english. It is nearly impossible to write general easy localizable function for any human language dialect in a world, unless you really need to keep it general. Actually in real world you do not need to operate with huge integer numbers, so you can just keep all the numbers in a big (or even not so big) string array.

agreed that there are a number of real world applications.
as such there's already a number of real world implementations.
it's been part of bsdgames since pretty much forever...
> man number

Related

Remove consecutive duplicates in a string to make the smallest string

Given a string and the constraint of matching on >= 3 characters, how can you ensure that the result string will be as small as possible?
edit with gassa's explicitness:
E.G.
'AAAABBBAC'
If I remove the B's first,
AAAA[BBB]AC -- > AAAAAC, then I can remove all of the A's from the resultant string and be left with:
[AAAAA]C --> C
'C'
If I just remove what is available first (the sequence of A's), I get:
[AAAA]BBBAC -- > [BBB]AC --> AC
'AC'
A tree would definitely get you the shortest string(s).
The tree solution:
Define a State (node) for each current string Input and all its removable sub-strings' int[] Indexes.
Create the tree: For each int index create another State and add it to the parent state State[] Children.
A State with no possible removable sub-strings has no children Children = null.
Get all Descendants State[] of your root State. Order them by their shortest string Input. And that is/are your answer(s).
Test cases:
string result = FindShortest("AAAABBBAC"); // AC
string result2 = FindShortest("AABBAAAC"); // AABBC
string result3 = FindShortest("BAABCCCBBA"); // B
The Code:
Note: Of-course everyone is welcome to enhance the following code in terms of performance and/or fixing any bug.
class Program
{
static void Main(string[] args)
{
string result = FindShortest("AAAABBBAC"); // AC
string result2 = FindShortest("AABBAAAC"); // AABBC
string result3 = FindShortest("BAABCCCBBA"); // B
}
// finds the FIRST shortest string for a given input
private static string FindShortest(string input)
{
// all possible removable strings' indexes
// for this given input
int[] indexes = RemovableIndexes(input);
// each input string and its possible removables are a state
var state = new State { Input = input, Indexes = indexes };
// create the tree
GetChildren(state);
// get the FIRST shortest
// i.e. there would be more than one answer sometimes
// this could be easily changed to get all possible results
var result =
Descendants(state)
.Where(d => d.Children == null || d.Children.Length == 0)
.OrderBy(d => d.Input.Length)
.FirstOrDefault().Input;
return result;
}
// simple get all descendants of a node/state in a tree
private static IEnumerable<State> Descendants(State root)
{
var states = new Stack<State>(new[] { root });
while (states.Any())
{
State node = states.Pop();
yield return node;
if (node.Children != null)
foreach (var n in node.Children) states.Push(n);
}
}
// creates the tree
private static void GetChildren(State state)
{
// for each an index there is a child
state.Children = state.Indexes.Select(
i =>
{
var input = RemoveAllAt(state.Input, i);
return input.Length < state.Input.Length && input.Length > 0
? new State
{
Input = input,
Indexes = RemovableIndexes(input)
}
: null;
}).ToArray();
foreach (var c in state.Children)
GetChildren(c);
}
// find all possible removable strings' indexes
private static int[] RemovableIndexes(string input)
{
var indexes = new List<int>();
char d = input[0];
int count = 1;
for (int i = 1; i < input.Length; i++)
{
if (d == input[i])
count++;
else
{
if (count >= 3)
indexes.Add(i - count);
// reset
d = input[i];
count = 1;
}
}
if (count >= 3)
indexes.Add(input.Length - count);
return indexes.ToArray();
}
// remove all duplicate chars starting from an index
private static string RemoveAllAt(string input, int startIndex)
{
string part1, part2;
int endIndex = startIndex + 1;
int i = endIndex;
for (; i < input.Length; i++)
if (input[i] != input[startIndex])
{
endIndex = i;
break;
}
if (i == input.Length && input[i - 1] == input[startIndex])
endIndex = input.Length;
part1 = startIndex > 0 ? input.Substring(0, startIndex) : string.Empty;
part2 = endIndex <= (input.Length - 1) ? input.Substring(endIndex) : string.Empty;
return part1 + part2;
}
// our node, which is
// an input string &
// all possible removable strings' indexes
// & its children
public class State
{
public string Input;
public int[] Indexes;
public State[] Children;
}
}
I propose O(n^2) solution with dynamic programming.
Let's introduce notation. Prefix and suffix of length l of string A denoted by P[l] and S[l]. And we call our procedure Rcd.
Rcd(A) = Rcd(Rcd(P[n-1])+S[1])
Rcd(A) = Rcd(P[1]+Rcd(S[n-1]))
Note that outer Rcd in the RHS is trivial. So, that's our optimal substructure. Based on this i came up with the following implementation:
#include <iostream>
#include <string>
#include <vector>
#include <cassert>
using namespace std;
string remdupright(string s, bool allowEmpty) {
if (s.size() >= 3) {
auto pos = s.find_last_not_of(s.back());
if (pos == string::npos && allowEmpty) s = "";
else if (pos != string::npos && s.size() - pos > 3) s = s.substr(0, pos + 1);
}
return s;
}
string remdupleft(string s, bool allowEmpty) {
if (s.size() >= 3) {
auto pos = s.find_first_not_of(s.front());
if (pos == string::npos && allowEmpty) s = "";
else if (pos != string::npos && pos >= 3) s = s.substr(pos);
}
return s;
}
string remdup(string s, bool allowEmpty) {
return remdupleft(remdupright(s, allowEmpty), allowEmpty);
}
string run(const string in) {
vector<vector<string>> table(in.size());
for (int i = 0; i < (int)table.size(); ++i) {
table[i].resize(in.size() - i);
}
for (int i = 0; i < (int)table[0].size(); ++i) {
table[0][i] = in.substr(i,1);
}
for (int len = 2; len <= (int)table.size(); ++len) {
for (int pos = 0; pos < (int)in.size() - len + 1; ++pos) {
string base(table[len - 2][pos]);
const char suffix = in[pos + len - 1];
if (base.size() && suffix != base.back()) {
base = remdupright(base, false);
}
const string opt1 = base + suffix;
base = table[len - 2][pos+1];
const char prefix = in[pos];
if (base.size() && prefix != base.front()) {
base = remdupleft(base, false);
}
const string opt2 = prefix + base;
const string nodupopt1 = remdup(opt1, true);
const string nodupopt2 = remdup(opt2, true);
table[len - 1][pos] = nodupopt1.size() > nodupopt2.size() ? opt2 : opt1;
assert(nodupopt1.size() != nodupopt2.size() || nodupopt1 == nodupopt2);
}
}
string& res = table[in.size() - 1][0];
return remdup(res, true);
}
void testRcd(string s, string expected) {
cout << s << " : " << run(s) << ", expected: " << expected << endl;
}
int main()
{
testRcd("BAABCCCBBA", "B");
testRcd("AABBAAAC", "AABBC");
testRcd("AAAA", "");
testRcd("AAAABBBAC", "C");
}
You can check default and run your tests here.
Clearly we are not concerned about any block of repeated characters longer than 2 characters. And there is only one way two blocks of the same character where at least one of the blocks is less than 3 in length can be combined - namely, if the sequence between them can be removed.
So (1) look at pairs of blocks of the same character where at least one is less than 3 in length, and (2) determine if the sequence between them can be removed.
We want to decide which pairs to join so as to minimize the total length of blocks less than 3 characters long. (Note that the number of pairs is bound by the size (and distribution) of the alphabet.)
Let f(b) represent the minimal total length of same-character blocks remaining up to the block b that are less than 3 characters in length. Then:
f(b):
p1 <- previous block of the same character
if b and p1 can combine:
if b.length + p1.length > 2:
f(b) = min(
// don't combine
(0 if b.length > 2 else b.length) +
f(block before b),
// combine
f(block before p1)
)
// b.length + p1.length < 3
else:
p2 <- block previous to p1 of the same character
if p1 and p2 can combine:
f(b) = min(
// don't combine
b.length + f(block before b),
// combine
f(block before p2)
)
else:
f(b) = b.length + f(block before b)
// b and p1 cannot combine
else:
f(b) = b.length + f(block before b)
for all p1 before b
The question is how can we efficiently determine if a block can be combined with the previous block of the same character (aside from the obvious recursion into the sub-block-list between the two blocks).
Python code:
import random
import time
def parse(length):
return length if length < 3 else 0
def f(string):
chars = {}
blocks = [[string[0], 1, 0]]
chars[string[0]] = {'indexes': [0]}
chars[string[0]][0] = {'prev': -1}
p = 0 # pointer to current block
for i in xrange(1, len(string)):
if blocks[len(blocks) - 1][0] == string[i]:
blocks[len(blocks) - 1][1] += 1
else:
p += 1
# [char, length, index, f(i), temp]
blocks.append([string[i], 1, p])
if string[i] in chars:
chars[string[i]][p] = {'prev': chars[string[i]]['indexes'][ len(chars[string[i]]['indexes']) - 1 ]}
chars[string[i]]['indexes'].append(p)
else:
chars[string[i]] = {'indexes': [p]}
chars[string[i]][p] = {'prev': -1}
#print blocks
#print
#print chars
#print
memo = [[None for j in xrange(len(blocks))] for i in xrange(len(blocks))]
def g(l, r, top_level=False):
####
####
#print "(l, r): (%s, %s)" % (l,r)
if l == r:
return parse(blocks[l][1])
if memo[l][r]:
return memo[l][r]
result = [parse(blocks[l][1])] + [None for k in xrange(r - l)]
if l < r:
for i in xrange(l + 1, r + 1):
result[i - l] = parse(blocks[i][1]) + result[i - l - 1]
for i in xrange(l, r + 1):
####
####
#print "\ni: %s" % i
[char, length, index] = blocks[i]
#p1 <- previous block of the same character
p1_idx = chars[char][index]['prev']
####
####
#print "(p1_idx, l, p1_idx >= l): (%s, %s, %s)" % (p1_idx, l, p1_idx >= l)
if p1_idx < l and index > l:
result[index - l] = parse(length) + result[index - l - 1]
while p1_idx >= l:
p1 = blocks[p1_idx]
####
####
#print "(b, p1, p1_idx, l): (%s, %s, %s, %s)\n" % (blocks[i], p1, p1_idx, l)
between = g(p1[2] + 1, index - 1)
####
####
#print "between: %s" % between
#if b and p1 can combine:
if between == 0:
if length + p1[1] > 2:
result[index - l] = min(
result[index - l],
# don't combine
parse(length) + (result[index - l - 1] if index - l > 0 else 0),
# combine: f(block before p1)
result[p1[2] - l - 1] if p1[2] > l else 0
)
# b.length + p1.length < 3
else:
#p2 <- block previous to p1 of the same character
p2_idx = chars[char][p1[2]]['prev']
if p2_idx < l:
p1_idx = chars[char][p1_idx]['prev']
continue
between2 = g(p2_idx + 1, p1[2] - 1)
#if p1 and p2 can combine:
if between2 == 0:
result[index - l] = min(
result[index - l],
# don't combine
parse(length) + (result[index - l - 1] if index - l > 0 else 0),
# combine the block, p1 and p2
result[p2_idx - l - 1] if p2_idx - l > 0 else 0
)
else:
#f(b) = b.length + f(block before b)
result[index - l] = min(
result[index - l],
parse(length) + (result[index - l - 1] if index - l > 0 else 0)
)
# b and p1 cannot combine
else:
#f(b) = b.length + f(block before b)
result[index - l] = min(
result[index - l],
parse(length) + (result[index - l - 1] if index - l > 0 else 0)
)
p1_idx = chars[char][p1_idx]['prev']
#print l,r,result
memo[l][r] = result[r - l]
"""if top_level:
return (result, blocks)
else:"""
return result[r - l]
if len(blocks) == 1:
return ([parse(blocks[0][1])], blocks)
else:
return g(0, len(blocks) - 1, True)
"""s = ""
for i in xrange(300):
s = s + ['A','B','C'][random.randint(0,2)]"""
print f("abcccbcccbacccab") # b
print
print f("AAAABBBAC"); # C
print
print f("CAAAABBBA"); # C
print
print f("AABBAAAC"); # AABBC
print
print f("BAABCCCBBA"); # B
print
print f("aaaa")
print
The string answers for these longer examples were computed using jdehesa's answer:
t0 = time.time()
print f("BCBCCBCCBCABBACCBABAABBBABBBACCBBBAABBACBCCCACABBCAABACBBBBCCCBBAACBAABACCBBCBBAABCCCCCAABBBBACBBAAACACCBCCBBBCCCCCCCACBABACCABBCBBBBBCBABABBACCAACBCBBAACBBBBBCCBABACBBABABAAABCCBBBAACBCACBAABAAAABABB")
# BCBCCBCCBCABBACCBABCCAABBACBACABBCAABACAACBAABACCBBCBBCACCBACBABACCABBCCBABABBACCAACBCBBAABABACBBABABBCCAACBCACBAABBABB
t1 = time.time()
total = t1-t0
print total
t0 = time.time()
print f("CBBACAAAAABBBBCAABBCBAABBBCBCBCACACBAABCBACBBABCABACCCCBACBCBBCBACBBACCCBAAAACACCABAACCACCBCBCABAACAABACBABACBCBAACACCBCBCCCABACABBCABBAAAAABBBBAABAABBCACACABBCBCBCACCCBABCAACBCAAAABCBCABACBABCABCBBBBABCBACABABABCCCBBCCBBCCBAAABCABBAAABBCAAABCCBAABAABCAACCCABBCAABCBCBCBBAACCBBBACBBBCABAABCABABABABCA")
# CBBACCAABBCBAACBCBCACACBAABCBACBBABCABABACBCBBCBACBBABCACCABAACCACCBCBCABAACAABACBABACBCBAACACCBCBABACABBCBBCACACABBCBCBCABABCAACBCBCBCABACBABCABCABCBACABABACCBBCCBBCACBCCBAABAABCBBCAABCBCBCBBAACCACCABAABCABABABABCA
t1 = time.time()
total = t1-t0
print total
t0 = time.time()
print f("AADBDBEBBBBCABCEBCDBBBBABABDCCBCEBABADDCABEEECCECCCADDACCEEAAACCABBECBAEDCEEBDDDBAAAECCBBCEECBAEBEEEECBEEBDACDDABEEABEEEECBABEDDABCDECDAABDAEADEECECEBCBDDAEEECCEEACCBBEACDDDDBDBCCAAECBEDAAAADBEADBAAECBDEACDEABABEBCABDCEEAABABABECDECADCEDAEEEBBBCEDECBCABDEDEBBBABABEEBDAEADBEDABCAEABCCBCCEDCBBEBCECCCA")
# AADBDBECABCEBCDABABDCCBCEBABADDCABCCEADDACCEECCABBECBAEDCEEBBECCBBCEECBAEBCBEEBDACDDABEEABCBABEDDABCDECDAABDAEADEECECEBCBDDACCEEACCBBEACBDBCCAAECBEDDBEADBAAECBDEACDEABABEBCABDCEEAABABABECDECADCEDACEDECBCABDEDEABABEEBDAEADBEDABCAEABCCBCCEDCBBEBCEA
t1 = time.time()
total = t1-t0
print total
Another scala answer, using memoization and tailcall optimization (partly) (updated).
import scala.collection.mutable.HashSet
import scala.annotation._
object StringCondense extends App {
#tailrec
def groupConsecutive (s: String, sofar: List[String]): List[String] = s.toList match {
// def groupConsecutive (s: String): List[String] = s.toList match {
case Nil => sofar
// case Nil => Nil
case c :: str => {
val (prefix, rest) = (c :: str).span (_ == c)
// Strings of equal characters, longer than 3, don't make a difference to just 3
groupConsecutive (rest.mkString(""), (prefix.take (3)).mkString ("") :: sofar)
// (prefix.take (3)).mkString ("") :: groupConsecutive (rest.mkString(""))
}
}
// to count the effect of memoization
var count = 0
// recursively try to eliminate every group of 3 or more, brute forcing
// but for "aabbaabbaaabbbaabb", many reductions will lead sooner or
// later to the same result, so we try to detect these and avoid duplicate
// work
def moreThan2consecutive (s: String, seenbefore: HashSet [String]): String = {
if (seenbefore.contains (s)) s
else
{
count += 1
seenbefore += s
val sublists = groupConsecutive (s, Nil)
// val sublists = groupConsecutive (s)
val atLeast3 = sublists.filter (_.size > 2)
atLeast3.length match {
case 0 => s
case 1 => {
val res = sublists.filter (_.size < 3)
moreThan2consecutive (res.mkString (""), seenbefore)
}
case _ => {
val shrinked = (
for {idx <- (0 until sublists.size)
if (sublists (idx).length >= 3)
pre = (sublists.take (idx)).mkString ("")
post= (sublists.drop (idx+1)).mkString ("")
} yield {
moreThan2consecutive (pre + post, seenbefore)
}
)
(shrinked.head /: shrinked.tail) ((a, b) => if (a.length <= b.length) a else b)
}
}
}
}
// don't know what Rcd means, adopted from other solution but modified
// kind of a unit test **update**: forgot to reset count
testRcd (s: String, expected: String) : Boolean = {
count = 0
val seenbefore = HashSet [String] ()
val result = moreThan2consecutive (s, seenbefore)
val hit = result.equals (expected)
println (s"Input: $s\t result: ${result}\t expected ${expected}\t $hit\t count: $count");
hit
}
// some test values from other users with expected result
// **upd:** more testcases
def testgroup () : Unit = {
testRcd ("baabcccbba", "b")
testRcd ("aabbaaac", "aabbc")
testRcd ("aaaa", "")
testRcd ("aaaabbbac", "c")
testRcd ("abcccbcccbacccab", "b")
testRcd ("AAAABBBAC", "C")
testRcd ("CAAAABBBA", "C")
testRcd ("AABBAAAC", "AABBC")
testRcd ("BAABCCCBBA", "B")
testRcd ("AAABBBAAABBBAAABBBC", "C") // 377 subcalls reported by Yola,
testRcd ("AAABBBAAABBBAAABBBAAABBBC", "C") // 4913 when preceeded with AAABBB
}
testgroup
def testBigs () : Unit = {
/*
testRcd ("BCBCCBCCBCABBACCBABAABBBABBBACCBBBAABBACBCCCACABBCAABACBBBBCCCBBAACBAABACCBBCBBAABCCCCCAABBBBACBBAAACACCBCCBBBCCCCCCCACBABACCABBCBBBBBCBABABBACCAACBCBBAACBBBBBCCBABACBBABABAAABCCBBBAACBCACBAABAAAABABB",
"BCBCCBCCBCABBACCBABCCAABBACBACABBCAABACAACBAABACCBBCBBCACCBACBABACCABBCCBABABBACCAACBCBBAABABACBBABABBCCAACBCACBAABBABB")
*/
testRcd ("CBBACAAAAABBBBCAABBCBAABBBCBCBCACACBAABCBACBBABCABACCCCBACBCBBCBACBBACCCBAAAACACCABAACCACCBCBCABAACAABACBABACBCBAACACCBCBCCCABACABBCABBAAAAABBBBAABAABBCACACABBCBCBCACCCBABCAACBCAAAABCBCABACBABCABCBBBBABCBACABABABCCCBBCCBBCCBAAABCABBAAABBCAAABCCBAABAABCAACCCABBCAABCBCBCBBAACCBBBACBBBCABAABCABABABABCA",
"CBBACCAABBCBAACBCBCACACBAABCBACBBABCABABACBCBBCBACBBABCACCABAACCACCBCBCABAACAABACBABACBCBAACACCBCBABACABBCBBCACACABBCBCBCABABCAACBCBCBCABACBABCABCABCBACABABACCBBCCBBCACBCCBAABAABCBBCAABCBCBCBBAACCACCABAABCABABABABCA")
/*testRcd ("AADBDBEBBBBCABCEBCDBBBBABABDCCBCEBABADDCABEEECCECCCADDACCEEAAACCABBECBAEDCEEBDDDBAAAECCBBCEECBAEBEEEECBEEBDACDDABEEABEEEECBABEDDABCDECDAABDAEADEECECEBCBDDAEEECCEEACCBBEACDDDDBDBCCAAECBEDAAAADBEADBAAECBDEACDEABABEBCABDCEEAABABABECDECADCEDAEEEBBBCEDECBCABDEDEBBBABABEEBDAEADBEDABCAEABCCBCCEDCBBEBCECCCA",
"AADBDBECABCEBCDABABDCCBCEBABADDCABCCEADDACCEECCABBECBAEDCEEBBECCBBCEECBAEBCBEEBDACDDABEEABCBABEDDABCDECDAABDAEADEECECEBCBDDACCEEACCBBEACBDBCCAAECBEDDBEADBAAECBDEACDEABABEBCABDCEEAABABABECDECADCEDACEDECBCABDEDEABABEEBDAEADBEDABCAEABCCBCCEDCBBEBCEA")
*/
}
// for generated input, but with fixed seed, to compare the count with
// and without memoization
import util.Random
val r = new Random (31415)
// generate Strings but with high chances to produce some triples and
// longer sequences of char clones
def genRandomString () : String = {
(1 to 20).map (_ => r.nextInt (6) match {
case 0 => "t"
case 1 => "r"
case 2 => "-"
case 3 => "tt"
case 4 => "rr"
case 5 => "--"
}).mkString ("")
}
def testRandom () : Unit = {
(1 to 10).map (i=> testRcd (genRandomString, "random mode - false might be true"))
}
testRandom
testgroup
testRandom
// testBigs
}
Comparing the effect of memoization lead to interesting results:
Updated measurements. In the old values, I forgot to reset the counter, which leaded to much higher results. Now the spreading of results
is much more impressive and in total, the values are smaller.
No seenbefore:
Input: baabcccbba result: b expected b true count: 4
Input: aabbaaac result: aabbc expected aabbc true count: 2
Input: aaaa result: expected true count: 2
Input: aaaabbbac result: c expected c true count: 5
Input: abcccbcccbacccab result: b expected b true count: 34
Input: AAAABBBAC result: C expected C true count: 5
Input: CAAAABBBA result: C expected C true count: 5
Input: AABBAAAC result: AABBC expected AABBC true count: 2
Input: BAABCCCBBA result: B expected B true count: 4
Input: AAABBBAAABBBAAABBBC res: C expected C true count: 377
Input: AAABBBAAABBBAAABBBAAABBBC r: C expected C true count: 4913
Input: r--t----ttrrrrrr--tttrtttt--rr----result: rr--rr expected ? unknown ? false count: 1959
Input: ttrtt----tr---rrrtttttttrtr--rr result: r--rr expected ? unknown ? false count: 213
Input: tt----r-----ttrr----ttrr-rr--rr-- result: ttrttrrttrr-rr--rr-- ex ? unknown ? false count: 16
Input: --rr---rrrrrrr-r--rr-r--tt--rrrrr result: rr-r--tt-- expected ? unknown ? false count: 32
Input: tt-rrrrr--r--tt--rrtrrr------- result: ttr--tt--rrt expected ? unknown ? false count: 35
Input: --t-ttt-ttt--rrrrrt-rrtrttrr result: --tt-rrtrttrr expected ? unknown ? false count: 35
Input: rrt--rrrr----trrr-rttttrrtttrr result: rrtt- expected ? unknown ? false count: 1310
Input: ---tttrrrrrttrrttrr---tt-----tt result: rrttrr expected ? unknown ? false count: 1011
Input: -rrtt--rrtt---t-r--r---rttr-- result: -rrtt--rr-r--rrttr-- ex ? unknown ? false count: 9
Input: rtttt--rrrrrrrt-rrttt--tt--t result: r--t-rr--tt--t expectd ? unknown ? false count: 16
real 0m0.607s (without testBigs)
user 0m1.276s
sys 0m0.056s
With seenbefore:
Input: baabcccbba result: b expected b true count: 4
Input: aabbaaac result: aabbc expected aabbc true count: 2
Input: aaaa result: expected true count: 2
Input: aaaabbbac result: c expected c true count: 5
Input: abcccbcccbacccab result: b expected b true count: 11
Input: AAAABBBAC result: C expected C true count: 5
Input: CAAAABBBA result: C expected C true count: 5
Input: AABBAAAC result: AABBC expected AABBC true count: 2
Input: BAABCCCBBA result: B expected B true count: 4
Input: AAABBBAAABBBAAABBBC rest: C expected C true count: 28
Input: AAABBBAAABBBAAABBBAAABBBC C expected C true count: 52
Input: r--t----ttrrrrrr--tttrtttt--rr----result: rr--rr expected ? unknown ? false count: 63
Input: ttrtt----tr---rrrtttttttrtr--rr result: r--rr expected ? unknown ? false count: 48
Input: tt----r-----ttrr----ttrr-rr--rr-- result: ttrttrrttrr-rr--rr-- xpe? unknown ? false count: 8
Input: --rr---rrrrrrr-r--rr-r--tt--rrrrr result: rr-r--tt-- expected ? unknown ? false count: 19
Input: tt-rrrrr--r--tt--rrtrrr------- result: ttr--tt--rrt expected ? unknown ? false count: 12
Input: --t-ttt-ttt--rrrrrt-rrtrttrr result: --tt-rrtrttrr expected ? unknown ? false count: 16
Input: rrt--rrrr----trrr-rttttrrtttrr result: rrtt- expected ? unknown ? false count: 133
Input: ---tttrrrrrttrrttrr---tt-----tt result: rrttrr expected ? unknown ? false count: 89
Input: -rrtt--rrtt---t-r--r---rttr-- result: -rrtt--rr-r--rrttr-- ex ? unknown ? false count: 6
Input: rtttt--rrrrrrrt-rrttt--tt--t result: r--t-rr--tt--t expected ? unknown ? false count: 8
real 0m0.474s (without testBigs)
user 0m0.852s
sys 0m0.060s
With tailcall:
real 0m0.478s (without testBigs)
user 0m0.860s
sys 0m0.060s
For some random strings, the difference is bigger than a 10fold.
For long Strings with many groups one could, as an improvement, eliminate all groups which are the only group of that character, for instance:
aa bbb aa ccc xx ddd aa eee aa fff xx
The groups bbb, ccc, ddd, eee and fff are unique in the string, so they can't fit to something else and could all be eliminated, and the order of removal is will not matter. This would lead to the intermediate result
aaaa xx aaaa xx
and a fast solution. Maybe I try to implement it too. However, I guess, it will be possible to produce random Strings, where this will have a big impact and by a different form of random generated strings, to distributions, where the impact is low.
Here is a Python solution (function reduce_min), not particularly smart but I think fairly easy to understand (excessive amount of comments added for answer clarity):
def reductions(s, min_len):
"""
Yields every possible reduction of s by eliminating contiguous blocks
of l or more repeated characters.
For example, reductions('AAABBCCCCBAAC', 3) yields
'BBCCCCBAAC' and 'AAABBBAAC'.
"""
# Current character
curr = ''
# Length of current block
n = 0
# Start position of current block
idx = 0
# For each character
for i, c in enumerate(s):
if c != curr:
# New block begins
if n >= min_len:
# If previous block was long enough
# yield reduced string without it
yield s[:idx] + s[i:]
# Start new block
curr = c
n = 1
idx = i
else:
# Still in the same block
n += 1
# Yield reduction without last block if it was long enough
if n >= min_len:
yield s[:idx]
def reduce_min(s, min_len):
"""
Finds the smallest possible reduction of s by successive
elimination of contiguous blocks of min_len or more repeated
characters.
"""
# Current set of possible reductions
rs = set([s])
# Current best solution
result = s
# While there are strings to reduce
while rs:
# Get one element
r = rs.pop()
# Find reductions
r_red = list(reductions(r, min_len))
# If no reductions are found it is irreducible
if len(r_red) == 0 and len(r) < len(result):
# Replace if shorter than current best
result = r
else:
# Save reductions for next iterations
rs.update(r_red)
return result
assert reduce_min("BAABCCCBBA", 3) == "B"
assert reduce_min("AABBAAAC", 3) == "AABBC"
assert reduce_min("AAAA", 3) == ""
assert reduce_min("AAAABBBAC", 3) == "C"
EDIT: Since people seem to be posting C++ solutions, here is mine in C++ (again, function reduce_min):
#include <string>
#include <vector>
#include <unordered_set>
#include <iterator>
#include <utility>
#include <cassert>
using namespace std;
void reductions(const string &s, unsigned int min_len, vector<string> &rs)
{
char curr = '\0';
unsigned int n = 0;
unsigned int idx = 0;
for (auto it = s.begin(); it != s.end(); ++it)
{
if (curr != *it)
{
auto i = distance(s.begin(), it);
if (n >= min_len)
{
rs.push_back(s.substr(0, idx) + s.substr(i));
}
curr = *it;
n = 1;
idx = i;
}
else
{
n += 1;
}
}
if (n >= min_len)
{
rs.push_back(s.substr(0, idx));
}
}
string reduce_min(const string &s, unsigned int min_len)
{
unordered_set<string> rs { s };
string result = s;
vector<string> rs_new;
while (!rs.empty())
{
auto it = rs.begin();
auto r = *it;
rs.erase(it);
rs_new.clear();
reductions(r, min_len, rs_new);
if (rs_new.empty() && r.size() < result.size())
{
result = move(r);
}
else
{
rs.insert(rs_new.begin(), rs_new.end());
}
}
return result;
}
int main(int argc, char **argv)
{
assert(reduce_min("BAABCCCBBA", 3) == "B");
assert(reduce_min("AABBAAAC", 3) == "AABBC");
assert(reduce_min("AAAA", 3) == "");
assert(reduce_min("AAAABBBAC", 3) == "C");
return 0;
}
If you can use C++17 you can save memory by using string views.
EDIT 2: About the complexity of the algorithm. It is not straightforward to figure out, and as I said the algorithm is meant to be simple more than anything, but let's see. In the end, it is more or less the same as a breadth-first search. Let's say the string length is n, and, for generality, let's say the minimum block length (value 3 in the question) is m. In the first level, we can generate up to n / m reductions in the worst case. For each of these, we can generate up to (n - m) / m reductions, and so on. So basically, at "level" i (loop iteration i) we create up to (n - i * m) / m reductions per string we had, and each of these will take O(n - i * m) time to process. The maximum number of levels we can have is, again, n / m. So the complexity of the algorithm (if I'm not making mistakes) should have the form:
O( sum {i = 0 .. n / m} ( O(n - i * m) * prod {j = 0 .. i} ((n - i * m) / m) ))
|-Outer iters--| |---Cost---| |-Prev lvl-| |---Branching---|
Whew. So this should be something like:
O( sum {i = 0 .. n / m} (n - i * m) * O(n^i / m^i) )
Which in turn would collapse to:
O((n / m)^(n / m))
So yeah, the algorithm is more or less simple, but it can run into exponential cost cases (the bad cases would be strings made entirely of exactly m-long blocks, like AAABBBCCCAAACCC... for m = 3).

After Effects: Return value if variable equals part of a set of numbers

I'm writing an AE Expression that will spit out a number based on an Effect's keyframed value on the same layer. That is, if it's 1, value is 100, if it's 2, value is 101, if it's 3, 99, etc. Here's what I've got working:
x = effect("Mouth")("Slider");
if (x == 7 || x == 11 || x == 16) {
103
} else if (x == 6 || x == 10 || x == 15 || x == 25 || x == 26){
102
} else if (x == 5 || x == 9 || x == 12 || x == 14 || x == 19 || x == 24 || x == 27 || x == 28){
101
} else {
100
}
Surely there is a more elegant way to do this? I've tried writing it
if (x == 7 || 11 || 16)
but telling After Effects X absolutely equals "this" OR "that" just makes it assume it also equals "everything". Argh.
This is a little weird (arguably elegant?), but if you're just looking for something more compact, you could do:
x = effect("Mouth")("Slider");
if (",7,11,16,".indexOf(","+x+",") != -1) {
103
} else if (",6,10,15,25,26,".indexOf(","+x+",") != -1) {
102
} else if (",5,9,12,14,19,24,27,28,".indexOf(","+x+",") != -1) {
101
} else {
100
}
but don't forget the commas at both ends!
I thought about using ECMA's array.indexOf(x), but it is not supported in AE's JS expressions. [edit: mistakedly wrote 'offsetOf']
Here are two other approaches you could use. You can compress this code more, but I left it spread out for readability.
For shorter lists you can use a switch/case and have multiple options execute the same code. Like so:
thingToTest = effect("Mouth")("Slider");
switch (thingToTest) {
case 7:
case 11:
case 16:
result = 103;
break;
case 6:
case 10:
case 15:
case 25:
case 26:
result = 102;
break;
case 5:
case 9:
case 12:
case 14:
case 19:
case 24:
case 27:
case 28:
result = 101;
break;
default:
result = 100;
break;
}
The problem is that if you have a lot of possible outcomes to check for, that could become quite unwieldy. In which case, I'd make the values for each outcome case be an array and loop through them.
thingToTest = effect("Mouth")("Slider");
mySet1 = [7, 11, 16];
mySet2 = [6, 10, 15, 25, 26];
mySet3 = [5, 9, 12, 14, 19, 24, 27, 28];
result = 100; // Default
foundIt = 0;
for (i = 0; i < mySet1.length; i++) {
if (mySet1[i] == thingToTest) {
result = 103;
foundIt = 1;
break;
}
}
if(foundIt) {
for (i = 0; i < mySet2.length; i++) {
if (mySet2[i] == thingToTest) {
result = 102;
foundIt = 1;
break;
}
}
}
if(foundIt) {
for (i = 0; i < mySet3.length; i++) {
if (mySet3[i] == thingToTest) {
result = 101;
foundIt = 1;
break;
}
}
}
result;
Putting the successive groups in a conditional statement marginally improves performance by not iterating through those arrays if a match has already been found. For efficiency, it would make sense to test against the contents of your longest list of numbers first, because it's more likely to contain the match.
These solutions may not be as compact but are definitely scaleable.

How to convert a decimal base (10) to a negabinary base (-2)?

I want to write a program to convert from decimal to negabinary.
I cannot figure out how to convert from decimal to negabinary.
I have no idea about how to find the rule and how it works.
Example: 7(base10)-->11011(base-2)
I just know it is 7 = (-2)^0*1 + (-2)^1*1 + (-2)^2*0 + (-2)^3*1 + (-2)^4*1.
The algorithm is described in http://en.wikipedia.org/wiki/Negative_base#Calculation. Basically, you just pick the remainder as the positive base case and make sure the remainder is nonnegative and minimal.
7 = -3*-2 + 1 (least significant digit)
-3 = 2*-2 + 1
2 = -1*-2 + 0
-1 = 1*-2 + 1
1 = 0*-2 + 1 (most significant digit)
def neg2dec(arr):
n = 0
for i, num in enumerate(arr[::-1]):
n+= ((-2)**i)*num
return n
def dec2neg(num):
if num == 0:
digits = ['0']
else:
digits = []
while num != 0:
num, remainder = divmod(num, -2)
if remainder < 0:
num, remainder = num + 1, remainder + 2
digits.append(str(remainder))
return ''.join(digits[::-1])
Just my two cents (C#):
public static int[] negaBynary(int value)
{
List<int> result = new List<int> ();
while (value != 0)
{
int remainder = value % -2;
value = value / -2;
if (remainder < 0)
{
remainder += 2;
value += 1;
}
Console.WriteLine (remainder);
result.Add(remainder);
}
return result.ToArray();
}
There is a method (attributed to Librik/Szudzik/Schröppel) that is much more efficient:
uint64_t negabinary(int64_t num) {
const uint64_t mask = 0xAAAAAAAAAAAAAAAA;
return (mask + num) ^ mask;
}
The conversion method and its reverse are described in more detail in this answer.
Here is some code that solves it and display the math behind it.
Some code taken from "Birender Singh"
#https://onlinegdb.com/xR1E5Cj7L
def neg2dec(arr):
n = 0
for i, num in enumerate(arr[::-1]):
n+= ((-2)**i)*num
return n
def dec2neg(num):
oldNum = num
if num == 0:
digits = ['0']
else:
digits = []
while num != 0:
num, remainder = divmod(num, -10)
if remainder < 0:
num, remainder = num + 1, remainder + 10
print(str(oldNum) + " = " + str(num) + " * -10 + " + str(remainder))
oldNum = num
digits.append(str(remainder))
return ''.join(digits[::-1])
print(dec2neg(-8374932))
Output:
-8374932 = 837494 * -10 + 8
837494 = -83749 * -10 + 4
-83749 = 8375 * -10 + 1
8375 = -837 * -10 + 5
-837 = 84 * -10 + 3
84 = -8 * -10 + 4
-8 = 1 * -10 + 2
1 = 0 * -10 + 1
12435148

Algorithm that converts numeric amount into English words

What is the most efficient way to convert numeric amount into English words
e.g. 12 to twelve
127 to one hundred twenty-seven
That didn't take long. This is an implementation written in Java.
http://snippets.dzone.com/posts/show/3685
Code
public class IntToEnglish {
static String[] to_19 = { "zero", "one", "two", "three", "four", "five", "six",
"seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen",
"fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen" };
static String[] tens = { "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};
static String[] denom = { "",
"thousand", "million", "billion", "trillion", "quadrillion",
"quintillion", "sextillion", "septillion", "octillion", "nonillion",
"decillion", "undecillion", "duodecillion", "tredecillion", "quattuordecillion",
"sexdecillion", "septendecillion", "octodecillion", "novemdecillion", "vigintillion" };
public static void main(String[] argv) throws Exception {
int tstValue = Integer.parseInt(argv[0]);
IntToEnglish itoe = new IntToEnglish();
System.out.println(itoe.english_number(tstValue));
/* for (int i = 0; i < 2147483647; i++) {
System.out.println(itoe.english_number(i));
} */
}
// convert a value < 100 to English.
private String convert_nn(int val) throws Exception {
if (val < 20)
return to_19[val];
for (int v = 0; v < tens.length; v++) {
String dcap = tens[v];
int dval = 20 + 10 * v;
if (dval + 10 > val) {
if ((val % 10) != 0)
return dcap + "-" + to_19[val % 10];
return dcap;
}
}
throw new Exception("Should never get here, less than 100 failure");
}
// convert a value < 1000 to english, special cased because it is the level that kicks
// off the < 100 special case. The rest are more general. This also allows you to
// get strings in the form of "forty-five hundred" if called directly.
private String convert_nnn(int val) throws Exception {
String word = "";
int rem = val / 100;
int mod = val % 100;
if (rem > 0) {
word = to_19[rem] + " hundred";
if (mod > 0) {
word = word + " ";
}
}
if (mod > 0) {
word = word + convert_nn(mod);
}
return word;
}
public String english_number(int val) throws Exception {
if (val < 100) {
return convert_nn(val);
}
if (val < 1000) {
return convert_nnn(val);
}
for (int v = 0; v < denom.length; v++) {
int didx = v - 1;
int dval = new Double(Math.pow(1000, v)).intValue();
if (dval > val) {
int mod = new Double(Math.pow(1000, didx)).intValue();
int l = val / mod;
int r = val - (l * mod);
String ret = convert_nnn(l) + " " + denom[didx];
if (r > 0) {
ret = ret + ", " + english_number(r);
}
return ret;
}
}
throw new Exception("Should never get here, bottomed out in english_number");
}
}
One way to accomplish this would be to use look up tables. It would be somewhat brute force, but easy to setup. For instance, you could have the words for 0-99 in one table, then a table for tens, hundreds, thousands, etc. Some simple math will give you the index of the word you need from each table.
This is some old python code on my hard drive. There might be bugs but it should show the basic idea:
class Translator:
def transformInt(self, x):
translateNumbers(0,[str(x)])
def threeDigitsNumber(self,number):
snumber=self.twoDigitsNumber(number/100)
if number/100!=0:
snumber+=" hundred "
snumber+self.twoDigitsNumber(number)
return snumber+self.twoDigitsNumber(number)
def twoDigitsNumber(self,number):
snumber=""
if number%100==10:
snumber+="ten"
elif number%100==11:
snumber+="eleven"
elif number%100==12:
snumber+="twelve"
elif number%100==13:
snumber+="thirteen"
elif number%100==14:
snumber+="fourteen"
elif number%100==15:
snumber+="fifteen"
elif number%100==16:
snumber+="sixteen"
elif number%100==17:
snumber+="seventeen"
elif number%100==18:
snumber+="eighteen"
elif number%100==19:
snumber+="nineteen"
else:
if (number%100)/10==2:
snumber+="twenty-"
elif (number%100)/10==3:
snumber+="thirty-"
elif (number%100)/10==4:
snumber+="forty-"
elif (number%100)/10==5:
snumber+="fifty-"
elif (number%100)/10==6:
snumber+="sixty-"
elif (number%100)/10==7:
snumber+="seventy-"
elif (number%100)/10==8:
snumber+="eighty-"
elif (number%100)/10==9:
snumber+="ninety-"
if (number%10)==1:
snumber+="one"
elif (number%10)==2:
snumber+="two"
elif (number%10)==3:
snumber+="three"
elif (number%10)==4:
snumber+="four"
elif (number%10)==5:
snumber+="five"
elif (number%10)==6:
snumber+="six"
elif (number%10)==7:
snumber+="seven"
elif (number%10)==8:
snumber+="eight"
elif (number%10)==9:
snumber+="nine"
elif (number%10)==0:
if snumber!="":
if snumber[len(snumber)-1]=="-":
snumber=snumber[0:len(snumber)-1]
return snumber
def translateNumbers(self,counter,words):
if counter+1<len(words):
self.translateNumbers(counter+1,words)
else:
if counter==len(words):
return True
k=0
while k<len(words[counter]):
if (not (ord(words[counter][k])>47 and ord(words[counter][k])<58)):
break
k+=1
if (k!=len(words[counter]) or k==0):
return 1
number=int(words[counter])
from copy import copy
if number==0:
self.translateNumbers(counter+1,copy(words[0:counter]+["zero"]+words[counter+1:len(words)]))
self.next.append(copy(words[0:counter]+["zero"]+words[counter+1:len(words)]))
return 1
if number<10000:
self.translateNumbers(counter+1,copy(words[0:counter]
+self.seperatewords(self.threeDigitsNumber(number))
+words[counter+1:len(words)]))
self.next.append(copy(words[0:counter]
+self.seperatewords(self.threeDigitsNumber(number))
+words[counter+1:len(words)]))
if number>999:
snumber=""
if number>1000000000:
snumber+=self.threeDigitsNumber(number/1000000000)+" billion "
number=number%1000000000
if number>1000000:
snumber+=self.threeDigitsNumber(number/1000000)+" million "
number=number%1000000
if number>1000:
snumber+=self.threeDigitsNumber(number/1000)+" thousand "
number=number%1000
snumber+=self.threeDigitsNumber(number)
self.translateNumbers(counter+1,copy(words[0:counter]+self.seperatewords(snumber)
+words[counter+1:len(words)]))
self.next.append(copy(words[0:counter]+self.seperatewords(snumber)
+words[counter+1:len(words)]))
This solution doesn't attempt to account for trailing spaces, but it is pretty fast.
typedef const char* cstring;
using std::string;
using std::endl;
std::ostream& GetOnes(std::ostream &output, int onesValue)
{
cstring ones[] = { "zero", "one", "two", "three", "four", "five", "six",
"seven", "eight", "nine" };
output << ones[onesValue];
return output;
}
std::ostream& GetSubMagnitude(std::ostream &output, int subMagnitude)
{
cstring tens[] = { "zeroty", "ten", "twenty", "thirty", "fourty", "fifty",
"sixty", "seventy", "eighty", "ninety"};
if (subMagnitude / 100 != 0)
{
GetOnes(output, subMagnitude / 100) << " hundred ";
GetSubMagnitude(output, subMagnitude - subMagnitude / 100 * 100);
}
else
{
if (subMagnitude >= 20)
{
output << tens[subMagnitude / 10] << " ";
GetOnes(output, subMagnitude - subMagnitude / 10 * 10);
}
else if (subMagnitude >= 10)
{
cstring teens[] = { "ten", "eleven", "twelve", "thirteen",
"fourteen", "fifteen", "sixteen", "seventeen",
"eighteen", "nineteen" };
output << teens[subMagnitude - 10] << " ";
}
else
{
GetOnes(output, subMagnitude) << " ";
}
}
return output;
}
std::ostream& GetLongNumber(std::ostream &output, double input)
{
cstring magnitudes[] = {"", "hundred", "thousand", "million", "billion",
"trillion"};
double magnitudeTests[] = {1, 100.0, 1000.0, 1000000.0, 1000000000.0,
1000000000000.0 };
int magTestIndex = 0;
while (floor(input / magnitudeTests[magTestIndex++]) != 0);
magTestIndex -= 2;
if (magTestIndex >= 0)
{
double subMagnitude = input / magnitudeTests[magTestIndex];
GetSubMagnitude(output, (int)subMagnitude);
if (magTestIndex) {
output << magnitudes[magTestIndex] << " ";
double remainder = input - (floor(input /
magnitudeTests[magTestIndex]) *
magnitudeTests[magTestIndex]);
if (floor(remainder) > 0)
{
GetLongNumber(output, remainder);
}
}
}
else
{
output << "zero";
}
return output;
}
Start by solving 1-99, using a list of numbers for 1-20, and then 30, 40, ..., 90. Then add hundreds to get 1-999. Then use that routine to give the number of each power of 1,000 for as high as you want to go (I think the highest standard nomenclature is for decillion, which is 10^33).
One slight caveat is that it's a little tricky to get the blanks right in all cases if you're trying to start and end without an excess blank. The easy solution is to put a blank after every word, and then strip off the trailing blank when you're all done. If you try to be more precise while building the string, you're likely to end up with missing blanks or excess blanks.
.) make a library of all numbers & positions (e.g. 1 has other notation than 10, another than 100 etc)
.) make a list of exceptions (e.g. for 12) and be aware, that in your algorythm, the same exception are for 112, 1012 etc.
if you want even more speed, make a cached set of numbers that you need.
Note some rules:
Tens numbers (twenty, thirty, etc.) ending in y are followed by hyphens.
Teens are special (except 15-19, but they're still special).
Everything else is just some combination of digit place like "three thousand".
You can get the place of a number by using floor division of integers: 532 / 100 -> 5
This is part of Common Lisp!
Here's how GNU CLISP does it, and here's how CMUCL does it (easier to read, IMHO).
Doing a code search for "format million billion" will turn up lots of them.

EAN 8 : How to calculate checksum digit?

I need to create EAN 8 bar code programmatically.
I search an algorithm to calculate the checksum digit.
The algorithm is covered in this wikipedia article on EAN, note that EAN-8 is calculated in the same way as EAN-13.
Here's a worked example from http://www.barcodeisland.com/ean8.phtml :
Assuming we wish to encode the 7-digit message "5512345", we would calculate the checksum in the following manner:
Barcode 5 5 1 2 3 4 5
Odd/Even Pos? O E O E O E O
Weighting 3 1 3 1 3 1 3
Calculation 5*3 5*1 1*3 2*1 3*3 4*1 5*3
Weighted Sum 15 5 3 2 9 4 15
The total is 15 + 5 + 3 + 2 + 9 + 4 + 15 = 53. 7 must be added to 53 to produce a number evenly divisible by 10, thus the checksum digit is 7 and the completed bar code value is "55123457".
string code="55123457";
int sum1 = code[1] + code[3] + code[5]
int sum2 = 3 * (code[0] + code[2] + code[4] + code[6]);
int checksum_value = sum1 + sum2;
int checksum_digit = 10 - (checksum_value % 10);
if (checksum_digit == 10)
checksum_digit = 0;
int checkSum(const std::vector<int>& code) const
{
if (code.size() < 8) return false;
for( SIZE_T i = 0; i< code.size(); i++ )
{
if( code[i] < 0 ) return false;
}
int sum1 = code[1] + code[3] + code[5]
int sum2 = 3 * (code[0] + code[2] + code[4] + code[6]);
int checksum_value = sum1 + sum2;
int checksum_digit = 10 - (checksum_value % 10);
if (checksum_digit == 10) checksum_digit = 0;
return checksum_digit;
}
Sorry for re-opening
JAVA VERSION
public int checkSum(String code){
int val=0;
for(int i=0;i<code.length();i++){
val+=((int)Integer.parseInt(code.charAt(i)+""))*((i%2==0)?1:3);
}
int checksum_digit = 10 - (val % 10);
if (checksum_digit == 10) checksum_digit = 0;
return checksum_digit;
}
Reawakened with a C# version:
public static bool IsValidEan13(string eanBarcode)
{
return IsValidEan(eanBarcode, 13);
}
public static bool IsValidEan12(string eanBarcode)
{
return IsValidEan(eanBarcode, 12);
}
public static bool IsValidEan14(string eanBarcode)
{
return IsValidEan(eanBarcode, 14);
}
public static bool IsValidEan8(string eanBarcode)
{
return IsValidEan(eanBarcode, 8);
}
private static bool IsValidEan(string eanBarcode, int length)
{
if (eanBarcode.Length != length) return false;
var allDigits = eanBarcode.Select(c => int.Parse(c.ToString(CultureInfo.InvariantCulture))).ToArray();
var s = length%2 == 0 ? 3 : 1;
var s2 = s == 3 ? 1 : 3;
return allDigits.Last() == (10 - (allDigits.Take(length-1).Select((c, ci) => c*(ci%2 == 0 ? s : s2)).Sum()%10))%10;
}
Here is a MySQL version for EAN13:
SET #first12digits="123456789012";
SELECT #first12digits,
IF (
(#check:=10-MOD(
(CAST(SUBSTRING(#first12digits, 1, 1) AS DECIMAL))+
(CAST(SUBSTRING(#first12digits, 2, 1) AS DECIMAL) * 3)+
(CAST(SUBSTRING(#first12digits, 3, 1) AS DECIMAL))+
(CAST(SUBSTRING(#first12digits, 4, 1) AS DECIMAL) * 3)+
(CAST(SUBSTRING(#first12digits, 5, 1) AS DECIMAL))+
(CAST(SUBSTRING(#first12digits, 6, 1) AS DECIMAL) * 3)+
(CAST(SUBSTRING(#first12digits, 7, 1) AS DECIMAL))+
(CAST(SUBSTRING(#first12digits, 8, 1) AS DECIMAL) * 3)+
(CAST(SUBSTRING(#first12digits, 9, 1) AS DECIMAL))+
(CAST(SUBSTRING(#first12digits, 10, 1) AS DECIMAL) * 3)+
(CAST(SUBSTRING(#first12digits, 11, 1) AS DECIMAL))+
(CAST(SUBSTRING(#first12digits, 12, 1) AS DECIMAL) * 3)
,10)) = 10, 0, #check
) AS checkDigit;
There was a bug. If Calc result = 10 then check digit = 0.
Here below a better version for EAN14.
SET #first13digits="1234567890123";
SELECT #txCode13:=#first13digits,
#iCheck := (
10 - (
(
MID(#txCode13, 2, 1) +
MID(#txCode13, 4, 1) +
MID(#txCode13, 6, 1) +
MID(#txCode13, 8, 1) +
MID(#txCode13, 10, 1) +
MID(#txCode13, 12, 1)
) + (
MID(#txCode13, 1, 1) +
MID(#txCode13, 3, 1) +
MID(#txCode13, 5, 1) +
MID(#txCode13, 7, 1) +
MID(#txCode13, 9, 1) +
MID(#txCode13, 11, 1) +
MID(#txCode13, 13, 1)
) * 3 ) % 10
) AS iCheck,
#iCheckDigit := IF(#iCheck = 10, 0, #iCheck) AS checkDigit,
CONCAT(#t
xCode13, CAST(#iCheckDigit AS CHAR)) AS EAN14WithCheck
Here is the Java version for EAN13
private int calcChecksum(String first12digits) {
char[] char12digits = first12digits.toCharArray();
int[] ean13 = {1,3};
int sum = 0;
for(int i = 0 ; i<char12digits.length; i++){
sum += Character.getNumericValue(char12digits[i]) * ean13[i%2];
}
int checksum = 10 - sum%10;
if(checksum == 10){
checksum = 0;
}
return checksum;
}
class GTIN(object):
def __init__(self, barcode=''):
self.barcode = barcode
def __checkDigit(self, digits):
total = sum(digits) + sum(map(lambda d: d*2, digits[-1::-2]))
return (10 - (total % 10)) % 10
def validateCheckDigit(self, barcode=''):
barcode = (barcode if barcode else self.barcode)
if len(barcode) in (8,12,13,14) and barcode.isdigit():
digits = map(int, barcode)
checkDigit = self.__checkDigit( digits[0:-1] )
return checkDigit == digits[-1]
return False
def addCheckDigit(self, barcode=''):
barcode = (barcode if barcode else self.barcode)
if len(barcode) in (7,11,12,13) and barcode.isdigit():
digits = map(int, barcode)
return barcode + str(self.__checkDigit(digits))
return ''
Today I need a PHP version, I remember about this page and copy from the Java version. Thank you.
function getEAN13($txEan12)
{
$iVal=0;
for($i=0; $i<strlen($txEan12); $i++)
{
$iSingleCharVal = intval(substr($txEan12, $i, 1)); // extract value of one char
$iSingleCharMult = $iSingleCharVal * ($i%2==0 ? 1 : 3); // calculate depending from position
$iVal+= $iSingleCharMult; // sum
}
$iCheckDigit = 10 - ($iVal % 10);
if ($iCheckDigit == 10) $iCheckDigit = 0;
return $txEan12 . $iCheckDigit;
}
Java Version:
It works perfectly
public static int checkSum(String code){
int val=0;
for(int i=0; i<code.length()-1; i++){
val+=((int)Integer.parseInt(code.charAt(i)+""))*((i%2==0)?1:3);
}
int checksum_digit = (10 - (val % 10)) % 10;
return checksum_digit;
}
Python EAN13 check-digit calculation based on Najoua Mahi's Java function:
def generateEAN13CheckDigit(self, first12digits):
charList = [char for char in first12digits]
ean13 = [1,3]
total = 0
for order, char in enumerate(charList):
total += int(char) * ean13[order % 2]
checkDigit = 10 - total % 10
if (checkDigit == 10):
return 0
return checkDigit
This works on both EAN 13 and EAN8:
public static String generateEAN(String barcode) {
int first = 0;
int second = 0;
if(barcode.length() == 7 || barcode.length() == 12) {
for (int counter = 0; counter < barcode.length() - 1; counter++) {
first = (first + Integer.valueOf(barcode.substring(counter, counter + 1)));
counter++;
second = (second + Integer.valueOf(barcode.substring(counter, counter + 1)));
}
second = second * 3;
int total = second + first;
int roundedNum = Math.round((total + 9) / 10 * 10);
barcode = barcode + String.valueOf(roundedNum - total);
}
return barcode;
}
This is a code I wrote in VFP (Visual FoxPro 9), for both EAN-8 and EAN-13
Lparameters lcBarcode,llShowErrorMessage
If Vartype(m.lcBarcode)<>'C'
If m.llShowErrorMessage
MessageBox([Type of parameter is incorect!],0+16,[Error Message])
EndIf
Return .f.
EndIf
If Len(Chrtran(Alltrim(m.lcBarcode),[0123456789],[]))>0
If m.llShowErrorMessage
MessageBox([Provided barcode contains invalid characters!],0+16,[Error Message])
EndIf
Return .f.
EndIf
If Len(Alltrim(m.lcBarcode))=0
If m.llShowErrorMessage
MessageBox([The length of provided barcode is 0 (zero)!],0+16,[Error Message])
EndIf
Return .f.
EndIf
If !InList(Len(Alltrim(m.lcBarcode)),8,13)
If m.llShowErrorMessage
MessageBox([Provided barcode is not an EAN-8 or EAN-13 barcode!],0+16,[Error Message])
EndIf
Return .f.
EndIf
Local lnCheck as Integer, lnSum as Integer, lnOriginalCheck as Integer,jj as Integer
jj=0
lnSum=0
m.lnOriginalCheck = Cast(Right(Alltrim(m.lcBarcode),1) as Integer)
m.lcBarcode = Left(Alltrim(m.lcBarcode),Len(Alltrim(m.lcBarcode))-1)
For ii = Len(m.lcBarcode) to 1 step -1
jj=jj+1
lnSum = lnSum + Cast(Substr(m.lcBarcode,ii,1) as Integer) * Iif(Mod(jj,2)=0,1,3)
Next
lnCheck = 10-Mod(lnSum,10)
lnCheck = Iif(lnCheck =10,0,lnCheck)
Return (lnCheck = lnOriginalCheck)
JavaScript version for EAN-8 and EAN-13
function checksum(code) {
const sum = code.split('').reverse().reduce((sum, char, idx) => {
let digit = Number.parseInt(char);
let weight = (idx + 1) % 2 === 0 ? 1 : 3;
let partial = digit * weight;
return sum + partial;
}, 0);
const remainder = sum % 10;
const checksum = remainder ? (10 - remainder) : 0;
return checksum;
}
Mini Javascript Version
function checksum(code){
return (10 - (code.split('').reduce((s, e, i) => { return s + parseInt(e) * ((i%2==0)?1:3) },0) % 10)) % 10;
}
=INT(CONCAT([#Code],MOD(10 - MOD((MID([#Code], 2, 1) + MID([#Code], 4, 1) + MID([#Code], 6, 1)) + (3*(MID([#Code], 1, 1) + MID([#Code], 3, 1) + MID([#Code], 5, 1) + MID([#Code], 7, 1))),10), 10)))
The above formula will calculate the check character without the need to use a macro or change to XLSM.
Note: Only works for EAN-8.

Resources