[2, 6, 13, 99, 27].include?(2) works well for checking if the array includes one value. But what if I want to check if an array includes any one from a list of multiple values? Is there a shorter way than doing Array.include?(a) or Array.include?(b) or Array.include?(c) ...?
You could take the intersection of two arrays, and see if it's not empty:
([2, 6, 13, 99, 27] & [2, 6]).any?
You can use the Enumerable#any? method with a code block to test for inclusion of multiple values. For example, to check for either 6 or 13:
[2, 6, 13, 99, 27].any? { |i| [6, 13].include? i }
I was interested in seeing how these various approach compared in performance, not so much for the problem at hand, but more for general comparisons of array vs set intersection, array vs set include? and include? vs index for arrays. I will edit to add other methods that are suggested, and let me know if you'd like to see different benchmark parameters.
I for one would like to see more benchmarking of SO answers done. It's not difficult or time-consuming, and it can provide useful insights. I find most of the time is preparing the test cases. Notice I've put the methods to be tested in a module, so if another method is to be benchmarked, I need only add that method to the module.
Methods compared
module Methods
require 'set'
def august(a,b) (a&b).any? end
def gnome_inc(a,b) a.any? { |i| b.include? i } end
def gnome_ndx(a,b) a.any? { |i| b.index i } end
def gnome_set(a,b) bs=b.to_set; a.any? { |i| bs.include? i } end
def vii_stud(a,b) as, bs = Set.new(a), Set.new(b); as.intersect?(bs) end
end
include Methods
#methods = Methods.instance_methods(false)
#=> [:august, :gnome_inc, :gnome_ndx, :gnome_set, :vii_stud]
Test data
def test_data(n,m,c,r)
# n: nbr of elements in a
# m: nbr of elements in b
# c: nbr of elements common to a & b
# r: repetitions
r.times.each_with_object([]) { |_,a|
a << [n.times.to_a.shuffle, [*(n-c..n-c-1+m)].shuffle] }
end
d = test_data(10,4,2,2)
#=> [[[7, 8, 0, 3, 2, 9, 1, 6, 5, 4], [11, 10, 9, 8]],
# [[2, 6, 3, 4, 7, 8, 0, 9, 1, 5], [ 9, 11, 10, 8]]]
# Before `shuffle`, each of the two elements is:
#=> [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [8, 9, 10, 11]]
def compute(d, m)
d.each_with_object([]) { |(a,b),arr| arr << send(m, a, b) }
end
compute(d, :august)
#=> [true, true]
Confirm methods return the same values
d = test_data(1000,100,10,3)
r0 = compute(d, #methods.first)
puts #methods[1..-1].all? { |m| r0 == compute(d, m) }
#=> true
Benchmark code
require 'benchmark'
#indent = methods.map { |m| m.to_s.size }.max
def test(n, m, c, r, msg)
puts "\n#{msg}"
puts "n = #{n}, m = #{m}, overlap = #{c}, reps = #{r}"
d = test_data(n, m, c, r)
Benchmark.bm(#indent) do |bm|
#methods.each do |m|
bm.report m.to_s do
compute(d, m)
end
end
end
end
Tests
n = 100_000
m = 1000
test(n, m, 0, 1, "Zero overlap")
test(n, m, 1000, 1, "Complete overlap")
test(n, m, 1, 20, "Overlap of 1")
test(n, m, 5, 20, "Overlap of 5")
test(n, m, 10, 20, "Overlap of 10")
test(n, m, 20, 20, "Overlap of 20")
test(n, m, 50, 20, "Overlap of 50")
test(n, m, 100, 20, "Overlap of 100")
Zero overlap
n = 100000, m = 1000, overlap = 0, reps = 1
user system total real
august 0.010000 0.000000 0.010000 ( 0.005491)
gnome_inc 4.480000 0.010000 4.490000 ( 4.500531)
gnome_ndx 0.810000 0.000000 0.810000 ( 0.822412)
gnome_set 0.030000 0.000000 0.030000 ( 0.031668)
vii_stud 0.080000 0.010000 0.090000 ( 0.084283)
Complete overlap
n = 100000, m = 1000, overlap = 1000, reps = 1
user system total real
august 0.000000 0.000000 0.000000 ( 0.005841)
gnome_inc 0.010000 0.000000 0.010000 ( 0.002521)
gnome_ndx 0.000000 0.000000 0.000000 ( 0.000350)
gnome_set 0.000000 0.000000 0.000000 ( 0.000655)
vii_stud 0.090000 0.000000 0.090000 ( 0.097850)
Overlap of 1
n = 100000, m = 1000, overlap = 1, reps = 20
user system total real
august 0.110000 0.000000 0.110000 ( 0.116276)
gnome_inc 61.790000 0.100000 61.890000 ( 62.058320)
gnome_ndx 10.100000 0.020000 10.120000 ( 10.144649)
gnome_set 0.360000 0.000000 0.360000 ( 0.357878)
vii_stud 1.450000 0.050000 1.500000 ( 1.501705)
Overlap of 5
n = 100000, m = 1000, overlap = 5, reps = 20
user system total real
august 0.110000 0.000000 0.110000 ( 0.113747)
gnome_inc 16.550000 0.050000 16.600000 ( 16.728505)
gnome_ndx 2.470000 0.000000 2.470000 ( 2.475111)
gnome_set 0.100000 0.000000 0.100000 ( 0.099874)
vii_stud 1.630000 0.060000 1.690000 ( 1.703650)
Overlap of 10
n = 100000, m = 1000, overlap = 10, reps = 20
user system total real
august 0.110000 0.000000 0.110000 ( 0.112674)
gnome_inc 10.090000 0.020000 10.110000 ( 10.131339)
gnome_ndx 1.470000 0.000000 1.470000 ( 1.478400)
gnome_set 0.060000 0.000000 0.060000 ( 0.062762)
vii_stud 1.430000 0.050000 1.480000 ( 1.476961)
Overlap of 20
n = 100000, m = 1000, overlap = 20, reps = 20
user system total real
august 0.100000 0.000000 0.100000 ( 0.108350)
gnome_inc 4.020000 0.000000 4.020000 ( 4.026290)
gnome_ndx 0.660000 0.010000 0.670000 ( 0.663001)
gnome_set 0.030000 0.000000 0.030000 ( 0.024606)
vii_stud 1.380000 0.050000 1.430000 ( 1.437340)
Overlap of 50
n = 100000, m = 1000, overlap = 50, reps = 20
user system total real
august 0.120000 0.000000 0.120000 ( 0.121278)
gnome_inc 2.170000 0.000000 2.170000 ( 2.236737)
gnome_ndx 0.310000 0.000000 0.310000 ( 0.308336)
gnome_set 0.020000 0.000000 0.020000 ( 0.015326)
vii_stud 1.220000 0.040000 1.260000 ( 1.259828)
Overlap of 100
n = 100000, m = 1000, overlap = 100, reps = 20
user system total real
august 0.110000 0.000000 0.110000 ( 0.112739)
gnome_inc 0.720000 0.000000 0.720000 ( 0.712265)
gnome_ndx 0.100000 0.000000 0.100000 ( 0.105420)
gnome_set 0.010000 0.000000 0.010000 ( 0.009398)
vii_stud 1.400000 0.050000 1.450000 ( 1.447110)
Simple way:
([2, 6] - [2, 6, 13, 99, 27]).empty?
I extend Array with these:
class Array
def include_exactly?(values)
self.include_all?(values) && (self.length == values.length)
end
def include_any?(values)
values.any? {|value| self.include?(value)}
end
def include_all?(values)
values.all? {|value| self.include?(value)}
end
def exclude_all?(values)
values.all? {|value| self.exclude?(value)}
end
end
One of my favourite methods of doing that in specs is to convert an array and a value to the Set and check it via #superset? & #subset? methods.
For example:
[1, 2, 3, 4, 5].to_set.superset?([1, 2, 3].to_set) # true
[1, 2, 3].to_set.subset?([1, 2, 3, 4, 5].to_set) # true
[1, 2].to_set.subset?([1, 2].to_set) # true
[1, 2].to_set.superset?([1, 2].to_set) # true
However, being a set means that all values in a collection are unique, so it may not always be appropriate:
[1, 1, 1, 1, 1].to_set.subset? [1, 2].to_set # true
To avoid calling .to_set every time I usually define a matcher for that:
it 'returns array of "shown" proposals' do
expect(body_parsed.first.keys).to be_subset_of(hidden_prop_attrs)
end
In my humble opinion, being a superset or a subset is just more readable than doing:
([1, 2, 3] & [1, 2]).any?
However, converting an array to a set may be a less performant. Tradeoffs ¯\_(ツ)_/¯
If you want to check two elements are present in the array.
2.4.1 :221 > ([2, 6, 13, 99, 27] & [2, 6]).many?
=> true
require 'set'
master = Set.new [2, 6, 13, 99, 27]
data = Set.new [27, -3, -4]
#puts data.subset?(master) ? 'yes' : 'no' #per #meager comment
puts data.intersect?(master) ? 'yes' : 'no'
--output:--
yes
This works - if any of the value matches:
arr = [2, 6, 13, 99, 27]
if (arr - [2, 6]).size < arr.size
puts 'element match found'
else
puts 'element not found'
end
I extend Array with these:
class Array
def include_any?(arr)
(self & Array(arr)).any?
end
end
Given a set with n elements, I need to find all the partitions of that set where there are k subsets of almost equal size.
For example, for a set with 7 elements and 3 subsets I only want partitions where there are two subsets with 2 elements each and one subset with 3 elements. I do not want a partition that has subsets with 1, 2, and 4 elements.
Put another way, there are 877 possible partitions for a set of 7 elements, but I'm only interested in the 105 (?) partitions that are made up subsets with 2/2/3 elements:
In reality n is around 35, which means that there are approximately 2.81 * 1027 partitions, "only" 8,338,573,669,964,101 partitions with three subsets. As such, I can't possibly calculate them all and wade through to find the ones that I want.
What's an algorithm for calculating just the partitions of interest? (Not the number of partitions, but that actual members in each sub-set for each partition.)
Here's a good way to do it, generating all possibilities only once by taking care of keeping them sorted, as well as a quick way to calculate lazily the number of answers:
def enum(n, k)
# Pick smaller_size items from the list, repeat smaller_n times
# then pick larger_size items from the list, repeat larger_n times.
smaller_n = n.div k
larger_times = n % k
smaller_times = k - larger_times
larger_n = smaller_n + 1
return to_enum(:enum, n, k) { calc_size(n, smaller_n, smaller_times, larger_n, larger_times) } unless block_given?
all = [*1..n]
# split all into one subset to group with the smaller_n and another
# to group with the larger_n.
all.combination(smaller_n * smaller_times).each do |smaller|
larger = all - smaller
subdivide(smaller, smaller_n) do |small|
subdivide(larger, larger_n) do |large|
yield [*small, *large]
end
end
end
end
# Subdivides elems into groups of n, keeping the elements sorted
# and generating only the sorted such combinations.
def subdivide(elems, n)
return yield [] if elems.empty?
# No choice for the first element, because we want to keep things sorted.
first, *rest = elems
rest.combination(n - 1).each do |comb|
remain = rest - comb
subdivide(remain, n) do |sub|
yield [[first, *comb], *sub]
end
end
end
def calc_size(n, smaller_n, smaller_times, larger_n, larger_times)
all = [
smaller_times.times.map do |i|
Array.new(n - i*smaller_n).combination(smaller_n)
end,
larger_times.times.map do |i|
Array.new(n - smaller_times*smaller_n - i*larger_n).combination(larger_n)
end
]
# Multiply everything, but divide by the number of symmetries, because
# we don't want to distinguish (1,2), (3,4), ... from (3,4), (1,2), ...
all.map do |enums|
enums.map(&:size).inject(1, :*) / enums.permutation.size
end.inject(:*)
end
p enum(7, 3).size # => 105 (instant)
p enum(7, 3).first(5) # => [[[1, 2], [3, 4], [5, 6, 7]],
# [[1, 3], [2, 4], [5, 6, 7]],
# [[1, 4], [2, 3], [5, 6, 7]],
# [[1, 2], [3, 5], [4, 6, 7]],
# [[1, 3], [2, 5], [4, 6, 7]]]
p enum(7, 3).count # => 105 (quick)
p enum(35, 3).size # => 564121960420200 (instant)
p enum(35, 3).first(2) # => [[[1..11], [12..23], [24..35]],
# [[1..11], [12..22, 24], [23, 25..35]]]
p enum(35, 3).count # => will take forever, should return 564121960420200
Note: Just for kicks, this can also calculate the size lazily by building some enumerators and using size, without iterating through them. This will work in Ruby 2.0+ only though, as it require Enumerator#size.
For added fun:
require 'with_progress'
enum(16, 3).with_progress.count # => enjoy!
Important observation: Under the condition that the partition sizes must not differ by more than 1, the multiset of partition sizes is uniquely defined. We have (n % k) parts of size ceil(n / k) and (k - n % k) partitions of size floor(n / k).
So let's fix the partition sizes and just enumerate all possible subsets for all partitions:
#n = n = 7
#k = k = 3
#used = n.times.map { false }
# #sizes = [2,2,3] for n = 7, k = 3
#sizes = (k - n % k).times.map { n / k } + (n % k).times.map { (n + k - 1) / k }
#parts = #sizes.map { |i| [0]*i }
def enumerate_part(i, j, lo, y)
# i = index of the current part
# j = index of the current number in the current part
# lo = lower bound for numbers to chose
# y = output yielder
if i == #parts.size
y << #parts.map(&:dup)
return
end
if j == #sizes[i]
enumerate_part(i + 1, 0, #sizes[i + 1] == #sizes[i] ? #parts[i][0] : 1, y)
return
end
(lo..#n).each do |x|
next if #used[x]
#used[x] = true
#parts[i][j] = x
enumerate_part(i, j + 1, x + 1, y)
#used[x] = false
end
end
results = Enumerator.new { |y| enumerate_part(0,0,1,y) }
puts results.count
Note that for the partitions of equal size, I assign them increasing minima to achieve uniqueness. This can cause a bit of backtracking (at most 2 levels), so it's not optimal. We might want to avoid assigning numbers where we already know that the lower bound will be too high for the next, equally sized partition(s). It gets a bit complex at that point, so I kept it simple. It is definitely possible to improve this.
It uses only O(n) space because I mutate global state instead of copying stuff around (yes, I yield a defensive copy to the enumerator, but that is not necessary if you process the results immediately in every iteration). Maybe not the most elegant solution, but the goal is to achieve good speed.
Output:
105
It is no fun running this with larger n (beyond 20). I blame Ruby for that, but fortunately this particular implementation will look very similar in almost any other language.
UPDATE: Just for fun, I improved the algorithm to be completely linear in the output size (apart from the potential n factor due to maintaining the set of leftover numbers). It should be several times faster that way:
def dfs(i, j, lo, eq, sz, state)
parts, y, n, k, left = state
if i == k
y << parts
return
end
if j == sz
mid = i+1 == n % k
neweq, newsz = mid ? [k - i - 1, sz-1] : [eq - 1, sz]
dfs(i+1, 0, mid ? 1 : parts[i][0], neweq, newsz, state)
return
end
higher = ((j == 0) ? (eq * sz - j - 1) : (sz - j - 1))
left.drop(higher).each_with_index do |x,idx|
break if x < lo
parts[i][j] = x
left.delete_at(idx + higher)
dfs(i, j + 1, x + 1, eq, sz, state)
left.insert(idx + higher, x)
end
end
def niklas(n, k)
parts = k.times.map{|i| [0]*(i < n%k ? (n+k-1)/k : n/k) }
left = n.downto(1).to_a
results = Enumerator.new { |y|
dfs(0, 0, 1, (n % k == 0) ? k : n % k, (n + k - 1) / k, [parts, y, n, k, left])
}
cnt = results.count
puts "count = #{cnt} != #{#sz}" unless cnt == #sz
end
I believe this solution (referred to as 'cary1' in the benchmarks) will do the job:
Code
require 'set'
def doit(items, subs, so_far=[], combos=Set.new)
return combos << (so_far+[items]).to_set if subs.size == 1
items.combination(subs.first){|c|doit(items-c,subs[1..-1],so_far+[c],combos)}
combos
end
Demo
doit([1,2,3,4,5], [2,3]).to_a.map(&:to_a) # 10 combinations
#=> [[[1, 2], [3, 4, 5]], [[1, 3], [2, 4, 5]], [[1, 4], [2, 3, 5]],
# [[1, 5], [2, 3, 4]], [[2, 3], [1, 4, 5]], [[2, 4], [1, 3, 5]],
# [[2, 5], [1, 3, 4]], [[3, 4], [1, 2, 5]], [[3, 5], [1, 2, 4]],
# [[4, 5], [1, 2, 3]]]
doit([1,2,3,4,5], [2,1,2]).to_a.map(&:to_a) # 15 combinations
#=> [[[1, 2], [3], [4, 5]], [[1, 2], [4], [3, 5]], [[1, 2], [5], [3, 4]],
# [[1, 3], [2], [4, 5]], [[1, 3], [4], [2, 5]], [[1, 3], [5], [2, 4]],
# [[1, 4], [2], [3, 5]], [[1, 4], [3], [2, 5]], [[1, 4], [5], [2, 3]],
# [[1, 5], [2], [3, 4]], [[1, 5], [3], [2, 4]], [[1, 5], [4], [2, 3]],
# [[2, 3], [1], [4, 5]], [[2, 4], [1], [3, 5]], [[2, 5], [1], [3, 4]]]
doit([1,2,3,4,5,6,7], [2,2,3]).to_a.map(&:to_a) # 105 combinations
# => [[[1, 2], [3, 4], [5, 6, 7]], [[1, 2], [3, 5], [4, 6, 7]],
# [[1, 2], [3, 6], [4, 5, 7]], [[1, 2], [3, 7], [4, 5, 6]],
# [[1, 2], [4, 5], [3, 6, 7]], [[1, 2], [4, 6], [3, 5, 7]],
# ...
# [[4, 5], [6, 7], [1, 2, 3]], [[4, 6], [5, 7], [1, 2, 3]],
# [[4, 7], [5, 6], [1, 2, 3]]]
Explanaton
I took the array subs as a given, as constructing subs seems like a separate and not very difficult problem.
To avoid [[1],[2,3],[4]] and [[4],[2,3],[1]] from both being included in the result, I converted both of these to sets. When attempting to add these to a set of results, only the first would be added. I used sets because nothing was said of the elements of the given set, specifically whether each pair could be compared with <=>. Depending on how the results are to be used, it may be sufficient to leave the result as a set of sets. Alternatively, the set of sets can easily be converted to an array of arrays, as I have done in the "Demo" section.
If subs in doit() is not given, it can be be created like so, before calling doit:
def divide_up(items, k)
m, r = items.size.divmod(k)
puts "m = #{m}, r = #{r}"
doit(items, Array.new(k-r, m) + Array.new(r, m+1))
end
Edit: I made a small change to have the method return a set of sets, thinking these probably could be used directly. If not, they can easily be converted to an array of arrays, as I have done in the demo section.
Here's an approach that avoids the unnecessary enumerations that were performed by my first solution. This solution is referred to as 'cary2' in the benchmarks.
Code
def doit(list, group_sizes)
#group_sizes = group_sizes
#combos = []
#empty_group_filled_by_size = group_sizes.uniq.product([false]).to_h
recurse(list, group_sizes.map { |sub| [] })
#combos
end
def recurse(remaining_items_to_assign, assigned_so_far)
empty_group_filled_by_size = #empty_group_filled_by_size.dup
assigned_so_far.each_with_index do |group, ndx|
next if group.size == #group_sizes[ndx] # all positions in group allocated
if group.empty?
group_size = #group_sizes[ndx]
empty_group_filled_by_size[group_size] ? next :
empty_group_filled_by_size[group_size] = true
end
assigned_so_far[ndx] << remaining_items_to_assign.first
if remaining_items_to_assign.size == 1
#combos << assigned_so_far.map { |group| group.dup } # deep copy
else
recurse(remaining_items_to_assign[1..-1], assigned_so_far)
end
assigned_so_far[ndx].pop
end
end
Explanation
Suppose we have 12 items to allocate to three bins of size 2 (B2a, B2b and B2c) and two bins of size 3 (B3a and B3b). Items are sequentially assigned to bins.
The first item can be assigned to one of the size 2 bins (say B2a) or to one of the size 3 bins (say B3a). It cannot be sequentially assigned to each empty bin of a given size, as that would result in double-counting. For each given item being assigned to bins, the array empty_group_filled_by_size is used to keep track of whether the item has already been assigned to the first empty bin of a particular size.
The allocation possibilities for the second item depend on which bin item 1 has assigned to. If item 1 has been assigned to B2a, item 2 can be assigned to B2a, to one of the other two (still empty) size 2 bins (say B2b) or to to one of the two (empty) size 3 bins (say B3a). If item 1 has been assigned to B3a, item 2 can be assigned to any one of the three size 2 bins (say B2a), to B3a or to B3b .
These assignments continue until there remains only one item to assign, at which time there is only one bin that is not full, and it has space for just one more item. After saving that assignment of items to bins in an array (#combos), the recursion backtracks to consider other possibilities.
I thought it might be interesting to benchmark the three solutions that have been been contributed to date. If anyone wants to see additional tests, please let me know and I'll add them.
Niklas1
def enumerate_part(i, j, lo, y)
# i = index of the current part
# j = index of the current number in the current part
# lo = lower bound for numbers to chose
# y = output yielder
if i == #parts.size
y << #parts.map(&:dup)
return
end
if j == #sizes[i]
enumerate_part(i + 1, 0, #sizes[i + 1] == #sizes[i] ? #parts[i][0] : 1, y)
return
end
(lo..#n).each do |x|
next if #used[x]
#used[x] = true
#parts[i][j] = x
enumerate_part(i, j + 1, x + 1, y)
#used[x] = false
end
end
def niklas1(n, k)
#n, #k = n, k
#used = n.times.map { false }
# #sizes = [2,2,3] for n = 7, k = 3
#sizes = (k - n % k).times.map { n / k } + (n % k).times.map { (n + k - 1) / k }
#parts = #sizes.map { |i| [0]*i }
results = Enumerator.new { |y| enumerate_part(0,0,1,y) }
cnt = results.count
puts "count = #{cnt} != #{#sz}" unless cnt == #sz
end
Niklas2
def dfs(i, j, lo, eq, sz, state)
parts, y, n, k, left = state
if i == k
y << parts
return
end
if j == sz
mid = i+1 == n % k
neweq, newsz = mid ? [k - i - 1, sz-1] : [eq - 1, sz]
dfs(i+1, 0, mid ? 1 : parts[i][0], neweq, newsz, state)
return
end
higher = ((j == 0) ? (eq * sz - j - 1) : (sz - j - 1))
left.drop(higher).each_with_index do |x,idx|
break if x < lo
parts[i][j] = x
left.delete_at(idx + higher)
dfs(i, j + 1, x + 1, eq, sz, state)
left.insert(idx + higher, x)
end
end
def niklas2(n, k)
parts = k.times.map{|i| [0]*(i < n%k ? (n+k-1)/k : n/k) }
left = n.downto(1).to_a
results = Enumerator.new { |y|
dfs(0, 0, 1, (n % k == 0) ? k : n % k, (n + k - 1) / k, [parts, y, n, k, left])
}
cnt = results.count
puts "count = #{cnt} != #{#sz}" unless cnt == #sz
end
Marc-André
def enum(n, k)
# Pick smaller_size items from the list, repeat smaller_n times
# then pick larger_size items from the list, repeat larger_n times.
smaller_n = n.div k
larger_times = n % k
smaller_times = k - larger_times
larger_n = smaller_n + 1
return to_enum(:enum, n, k) { calc_size(n, smaller_n, smaller_times, larger_n, larger_times) } unless block_given?
all = [*1..n]
# split all into one subset to group with the smaller_n and another
# to group with the larger_n.
all.combination(smaller_n * smaller_times).each do |smaller|
larger = all - smaller
subdivide(smaller, smaller_n) do |small|
subdivide(larger, larger_n) do |large|
yield [*small, *large]
end
end
end
end
# Subdivides elems into groups of n, keeping the elements sorted
# and generating only the sorted such combinations.
def subdivide(elems, n)
return yield [] if elems.empty?
# No choice for the first element, because we want to keep things sorted.
first, *rest = elems
rest.combination(n - 1).each do |comb|
remain = rest - comb
subdivide(remain, n) do |sub|
yield [[first, *comb], *sub]
end
end
end
def calc_size(n, smaller_n, smaller_times, larger_n, larger_times)
all = [
smaller_times.times.map do |i|
Array.new(n - i*smaller_n).combination(smaller_n)
end,
larger_times.times.map do |i|
Array.new(n - smaller_times*smaller_n - i*larger_n).combination(larger_n)
end
]
# Multiply everything, but divide by the number of symmetries, because
# we don't want to distinguish (1,2), (3,4), ... from (3,4), (1,2), ...
all.map do |enums|
enums.map(&:size).inject(1, :*) / enums.permutation.size
end.inject(:*)
end
def marc_andre(n, k)
a = enum(n, k).to_a
a.size
end
Cary1
require 'set'
def cary1(n, k)
m, r = n.divmod(k)
cnt = doit([*1..n], Array.new(k-r, m) + Array.new(r, m+1)).to_a.map(&:to_a).size
puts "count = #{cnt} != #{#sz}" unless cnt == #sz
end
def doit(items, subs, so_far=[], combos=Set.new)
return combos << (so_far+[items]).to_set if subs.size == 1
items.combination(subs.first){|c|doit(items-c,subs[1..-1],so_far+[c],combos)}
combos
end
Cary2
def cary2(n, k)
m, r = n.divmod(k)
cnt = doit2([*1..n], Array.new(k-r, m) + Array.new(r, m+1)).to_a.map(&:to_a).size
puts "count = #{cnt} != #{#sz}" unless cnt == #sz
end
def doit2(list, group_sizes)
#group_sizes = group_sizes
#combos = []
#empty_group_filled_by_size = group_sizes.uniq.product([false]).to_h
recurse(list, group_sizes.map { |sub| [] })
#combos
end
def recurse(remaining_items_to_assign, so_far)
empty_group_filled_by_size = #empty_group_filled_by_size.dup
so_far.each_with_index do |group, ndx|
next if group.size == #group_sizes[ndx] # all positions in group allocated
if group.empty?
group_size = #group_sizes[ndx]
empty_group_filled_by_size[group_size] ? next :
empty_group_filled_by_size[group_size] = true
end
so_far[ndx] << remaining_items_to_assign.first
if remaining_items_to_assign.size == 1
#combos << so_far.map { |group| group.dup } # deep copy
else
recurse(remaining_items_to_assign[1..-1], so_far)
end
so_far[ndx].pop
end
end
Benchmark code
require 'benchmark'
def bench_it(n, k, iterations)
puts "\nn = #{n}, k = #{k}, size = #{#sz = marc_andre(n,k)}\n"
Benchmark.bm(%w[Niklas Marc-André Cary].map(&:size).max) do |bm|
bm.report('Niklas1') do
iterations.times do
niklas1(n, k)
end
end
bm.report('Niklas2') do
iterations.times do
niklas2(n, k)
end
end
bm.report('Marc-André') do
iterations.times do
marc_andre(n, k)
end
end
bm.report('Cary1') do
iterations.times do
cary1(n, k)
end
end
bm.report('Cary2') do
iterations.times do
cary2(n, k)
end
end
end
end
iterations = 1
bench_it( 7, 3, iterations)
bench_it(10, 2, iterations)
bench_it(10, 3, iterations)
bench_it(10, 4, iterations)
bench_it(13, 2, iterations)
bench_it(13, 3, iterations)
bench_it(13, 4, iterations)
bench_it(13, 5, iterations)
bench_it(16, 2, iterations)
bench_it(16, 3, iterations)
bench_it(16, 4, iterations)
bench_it(18, 2, iterations)
bench_it(18, 3, iterations)
bench_it(20, 2, iterations)
Results
The results are best read with this clip playing in the background.
n = 7, k = 3, size = 105
user system total real
Niklas1 0.000000 0.000000 0.000000 ( 0.001131)
Niklas2 0.000000 0.000000 0.000000 ( 0.000595)
Marc-André 0.000000 0.000000 0.000000 ( 0.000911)
Cary1 0.000000 0.000000 0.000000 ( 0.003241)
Cary2 0.000000 0.000000 0.000000 ( 0.000922)
n = 10, k = 2, size = 126
user system total real
Niklas1 0.010000 0.000000 0.010000 ( 0.004986)
Niklas2 0.000000 0.000000 0.000000 ( 0.001031)
Marc-André 0.000000 0.000000 0.000000 ( 0.000880)
Cary1 0.000000 0.000000 0.000000 ( 0.003850)
Cary2 0.000000 0.000000 0.000000 ( 0.001607)
n = 10, k = 3, size = 2100
user system total real
Niklas1 0.040000 0.000000 0.040000 ( 0.042930)
Niklas2 0.010000 0.000000 0.010000 ( 0.012296)
Marc-André 0.020000 0.000000 0.020000 ( 0.017632)
Cary1 0.080000 0.000000 0.080000 ( 0.081082)
Cary2 0.020000 0.000000 0.020000 ( 0.018739)
n = 10, k = 4, size = 6300
user system total real
Niklas1 0.090000 0.000000 0.090000 ( 0.094029)
Niklas2 0.030000 0.000000 0.030000 ( 0.030908)
Marc-André 0.040000 0.000000 0.040000 ( 0.038247)
Cary1 0.510000 0.000000 0.510000 ( 0.512819)
Cary2 0.060000 0.000000 0.060000 ( 0.059061)
n = 13, k = 2, size = 1716
user system total real
Niklas1 0.210000 0.000000 0.210000 ( 0.219898)
Niklas2 0.020000 0.000000 0.020000 ( 0.017828)
Marc-André 0.020000 0.000000 0.020000 ( 0.015917)
Cary1 0.030000 0.000000 0.030000 ( 0.029272)
Cary2 0.020000 0.000000 0.020000 ( 0.022058)
n = 13, k = 3, size = 45045
user system total real
Niklas1 1.480000 0.010000 1.490000 ( 1.484895)
Niklas2 0.350000 0.000000 0.350000 ( 0.343872)
Marc-André 0.470000 0.010000 0.480000 ( 0.493646)
Cary1 1.890000 0.010000 1.900000 ( 1.895684)
Cary2 0.520000 0.010000 0.530000 ( 0.531843)
n = 13, k = 4, size = 200200
user system total real
Niklas1 4.160000 0.070000 4.230000 ( 4.225072)
Niklas2 1.210000 0.000000 1.210000 ( 1.216814)
Marc-André 2.170000 0.030000 2.200000 ( 2.203629)
Cary1 29.930000 0.580000 30.510000 ( 30.545276)
Cary2 1.720000 0.040000 1.760000 ( 1.757895)
n = 13, k = 5, size = 600600
user system total real
Niklas1 12.750000 0.040000 12.790000 ( 12.800083)
Niklas2 3.070000 0.010000 3.080000 ( 3.085927)
Marc-André 3.630000 0.010000 3.640000 ( 3.637411)
Cary1 191.200000 0.890000 192.090000 (192.319270)
Cary2 5.290000 0.300000 5.590000 ( 5.625138)
n = 16, k = 2, size = 6435
user system total real
Niklas1 2.120000 0.010000 2.130000 ( 2.131065)
Niklas2 0.090000 0.010000 0.100000 ( 0.092635)
Marc-André 0.080000 0.000000 0.080000 ( 0.076312)
Cary1 0.290000 0.000000 0.290000 ( 0.295062)
Cary2 0.070000 0.000000 0.070000 ( 0.071995)
n = 16, k = 3, size = 1009008
user system total real
Niklas1 62.370000 0.940000 63.310000 ( 63.404404)
Niklas2 8.070000 0.020000 8.090000 ( 8.099837)
Marc-André 14.080000 0.330000 14.410000 ( 14.424437)
Cary1 48.930000 0.220000 49.150000 ( 49.227445)
Cary2 13.540000 0.280000 13.820000 ( 13.856880)
n = 16, k = 4, size = 2627625
user system total real
Niklas2 22.530000 0.290000 22.820000 ( 23.252738)
Marc-André 35.850000 1.160000 37.010000 ( 37.159520)
Cary2 39.850000 0.860000 40.710000 ( 40.780489)
n = 18, k = 2, size = 24310
user system total real
Niklas2 0.280000 0.000000 0.280000 ( 0.288286)
Marc-André 0.240000 0.020000 0.260000 ( 0.262669)
Cary2 0.240000 0.010000 0.250000 ( 0.245008)
n = 18, k = 3, size = 2858856
user system total real
Niklas2 37.150000 2.670000 39.820000 ( 48.484321)
Marc-André 68.010000 1.350000 69.360000 ( 70.249403)
Cary2 48.300000 0.840000 49.140000 ( 49.591279)
n = 20, k = 2, size = 92378
user system total real
Niklas2 1.260000 0.000000 1.260000 ( 1.271094)
Marc-André 1.180000 0.040000 1.220000 ( 1.205478)
Cary2 1.210000 0.010000 1.220000 ( 1.232024)
Interpretation
We appear to have a clear winner. Congratulations, Marc-André! Niklas, no gold medal this time, but silver's not bad.
I will speak to my code and let #Niklas and #Marc-André speak to theirs, as they better understand how their code works. My code appears to work reasonable well when the value of k is small (2 or 3). As that parameter increases, however, my code spends an increasing percentage of time throwing out duplicates (e.g., dismissing [[2,3],[3,4,5],[6,7] when I already "have" [[6,7,[3,4,5][2,3]]. In particular, see the results for n = 13, k = 5. That's in part because I designed the code for arbitrary partitions, not just for those that look like [m, m,...,m, m+1, m+1,...m+1]. I will have a look at whether there is anything I can do to exploit that structure (without drastically changing my code), but I'm not optimistic about the outcome.
Edit
I've updated the benchmarks to include the results for my second solution ("cary2"). It posts solution times that are roughly comparable to #Marc-André's, which is not surprising considering that both algorithms avoid unnecessary enumerations of combinations.
Edit 2
I've updated the benchmarks to include #Niklas' second solution. I've labelled the new one "Niklas2" and the original one "Nicklas1". I also added two tests for n = 18. I tried running n = 20, k = 3 (for "Niklas1", "Marc-André" and "Cary2"), but cancelled it after 5 minutes or so, as it had not obtained an initial solution.
I believe the results speak for themselves.
What's the efficient way to multiply two arrays and get sum of multiply values in Ruby?
I have two arrays in Ruby:
array_A = [1, 2, 1, 4, 5, 3, 2, 6, 5, 8, 9]
array_B = [3, 2, 4, 2, 5, 1, 3, 3, 7, 5, 4]
My aim is to get the sum value of array_A * array_B, i.e., 1*3 + 2*2 + 1*4 + ... + 8*5 + 9*4.
Because I need to calculate them million times in my apps, what's the most efficient way to do such calculations?
It's just like a matrix calcuation: 1* N matrix * N*1 matrix or a vector dot product.
Update
I've just updated benchmarks according to new comments. Following Joshua's comment, the inject method will gain a 25% speedup, see array walking without to_a in the table below.
However since speed is the primary goal for the OP we have a new winner for the contest which reduces runtime from .34 to .22 in my benchmarks.
I still prefer inject method because it's more ruby-ish, but if speed matters then the while loop seems to be the way.
New Answer
You can always benchmark all these answers, I did it for curiosity:
> ./matrix.rb
Rehearsal --------------------------------------------------------------
matrix method 1.500000 0.000000 1.500000 ( 1.510685)
array walking 0.470000 0.010000 0.480000 ( 0.475307)
array walking without to_a 0.340000 0.000000 0.340000 ( 0.337244)
array zip 0.590000 0.000000 0.590000 ( 0.594954)
array zip 2 0.500000 0.000000 0.500000 ( 0.509500)
while loop 0.220000 0.000000 0.220000 ( 0.219851)
----------------------------------------------------- total: 3.630000sec
user system total real
matrix method 1.500000 0.000000 1.500000 ( 1.501340)
array walking 0.480000 0.000000 0.480000 ( 0.480052)
array walking without to_a 0.340000 0.000000 0.340000 ( 0.338614)
array zip 0.610000 0.010000 0.620000 ( 0.625805)
array zip 2 0.510000 0.000000 0.510000 ( 0.506430)
while loop 0.220000 0.000000 0.220000 ( 0.220873)
Simple array walking wins, Matrix method is worse because it includes object instantiation. I think that if you want to beat the inject while method (to beat here means an order of magnitude fastest) you need to implement a C extension and bind it in your ruby program.
Here it's the script I've used
#!/usr/bin/env ruby
require 'benchmark'
require 'matrix'
array_A = [1, 2, 1, 4, 5, 3, 2, 6, 5, 8, 9]
array_B = [3, 2, 4, 2, 5, 1, 3, 3, 7, 5, 4]
def matrix_method a1, a2
(Matrix.row_vector(a1) * Matrix.column_vector(a2)).element(0,0)
end
n = 100000
Benchmark.bmbm do |b|
b.report('matrix method') { n.times { matrix_method(array_A, array_B) } }
b.report('array walking') { n.times { (0...array_A.count).to_a.inject(0) {|r, i| r + array_A[i]*array_B[i]} } }
b.report('array walking without to_a') { n.times { (0...array_A.count).inject(0) {|r, i| r + array_A[i]*array_B[i]} } }
b.report('array zip') { n.times { array_A.zip(array_B).map{|i,j| i*j }.inject(:+) } }
b.report('array zip 2') { n.times { array_A.zip(array_B).inject(0) {|r, (a, b)| r + (a * b)} } }
b.report('while loop') do
n.times do
sum, i, size = 0, 0, array_A.size
while i < size
sum += array_A[i] * array_B[i]
i += 1
end
sum
end
end
end
Walking through each element should be a must
(0...array_A.count).inject(0) {|r, i| r + array_A[i]*array_B[i]}
I would start simple and use the Ruby matrix class:
require 'matrix'
a = Matrix.row_vector( [1, 2, 1, 4, 5, 3, 2, 6, 5, 8, 9])
b = Matrix.column_vector([3, 2, 4, 2, 5, 1, 3, 3, 7, 5, 4])
result= a * b
puts result.element(0,0)
If this turns out to be too slow, then do the exact same method but with an external math library.
This is how I would do it
array_A.zip(array_B).map{|i,j| i*j }.inject(:+)
This is another way:
array_A.zip(array_B).inject(0) {|r, (a, b)| r + (a * b)}
Since speed is our primary criterion, I'm going to submit this method as it's fastest according to Peter's benchmarks.
sum, i, size = 0, 0, a1.size
while i < size
sum += a1[i] * a2[i]
i += 1
end
sum
Try the NMatrix gem. It is a numerical computation library. I think it uses the same C and C++ libraries that Octave and Matlab uses.
You would be able to do the matrix multiplication like this:
require 'nmatrix'
array_A = [1, 2, 1, 4, 5, 3, 2, 6, 5, 8, 9]
array_B = [3, 2, 4, 2, 5, 1, 3, 3, 7, 5, 4]
vec_a = array_A.to_nm([1,array_A.length]) # create an NMatrix
vec_b = array_B.to_nm([1,array_B.length])
sum = vec_a.dot(vec_b.transpose)
I am not sure how the speed will compare using pure Ruby but I imagine it to be faster, especially for large and sparse vectors.
array1.zip(array2).map{|x| x.inject(&:*)}.sum
EDIT: Vector is not fastest (Marc Bollinger is totally right).
Here is the modified code with vector and n-times:
require 'benchmark'
require 'matrix'
array_A = [1, 2, 1, 4, 5, 3, 2, 6, 5, 8, 9]
array_B = [3, 2, 4, 2, 5, 1, 3, 3, 7, 5, 4]
vector_A = Vector[*array_A]
vector_B = Vector[*array_B]
def matrix_method a1, a2
(Matrix.row_vector(a1) * Matrix.column_vector(a2)).element(0,0)
end
def vector_method a1, a2
a1.inner_product(a2)
end
n = 100000
Benchmark.bmbm do |b|
b.report('matrix method') { n.times { matrix_method(array_A, array_B) } }
b.report('array walking') { n.times { (0...array_A.count).to_a.inject(0) {|r, i| r + array_A[i]*array_B[i]} } }
b.report('array walking without to_a') { n.times { (0...array_A.count).inject(0) {|r, i| r + array_A[i]*array_B[i]} } }
b.report('array zip') { n.times { array_A.zip(array_B).map{|i,j| i*j }.inject(:+) } }
b.report('array zip 2') { n.times { array_A.zip(array_B).inject(0) {|r, (a, b)| r + (a * b)} } }
b.report('while loop') do
n.times do
sum, i, size = 0, 0, array_A.size
while i < size
sum += array_A[i] * array_B[i]
i += 1
end
sum
end
end
b.report('vector') { n.times { vector_method(vector_A, vector_B) } }
end
And the results:
Rehearsal --------------------------------------------------------------
matrix method 0.860000 0.010000 0.870000 ( 0.911755)
array walking 0.290000 0.000000 0.290000 ( 0.294779)
array walking without to_a 0.190000 0.000000 0.190000 ( 0.215780)
array zip 0.420000 0.010000 0.430000 ( 0.441830)
array zip 2 0.340000 0.000000 0.340000 ( 0.352058)
while loop 0.080000 0.000000 0.080000 ( 0.085314)
vector 0.310000 0.000000 0.310000 ( 0.325498)
----------------------------------------------------- total: 2.510000sec
user system total real
matrix method 0.870000 0.020000 0.890000 ( 0.952630)
array walking 0.290000 0.000000 0.290000 ( 0.340443)
array walking without to_a 0.220000 0.000000 0.220000 ( 0.240651)
array zip 0.400000 0.010000 0.410000 ( 0.441829)
array zip 2 0.330000 0.000000 0.330000 ( 0.359365)
while loop 0.080000 0.000000 0.080000 ( 0.090099)
vector 0.300000 0.010000 0.310000 ( 0.325903)
------
Too bad. :(