Fill Hash with nil values if no values given - ruby

I have these arrays:
positions = [[0, 1, 2], [2, 3]]
values = [[15, 15, 15], [7, 7]]
keys = [1, 4]
I need to create a hash whose keys are from keys and the values are from values. Values must be at indices defined in positions. If no index is defined,nil` should be added to that index.
The three arrays contain the same number of elements; keys has two elements, values two, and positions two. So it's ok.
Expected output:
hash = {1=>[15, 15, 15, nil], 4=>[nil, nil, 7, 7]}

Let the zippery begin 🤐 (answer to the original question):
row_size = positions.flatten.max.next
rows = positions.zip(values).map do |row_positions, row_values|
row = Array.new(row_size)
row_positions.zip(row_values).each_with_object(row) do |(position, value), row|
row[position] = value
end
end
keys.zip(rows).to_h # => {1=>[15, 15, 15, nil], 4=>[nil, nil, 7, 7]}

Just out of curiosity:
nils = (0..positions.flatten.max).zip([nil]).to_h
keys.zip(positions, values).group_by(&:shift).map do |k, v|
[k, nils.merge(v.shift.reduce(&:zip).to_h).values]
end.to_h
#⇒ {1=>[15, 15, 15, nil], 4=>[nil, nil, 7, 7]}

Not the cleanest.. but works :P
max = positions.flatten.max + 1
pv = positions.zip(values).map { |o| o.transpose.to_h }
h = {}
pv.each_with_index do |v, idx|
h[keys[idx]] = Array.new(max).map.with_index { |_, i| v[i] }
end
# h
# {1=>[15, 15, 15, nil], 4=>[nil, nil, 7, 7]}
or if you prefer a more compressed but less readable one..
keys.zip(positions.zip(values).map { |o| o.transpose.to_h }).reduce({}) do |h, (k, v)|
h[k] = Array.new(max).map.with_index { |_, i| v[i] }
h
end

new_hash = {}
keys.each_with_index do |key, index|
new_hash[key] = Array.new(positions.flatten.max + 1)
value_array = values[index]
position_array = positions[index]
position_array.each_with_index.map { |element, i| new_hash[key][element] = value_array[i]}
end
new_hash
I hope this will work.

Related

How to use collect and include for multidimensional array

I have:
array1 = [[1,2,3,4,5],[7,8,9,10],[11,12,13,14]]
#student_ids = [1,2,3]
I want to replace elements in array1 that are included in #student_ids with 'X'. I want to see:
[['X','X','X',4,5],[7,8,9,10],[11,12,13,14]]
I have code that is intended to do this:
array1.collect! do |i|
if i.include?(#student_ids) #
i[i.index(#student_ids)] = 'X'; i # I want to replace all with X
else
i
end
end
If #student_ids is 1, then it works, but if #student_ids has more than one element such as 1,2,3, it raises errors. Any help?
It's faster to use a hash or a set than to repeatedly test [1,2,3].include?(n).
arr = [[1,2,3,4,5],[7,8,9,10],[11,12,13,14]]
ids = [1,2,3]
Use a hash
h = ids.product(["X"]).to_h
#=> {1=>"X", 2=>"X", 3=>"X"}
arr.map { |a| a.map { |n| h.fetch(n, n) } }
#=> [["X", "X", "X", 4, 5], [7, 8, 9, 10], [11, 12, 13, 14]]
See Hash#fetch.
Use a set
require 'set'
ids = ids.to_set
#=> #<Set: {1, 2, 3}>
arr.map { |a| a.map { |n| ids.include?(n) ? "X" : n } }
#=> [["X", "X", "X", 4, 5], [7, 8, 9, 10], [11, 12, 13, 14]]
Replace both maps with map! if the array is to be modified in place (mutated).
Try following, (taking #student_ids = [1, 2, 3])
array1.inject([]) { |m,a| m << a.map { |x| #student_ids.include?(x) ? 'X' : x } }
# => [["X", "X", "X", 4, 5], [7, 8, 9, 10], [11, 12, 13, 14]]
You can use each_with_index and replace the item you want:
array1 = [[1,2,3,4,5],[7,8,9,10],[11,12,13,14]]
#student_ids = [1,2,3]
array1.each_with_index do |sub_array, index|
sub_array.each_with_index do |item, index2|
array1[index][index2] = 'X' if #student_ids.include?(item)
end
end
You can do the following:
def remove_student_ids(arr)
arr.each_with_index do |value, index|
arr[index] = 'X' if #student_ids.include?(value) }
end
end
array1.map{ |sub_arr| remove_student_ids(sub_arr)}

How to merge hash of hashes and set default value if value don't exists

I need to merge values of hash a into out with sort keys in a.
a = {"X"=>{12=>1, 11=>4}, "Y"=>{11=>5}, "Z"=>{12=>5}}
out = [
{"X": [4, 1]},
{"Y": [5, 0]},
{"Z": [0, 5]},
]
I would do something like this:
a = {"X"=>{12=>1, 11=>4}, "Y"=>{11=>5}, "Z"=>{12=>5}}
sorted_keys = a.values.flat_map(&:keys).uniq.sort
#=> [11, 12]
a.map { |k, v| { k => v.values_at(*sorted_keys).map(&:to_i) } }
#=> [ { "X" => [4, 1] }, { "Y" => [5, 0] }, { "Z" => [0, 5] }]
Code
def modify_values(g)
sorted_keys = g.reduce([]) {|arr,(_,v)| arr | v.keys}.sort
g.each_with_object({}) {|(k,v),h| h[k] = Hash.new(0).merge(v).values_at(*sorted_keys)}
end
Example
g = {"X"=>{12=>1, 11=>4}, "Y"=>{11=>5}, "Z"=>{12=>5}}
modify_values(g)
#=> {"X"=>[4, 1], "Y"=>[5, 0], "Z"=>[0, 5]}
Explanation
The steps are as follows (for the hash a in the example). First obtain an array of the unique keys from g's values (see Enumerable#reduce and Array#|), then sort that array.
b = a.reduce([]) {|arr,(_,v)| arr | v.keys}
#=> [12, 11]
sorted_keys = b.sort
#=> [11, 12]
The first key-value pair of a, together with an empty hash, is passed to each_with_object's block. The block variables are computed using parallel assignment:
(k,v),h = [["X", {12=>1, 11=>4}], {}]
k #=> "X"
v #=> {12=>1, 11=>4}
h #=> {}
The block calculation is then performed. First an empty hash with a default value 0 is created:
f = Hash.new(0)
#=> {}
The hash v is then merged into f. The result is hash with the same key-value pairs as v but with a default value of 0. The significance of the default value is that if f does not have a key k, f[k] returns the default value. See Hash::new.
g = f.merge(v)
#=> {12=>1, 11=>4}
g.default
#=> 0 (yup)
Then extract the values corresponding to sorted_keys:
h[k] = g.values_at(*sorted_keys)
#=> {12=>1, 11=>4}.values_at(11, 12)
#=> [4, 1]
When a's next key-value pair is passed to the block, the calculations are as follows.
(k,v),h = [["Y", {11=>5}], {"X"=>[4, 1]}] # Note `h` has been updated
k #=> "Y"
v #=> {11=>5}
h #=> {"X"=>[4, 1]}
f = Hash.new(0)
#=> {}
g = f.merge(v)
#=> {11=>5}
h[k] = g.values_at(*sorted_keys)
#=> {11=>5}.values_at(11, 12)
#=> [5, 0] (Note h[12] equals h's default value)
and now
h #=> {"X"=>[4, 1], "Y"=>[5, 0]}
The calculation for the third key-value pair of a is similar.

Ruby: How to find the most frequent substring of length n? [duplicate]

I have this program with a class DNA. The program counts the most frequent k-mer in a string. So, it is looking for the most common substring in a string with a length of k.
An example would be creating a dna1 object with a string of AACCAATCCG. The count k-mer method will look for a subtring with a length of k and output the most common answer. So, if we set k = 1 then 'A' and 'C' will be the most occurrence in the string because it appears four times. See example below:
dna1 = DNA.new('AACCAATCCG')
=> AACCAATCCG
>> dna1.count_kmer(1)
=> [#<Set: {"A", "C"}>, 4]
>> dna1.count_kmer(2)
=> [#<Set: {"AA", "CC"}>, 2]
Here is my DNA class :
class DNA
def initialize (nucleotide)
#nucleotide = nucleotide
end
def length
#nucleotide.length
end
protected
attr_reader :nucleotide
end
Here is my count kmer method that I am trying to implement:
# I have k as my only parameter because I want to pass the nucleotide string in the method
def count_kmer(k)
# I created an array as it seems like a good way to split up the nucleotide string.
counts = []
#this tries to count how many kmers of length k there are
num_kmers = self.nucleotide.length- k + 1
#this should try and look over the kmer start positions
for i in num_kmers
#Slice the string, so that way we can get the kmer
kmer = self.nucleotide.split('')
end
#add kmer if its not present
if !kmer = counts
counts[kmer] = 0
#increment the count for kmer
counts[kmer] +=1
end
#return the final count
return counts
end
#end dna class
end
I'm not sure where my method went wrong.
Something like this?
require 'set'
def count_kmer(k)
max_kmers = kmers(k)
.each_with_object(Hash.new(0)) { |value, count| count[value] += 1 }
.group_by { |_,v| v }
.max
[Set.new(max_kmers[1].map { |e| e[0] }), max_kmers[0]]
end
def kmers(k)
nucleotide.chars.each_cons(k).map(&:join)
end
EDIT: Here's the full text of the class:
require 'set'
class DNA
def initialize (nucleotide)
#nucleotide = nucleotide
end
def length
#nucleotide.length
end
def count_kmer(k)
max_kmers = kmers(k)
.each_with_object(Hash.new(0)) { |value, count| count[value] += 1 }
.group_by { |_,v| v }
.max
[Set.new(max_kmers[1].map { |e| e[0] }), max_kmers[0]]
end
def kmers(k)
nucleotide.chars.each_cons(k).map(&:join)
end
protected
attr_reader :nucleotide
end
This produces the following output, using Ruby 2.2.1, using the class and method you specified:
>> dna1 = DNA.new('AACCAATCCG')
=> #<DNA:0x007fe15205bc30 #nucleotide="AACCAATCCG">
>> dna1.count_kmer(1)
=> [#<Set: {"A", "C"}>, 4]
>> dna1.count_kmer(2)
=> [#<Set: {"AA", "CC"}>, 2]
As a bonus, you can also do:
>> dna1.kmers(2)
=> ["AA", "AC", "CC", "CA", "AA", "AT", "TC", "CC", "CG"]
Code
def most_frequent_substrings(str, k)
(0..str.size-k).each_with_object({}) do |i,h|
b = []
str[i..-1].scan(Regexp.new str[i,k]) { b << Regexp.last_match.begin(0) + i }
(h[b.size] ||= []) << b
end.max_by(&:first).last.each_with_object({}) { |a,h| h[str[a.first,k]] = a }
end
Example
str = "ABBABABBABCATSABBABB"
most_frequent_substrings(str, 4)
#=> {"ABBA"=>[0, 5, 14], "BBAB"=>[1, 6, 15]}
This shows that the most frequently-occurring 4-character substring of strappears 3 times. There are two such substrings: "ABBA" and "BBAB". "ABBA" begins at offsets (into str) 0, 5 and 14, "BBAB" substrings begin at offsets 1, 6 and 15.
Explanation
For the example above the steps are as follows.
k = 4
n = str.size - k
#=> 20 - 4 => 16
e = (0..n).each_with_object([])
#<Enumerator: 0..16:each_with_object([])>
We can see the values that will be generated by this enumerator by converting it to an array.
e.to_a
#=> [[0, []], [1, []], [2, []], [3, []], [4, []], [5, []], [6, []], [7, []], [8, []],
# [9, []], [10, []], [11, []], [12, []], [13, []], [14, []], [15, []], [16, []]]
Note the empty array contained in each element will be modified as the array is built. Continuing, the first element of e is passed to the block and the block variables are assigned using parallel assignment:
i,a = e.next
#=> [0, []]
i #=> 0
a #=> []
We are now considering the substring of size 4 that begins at str offset i #=> 0, which is seen to be "ABBA". Now the block calculation is performed.
b = []
r = Regexp.new str[i,k]
#=> Regexp.new str[0,4]
#=> Regexp.new "ABBA"
#=> /ABAB/
str[i..-1].scan(r) { b << Regexp.last_match.begin(0) + i }
#=> "ABBABABBABCATSABBABB".scan(r) { b << Regexp.last_match.begin(0) + i }
b #=> [0, 5, 14]
We next have
(h[b.size] ||= []) << b
which becomes
(h[b.size] = h[b.size] || []) << b
#=> (h[3] = h[3] || []) << [0, 5, 14]
Since h has no key 3, h[3] on the right side equals nil. Continuing,
#=> (h[3] = nil || []) << [0, 5, 14]
#=> (h[3] = []) << [0, 5, 14]
h #=> { 3=>[[0, 5, 14]] }
Notice that we throw away scan's return value. All we need is b
This tells us the "ABBA" appears thrice in str, beginning at offsets 0, 5 and 14.
Now observe
e.to_a
#=> [[0, [[0, 5, 14]]], [1, [[0, 5, 14]]], [2, [[0, 5, 14]]],
# ...
# [16, [[0, 5, 14]]]]
After all elements of e have been passed to the block, the block returns
h #=> {3=>[[0, 5, 14], [1, 6, 15]],
# 1=>[[2], [3], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16]],
# 2=>[[4, 16], [5, 14], [6, 15]]}
Consider substrings that appear just once: h[1]. One of those is [2]. This pertains to the 4-character substring beginning at str offset 2:
str[2,4]
#=> "BABA"
That is found to be the only instance of that substring. Similarly, among the substrings that appear twice is str[4,4] = str[16,4] #=> "BABB", given by h[2][0] #=> [4, 16].
Next we determine the greatest frequency of a substring of length 4:
c = h.max_by(&:first)
#=> [3, [[0, 5, 14], [1, 6, 15]]]
(which could also be written c = h.max_by { |k,_| k }).
d = c.last
#=> [[0, 5, 14], [1, 6, 15]]
For convenience, convert d to a hash:
d.each_with_object({}) { |a,h| h[str[a.first,k]] = a }
#=> {"ABBA"=>[0, 5, 14], "BBAB"=>[1, 6, 15]}
and return that hash from the method.
There is one detail that deserves mention. It is possible that d will contain two or more arrays that reference the same substring, in which case the value of the associated key (the substring) will equal the last of those arrays. Here's a simple example.
str = "AAA"
k = 2
In this case the array d above will equal
d = [[0], [1]]
Both of these reference str[0,2] #=> str[1,2] #=> "AA". In building the hash the first is overwritten by the second:
d.each_with_object({}) { |a,h| h[str[a.first,k]] = a }
#=> {"AA"=>[1]}

How to write a method that counts the most common substring in a string in ruby?

I have this program with a class DNA. The program counts the most frequent k-mer in a string. So, it is looking for the most common substring in a string with a length of k.
An example would be creating a dna1 object with a string of AACCAATCCG. The count k-mer method will look for a subtring with a length of k and output the most common answer. So, if we set k = 1 then 'A' and 'C' will be the most occurrence in the string because it appears four times. See example below:
dna1 = DNA.new('AACCAATCCG')
=> AACCAATCCG
>> dna1.count_kmer(1)
=> [#<Set: {"A", "C"}>, 4]
>> dna1.count_kmer(2)
=> [#<Set: {"AA", "CC"}>, 2]
Here is my DNA class :
class DNA
def initialize (nucleotide)
#nucleotide = nucleotide
end
def length
#nucleotide.length
end
protected
attr_reader :nucleotide
end
Here is my count kmer method that I am trying to implement:
# I have k as my only parameter because I want to pass the nucleotide string in the method
def count_kmer(k)
# I created an array as it seems like a good way to split up the nucleotide string.
counts = []
#this tries to count how many kmers of length k there are
num_kmers = self.nucleotide.length- k + 1
#this should try and look over the kmer start positions
for i in num_kmers
#Slice the string, so that way we can get the kmer
kmer = self.nucleotide.split('')
end
#add kmer if its not present
if !kmer = counts
counts[kmer] = 0
#increment the count for kmer
counts[kmer] +=1
end
#return the final count
return counts
end
#end dna class
end
I'm not sure where my method went wrong.
Something like this?
require 'set'
def count_kmer(k)
max_kmers = kmers(k)
.each_with_object(Hash.new(0)) { |value, count| count[value] += 1 }
.group_by { |_,v| v }
.max
[Set.new(max_kmers[1].map { |e| e[0] }), max_kmers[0]]
end
def kmers(k)
nucleotide.chars.each_cons(k).map(&:join)
end
EDIT: Here's the full text of the class:
require 'set'
class DNA
def initialize (nucleotide)
#nucleotide = nucleotide
end
def length
#nucleotide.length
end
def count_kmer(k)
max_kmers = kmers(k)
.each_with_object(Hash.new(0)) { |value, count| count[value] += 1 }
.group_by { |_,v| v }
.max
[Set.new(max_kmers[1].map { |e| e[0] }), max_kmers[0]]
end
def kmers(k)
nucleotide.chars.each_cons(k).map(&:join)
end
protected
attr_reader :nucleotide
end
This produces the following output, using Ruby 2.2.1, using the class and method you specified:
>> dna1 = DNA.new('AACCAATCCG')
=> #<DNA:0x007fe15205bc30 #nucleotide="AACCAATCCG">
>> dna1.count_kmer(1)
=> [#<Set: {"A", "C"}>, 4]
>> dna1.count_kmer(2)
=> [#<Set: {"AA", "CC"}>, 2]
As a bonus, you can also do:
>> dna1.kmers(2)
=> ["AA", "AC", "CC", "CA", "AA", "AT", "TC", "CC", "CG"]
Code
def most_frequent_substrings(str, k)
(0..str.size-k).each_with_object({}) do |i,h|
b = []
str[i..-1].scan(Regexp.new str[i,k]) { b << Regexp.last_match.begin(0) + i }
(h[b.size] ||= []) << b
end.max_by(&:first).last.each_with_object({}) { |a,h| h[str[a.first,k]] = a }
end
Example
str = "ABBABABBABCATSABBABB"
most_frequent_substrings(str, 4)
#=> {"ABBA"=>[0, 5, 14], "BBAB"=>[1, 6, 15]}
This shows that the most frequently-occurring 4-character substring of strappears 3 times. There are two such substrings: "ABBA" and "BBAB". "ABBA" begins at offsets (into str) 0, 5 and 14, "BBAB" substrings begin at offsets 1, 6 and 15.
Explanation
For the example above the steps are as follows.
k = 4
n = str.size - k
#=> 20 - 4 => 16
e = (0..n).each_with_object([])
#<Enumerator: 0..16:each_with_object([])>
We can see the values that will be generated by this enumerator by converting it to an array.
e.to_a
#=> [[0, []], [1, []], [2, []], [3, []], [4, []], [5, []], [6, []], [7, []], [8, []],
# [9, []], [10, []], [11, []], [12, []], [13, []], [14, []], [15, []], [16, []]]
Note the empty array contained in each element will be modified as the array is built. Continuing, the first element of e is passed to the block and the block variables are assigned using parallel assignment:
i,a = e.next
#=> [0, []]
i #=> 0
a #=> []
We are now considering the substring of size 4 that begins at str offset i #=> 0, which is seen to be "ABBA". Now the block calculation is performed.
b = []
r = Regexp.new str[i,k]
#=> Regexp.new str[0,4]
#=> Regexp.new "ABBA"
#=> /ABAB/
str[i..-1].scan(r) { b << Regexp.last_match.begin(0) + i }
#=> "ABBABABBABCATSABBABB".scan(r) { b << Regexp.last_match.begin(0) + i }
b #=> [0, 5, 14]
We next have
(h[b.size] ||= []) << b
which becomes
(h[b.size] = h[b.size] || []) << b
#=> (h[3] = h[3] || []) << [0, 5, 14]
Since h has no key 3, h[3] on the right side equals nil. Continuing,
#=> (h[3] = nil || []) << [0, 5, 14]
#=> (h[3] = []) << [0, 5, 14]
h #=> { 3=>[[0, 5, 14]] }
Notice that we throw away scan's return value. All we need is b
This tells us the "ABBA" appears thrice in str, beginning at offsets 0, 5 and 14.
Now observe
e.to_a
#=> [[0, [[0, 5, 14]]], [1, [[0, 5, 14]]], [2, [[0, 5, 14]]],
# ...
# [16, [[0, 5, 14]]]]
After all elements of e have been passed to the block, the block returns
h #=> {3=>[[0, 5, 14], [1, 6, 15]],
# 1=>[[2], [3], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16]],
# 2=>[[4, 16], [5, 14], [6, 15]]}
Consider substrings that appear just once: h[1]. One of those is [2]. This pertains to the 4-character substring beginning at str offset 2:
str[2,4]
#=> "BABA"
That is found to be the only instance of that substring. Similarly, among the substrings that appear twice is str[4,4] = str[16,4] #=> "BABB", given by h[2][0] #=> [4, 16].
Next we determine the greatest frequency of a substring of length 4:
c = h.max_by(&:first)
#=> [3, [[0, 5, 14], [1, 6, 15]]]
(which could also be written c = h.max_by { |k,_| k }).
d = c.last
#=> [[0, 5, 14], [1, 6, 15]]
For convenience, convert d to a hash:
d.each_with_object({}) { |a,h| h[str[a.first,k]] = a }
#=> {"ABBA"=>[0, 5, 14], "BBAB"=>[1, 6, 15]}
and return that hash from the method.
There is one detail that deserves mention. It is possible that d will contain two or more arrays that reference the same substring, in which case the value of the associated key (the substring) will equal the last of those arrays. Here's a simple example.
str = "AAA"
k = 2
In this case the array d above will equal
d = [[0], [1]]
Both of these reference str[0,2] #=> str[1,2] #=> "AA". In building the hash the first is overwritten by the second:
d.each_with_object({}) { |a,h| h[str[a.first,k]] = a }
#=> {"AA"=>[1]}

How to quickly print Ruby hashes in a table format?

Is there a way to quickly print a ruby hash in a table format into a file?
Such as:
keyA keyB keyC ...
123 234 345
125 347
4456
...
where the values of the hash are arrays of different sizes. Or is using a double loop the only way?
Thanks
Try this gem I wrote (prints hashes, ruby objects, ActiveRecord objects in tables): http://github.com/arches/table_print
Here's a version of steenslag's that works when the arrays aren't the same size:
size = h.values.max_by { |a| a.length }.length
m = h.values.map { |a| a += [nil] * (size - a.length) }.transpose.insert(0, h.keys)
nil seems like a reasonable placeholder for missing values but you can, of course, use whatever makes sense.
For example:
>> h = {:a => [1, 2, 3], :b => [4, 5, 6, 7, 8], :c => [9]}
>> size = h.values.max_by { |a| a.length }.length
>> m = h.values.map { |a| a += [nil] * (size - a.length) }.transpose.insert(0, h.keys)
=> [[:a, :b, :c], [1, 4, 9], [2, 5, nil], [3, 6, nil], [nil, 7, nil], [nil, 8, nil]]
>> m.each { |r| puts r.map { |x| x.nil?? '' : x }.inspect }
[:a, :b, :c]
[ 1, 4, 9]
[ 2, 5, ""]
[ 3, 6, ""]
["", 7, ""]
["", 8, ""]
h = {:a => [1, 2, 3], :b => [4, 5, 6], :c => [7, 8, 9]}
p h.values.transpose.insert(0, h.keys)
# [[:a, :b, :c], [1, 4, 7], [2, 5, 8], [3, 6, 9]]
No, there's no built-in function. Here's a code that would format it as you want it:
data = { :keyA => [123, 125, 4456], :keyB => [234000], :keyC => [345, 347] }
length = data.values.max_by{ |v| v.length }.length
widths = {}
data.keys.each do |key|
widths[key] = 5 # minimum column width
# longest string len of values
val_len = data[key].max_by{ |v| v.to_s.length }.to_s.length
widths[key] = (val_len > widths[key]) ? val_len : widths[key]
# length of key
widths[key] = (key.to_s.length > widths[key]) ? key.to_s.length : widths[key]
end
result = ""
data.keys.each {|key| result += key.to_s.ljust(widths[key]) + " " }
result += "\n"
for i in 0.upto(length)
data.keys.each { |key| result += data[key][i].to_s.ljust(widths[key]) + " " }
result += "\n"
end
# TODO write result to file...
Any comments and edits to refine the answer are very welcome.

Resources