How can I refactor my word frequency method? - ruby

This is my method word_frequency.
def frequencies(text)
words = text.split
the_frequencies = Hash.new(0)
words.each do |word|
the_frequencies[word] += 1
end
return the_frequencies
end
def most_common_words(file_name, stop_words_file_name, number_of_word)
# TODO: return hash of occurences of number_of_word most frequent words
opened_file_string = File.open(file_name.to_s).read.downcase.strip.split.join(" ").gsub(/[^a-zA-Z \'$]/, "").gsub(/'s/, "").split
opened_stop_file_string = File.open(stop_words_file_name.to_s).read.downcase.strip.split.join(" ").gsub(/[^a-zA-Z \']/, "").gsub(/'s/, "").split
# declarar variables de file_name stop words.
filtered_array = opened_file_string.reject { |n| opened_stop_file_string.include? n }
the_frequencies = Hash.new(0)
filtered_array.each do |word|
the_frequencies[word] += 1
end
store = the_frequencies.sort_by { |_key, value| value }.reverse[0..number_of_word - 1].to_h
store
end
Works well, but I think I can do it better. Rubocop says my lines are too long, and I'm agree, but this is my best. Can someone explain how I can do it better?

It would be good if you just decompose the big parts. The most_common_words seems still delicate, you could explain what you're trying to do, to see what can else can be done there.
You could also make use of frequencies, and looking at the pattern within the method arguments, an OOP approach would fit better here.
def join_file(file_name)
File.open(file_name).read.downcase.strip.split.join(' ')
end
def frequencies(text)
text.split.each_with_object(Hash.new(0)) { |word, hash| hash[word] += 1 }
end
def opened_file_string(file_name)
join_file(file_name).gsub(/[^a-zA-Z \'$]/, '').gsub(/'s/, '').split
end
def opened_stop_file_string(file_name)
#opened_stop_file_string ||= join_file(file_name).gsub(/[^a-zA-Z \']/, '').gsub(/'s/, '').split
end
def in_stop_file_string?(file_name, word)
opened_stop_file_string(file_name).include?(word)
end
def filtered_array(file_name, stop_words_file_name)
opened_file_string(file_name).reject do |word|
in_stop_file_string?(stop_words_file_name, word)
end
end
def frequencies_in_filtered_array(file_name, stop_words_file_name)
frequencies(filtered_array(file_name, stop_words_file_name)).sort_by { |_, value| value }
end
def most_common_words(file_name, stop_words_file_name, number_of_word)
frequencies_in_filtered_array(file_name.to_s, stop_words_file_name.to_s).reverse[0...number_of_word].to_h
end

This is a bit cleaner, use multiline method chaining etc.
def frequencies(text)
words = text.split
the_frequencies = Hash.new(0)
words.each do |word|
the_frequencies[word] += 1
end
the_frequencies
end
def pre_process_file(file_name)
File.open(file_name.to_s)
.read.downcase.strip.split.join(" ")
.gsub(/[^a-zA-Z \'$]/, "")
.gsub(/'s/, "")
.split
end
def most_common_words(file_name, stop_words_file_name, number_of_word)
# TODO: return hash of occurences of number_of_word most frequent words
opened_file_string = pre_process_file(file_name)
opened_stop_file_string = pre_process_file(stop_words_file_name)
# declarar variables de file_name stop words.
filtered_array = opened_file_string
.reject { |n| opened_stop_file_string.include? n }
the_frequencies = Hash.new(0)
filtered_array.each { |word| the_frequencies[word] += 1 }
the_frequencies
.sort_by { |_k, value| value }
.reverse[0..number_of_word - 1]
.to_h
end

Related

Is there a way to refactor this without using 3 collectors to combine strings?

I'm trying to refactor the following:
def method_name
array = ["abcdef", "ghijkl", "mnopqr"]
collector1 = ""
collector2 = ""
collector3 = ""
array.each do |string|
collector1 += string[0..1]
collector2 += string[2..3]
collector3 += string[4..5]
end
x = collector1 + "\n" + collector2 + "\n" + collector3
# "abghmn\ncdijop\nefklqr"
end
Are there any more efficient ways to write this? Or perhaps a different enumerable that will achieve the same result?
"abghmn\ncdijop\nefklqr" is the desired output!
Thanks!!
def method_name
array = ["abcdef", "ghijkl", "mnopqr"]
array.map { |s| s.chars.each_slice(2).to_a }.transpose.map(&:join).join("\n")
end
You could write the following.
array = ["abcdef", "ghijkl", "mnopqr"]
ranges = [0..1, 2..3, 4..5]
ranges.map { |r| array.map { |s| s[r] }.join }.join('\n')
#=> "abghmn\\ncdijop\\nefklqr"
or
ranges.map { |r| array.reduce('') { |t,s| t + s[r] } }.join('\n')
#=> "abghmn\\ncdijop\\nefklqr"

Word Count returns an array (of arrays of the form [word, count]) representing the frequency of each word

str = 'put returns between paragraph put returns between paragraph put returns between paragraph'
def word_count(string)
resut= []
return result = string.split.inject(Hash.new(0)) { |h,v| h[v] += 1; h }
end
def parse_word(word)
word.gsub!(/[^a-zA-Z0-9]/, " ")
word.downcase!
#yoo= word
end
result =word_count(str)
print result, "\n\n"
res2 = result.select { |pair| pair[1] > 1 } `#Error coming`
I am getting OutPut
**
OutPut
**
{"put"=>3, "returns"=>3, "between"=>3, "paragraph"=>3}
I need OutPut Like this
**
OutPut
**
{"put"=>3, "returns"=>3, "between"=>3, "paragraph"=>3}
and
put: 3
returns: 3
between: 3
but the main problem is that he gave us the code to do that but i cant able to understand it
I am not getting this what this code will do can anyone help me ...And modify it so it can work
The following processes the first paragraph of put returns ... Note that ss is an array of those words that occur at least twice in this paragraph.
nect = ss.select { |p| p[1] > 1 }
nect .sort.each do |key, count|
puts "#{key}: #{count}"
end
module WordCount
def self.word_count(s)
count_frequency(words_from_string(s))
end
def self.word_count_from_file(filename)
s = File.open(filename) { |file| file.read }
word_count(s)
end
def self.words_from_string(s)
s.downcase.scan(/[\w']+/)
end
def self.count_frequency(words)
counts = Hash.new(0)
for word in words
counts[word] += 1
end
# counts.to_a.sort {|a,b| b[1] <=> a[1]}
# sort by decreasing count, then lexicographically
counts.to_a.sort do |a,b|
[b[1],a[0]] <=> [a[1],b[0]]
end
end
end
def word_count(s)
WordCount.word_count(s)
end

Reverse words of a string in Ruby?

I am trying to reverse the words of a string in Ruby, without using the reverse method. I want to implement the known algorithm of:
Reverse the whole string
Reverse each word in the reversed string.
Here is what I have come up with:
class String
def custom_reverse(start, limit)
i_start = start
i_end = limit - 1
while (i_start <= i_end)
tmp = self[i_start]
self[i_start] = self[i_end]
self[i_end] = tmp
i_start += 1
i_end -= 1
end
return self
end
def custom_reverse_words
self.custom_reverse(0, self.size)
i_start = 0
i_end = 0
while (i_end <= self.length)
if (i_end == self.length || self[i_end] == ' ')
self.custom_reverse(i_start, i_end)
i_start += 1
end
i_end += 1
end
end
end
test_str = "hello there how are you"
p test_str.custom_reverse_words
But the results are "yahthello ow ou er ereh"
What am I missing?
The gist of any reverse operation is to iterate over elements in the reverse order of what you'd normally do. That is, where you'd usually use the set (0..N-1) you'd instead go through (N-1..0) or more specifically N-1-i where i is 0..N-1:
class String
def reverse_words
split(/\s+/).map{|w|wl=w.length-1;(0..wl).map{|i|w[wl-i]}.join}.join(' ')
end
end
puts "this is reverse test".reverse_words.inspect
# => "siht si esrever tset"
The same principle can be applied to the words in a given string.
Interview questions of this sort are of highly dubious value. Being "clever" in production code is usually a Very Bad Idea.
Here's one way to reverse an array without using the built-in reverse:
class Array
def reverse
tmp_ary = self.dup
ret_ary = []
self.size.times do
ret_ary << tmp_ary.pop
end
ret_ary
end
end
%w[a b c].reverse # => ["c", "b", "a"]
tmp_ary.pop is the secret. pop removes elements from the end of the array.
The cleanest solution I could think of is:
class Array
def my_reverse
sort_by.with_index {|_, i| -i}
end
end
class String
def words
split(/\W+/)
end
def revert_words
words.my_reverse.join(' ')
end
def revert_each_word
words.map {|w| w.chars.my_reverse.join}.join(' ')
end
end
Once you define a simple and efficient array reverser:
def reverse_array(a)
(a.length / 2).times {|i| a[i],a[-(i+1)] = a[-(i+1)],a[i]}
a
end
You can reverse a sentence pretty straightforwardly:
def reverse_sentence(s)
reverse_array(s.split('')).join.split(" ").map{|w| reverse_array(w.split('')).join}.join(" ")
end
reverse_sentence "Howdy pardner" # => "pardner Howdy"
Here's another way:
class String
def reverse_words
split.inject([]){|str, word| str.unshift word}.join(' ')
end
def reverse_chars
each_char.inject([]){|str, char| str.unshift char}.join('')
end
end
Revised
Carey raises a good point, reverse_chars can be simplified, since string is already an Enumerable:
class String
def reverse_chars
each_char.inject(""){|str, char| str.insert(0, char) }
end
end

How to get words frequency in efficient way with ruby?

Sample input:
"I was 09809 home -- Yes! yes! You was"
and output:
{ 'yes' => 2, 'was' => 2, 'i' => 1, 'home' => 1, 'you' => 1 }
My code that does not work:
def get_words_f(myStr)
myStr=myStr.downcase.scan(/\w/).to_s;
h = Hash.new(0)
myStr.split.each do |w|
h[w] += 1
end
return h.to_a;
end
print get_words_f('I was 09809 home -- Yes! yes! You was');
This works but I am kinda new to Ruby too. There might be a better solution.
def count_words(string)
words = string.split(' ')
frequency = Hash.new(0)
words.each { |word| frequency[word.downcase] += 1 }
return frequency
end
Instead of .split(' '), you could also do .scan(/\w+/); however, .scan(/\w+/) would separate aren and t in "aren't", while .split(' ') won't.
Output of your example code:
print count_words('I was 09809 home -- Yes! yes! You was');
#{"i"=>1, "was"=>2, "09809"=>1, "home"=>1, "yes"=>2, "you"=>1}
def count_words(string)
string.scan(/\w+/).reduce(Hash.new(0)){|res,w| res[w.downcase]+=1;res}
end
Second variant:
def count_words(string)
string.scan(/\w+/).each_with_object(Hash.new(0)){|w,h| h[w.downcase]+=1}
end
def count_words(string)
Hash[
string.scan(/[a-zA-Z]+/)
.group_by{|word| word.downcase}
.map{|word, words|[word, words.size]}
]
end
puts count_words 'I was 09809 home -- Yes! yes! You was'
This code will ask you for input and then find the word frequency for you:
puts "enter some text man"
text = gets.chomp
words = text.split(" ")
frequencies = Hash.new(0)
words.each { |word| frequencies[word.downcase] += 1 }
frequencies = frequencies.sort_by {|a, b| b}
frequencies.reverse!
frequencies.each do |word, frequency|
puts word + " " + frequency.to_s
end
This works, and ignores the numbers:
def get_words(my_str)
my_str = my_str.scan(/\w+/)
h = Hash.new(0)
my_str.each do |s|
s = s.downcase
if s !~ /^[0-9]*\.?[0-9]+$/
h[s] += 1
end
end
return h
end
print get_words('I was there 1000 !')
puts '\n'
You can look at my code that splits the text into words. The basic code would look as follows:
sentence = "Ala ma kota za 5zł i 10$."
splitter = SRX::Polish::WordSplitter.new(sentence)
histogram = Hash.new(0)
splitter.each do |word,type|
histogram[word.downcase] += 1 if type == :word
end
p histogram
You should be careful if you wish to work with languages other than English, since in Ruby 1.9 the downcase won't work as you expected for letters such as 'Ł'.
class String
def frequency
self.scan(/[a-zA-Z]+/).each.with_object(Hash.new(0)) do |word, hash|
hash[word.downcase] += 1
end
end
end
puts "I was 09809 home -- Yes! yes! You was".frequency

Recursively merge multidimensional arrays, hashes and symbols

I need a chunk of Ruby code to combine an array of contents like such:
[{:dim_location=>[{:dim_city=>:dim_state}]},
:dim_marital_status,
{:dim_location=>[:dim_zip, :dim_business]}]
into:
[{:dim_location => [:dim_business, {:dim_city=>:dim_state}, :dim_zip]},
:dim_marital_status]
It needs to support an arbitrary level of depth, though the depth will rarely be beyond 8 levels deep.
Revised after comment:
source = [{:dim_location=>[{:dim_city=>:dim_state}]}, :dim_marital_status, {:dim_location=>[:dim_zip, :dim_business]}]
expected = [{:dim_location => [:dim_business, {:dim_city=>:dim_state}, :dim_zip]}, :dim_marital_status]
source2 = [{:dim_location=>{:dim_city=>:dim_state}}, {:dim_location=>:dim_city}]
def merge_dim_locations(array)
return array unless array.is_a?(Array)
values = array.dup
dim_locations = values.select {|x| x.is_a?(Hash) && x.has_key?(:dim_location)}
old_index = values.index(dim_locations[0]) unless dim_locations.empty?
merged = dim_locations.inject({}) do |memo, obj|
values.delete(obj)
x = merge_dim_locations(obj[:dim_location])
if x.is_a?(Array)
memo[:dim_location] = (memo[:dim_location] || []) + x
else
memo[:dim_location] ||= []
memo[:dim_location] << x
end
memo
end
unless merged.empty?
values.insert(old_index, merged)
end
values
end
puts "source1:"
puts source.inspect
puts "result1:"
puts merge_dim_locations(source).inspect
puts "expected1:"
puts expected.inspect
puts "\nsource2:"
puts source2.inspect
puts "result2:"
puts merge_dim_locations(source2).inspect
I don't think there's enough detail in your question to give you a complete answer, but this might get you started:
class Hash
def recursive_merge!(other)
other.keys.each do |k|
if self[k].is_a?(Array) && other[k].is_a?(Array)
self[k] += other[k]
elsif self[k].is_a?(Hash) && other[k].is_a?(Hash)
self[k].recursive_merge!(other[k])
else
self[k] = other[k]
end
end
self
end
end

Resources