ruby: find item with most occurrences in array, if there is - arrays

With this code I can find most occurrences of items in an array:
letters.max_by { |i| letters.count(i) }
But this will return 2 for
a = [1, 2, 2, 3, 3]
although 3 has the same occurrence. How can I find out, if there really is an item with most occurrences? I would like to get false if there is no single champion.

This is pretty ugly and in need of refinement, but:
# Returns the single most frequent element of +array+, or +false+ when no
# unique winner exists (a tie for first place, or an empty array).
#
# @param array [Array] elements to inspect; equal elements are grouped together
# @return [Object, false] the unique most frequent element, otherwise false
def champion(array)
  # An empty array has no counts at all; without this guard the max count
  # would be nil and the lookup below would fail.
  return false if array.empty?

  # Map every occurrence count to the groups of equal elements that have it.
  by_count = array.group_by(&:itself).values.group_by(&:length)
  leaders = by_count[by_count.keys.max]
  leaders.length == 1 ? leaders.first.first : false
end
I'm not sure there's an easy single-shot solution for this, at least not one that's not O(n^2) or worse, which is unusual.

I guess you could do this if you don't care about performance:
# Returns the most frequent element of +arr+, or +false+ when several
# elements share the highest count. Scanning the sorted array in both
# directions picks different elements exactly when there is a tie.
def max_occurrences(arr)
  ascending = arr.sort
  from_low = ascending.max_by { |v| arr.count(v) }
  from_high = ascending.reverse.max_by { |v| arr.count(v) }
  return false if from_low != from_high

  arr.max_by { |v| arr.count(v) }
end

I would do something like this:
# Returns the single most frequent element of +arr+, or +false+ when there
# is a tie for the highest count or +arr+ is empty.
#
# @param arr [Array] elements to count (used as hash keys)
# @return [Object, false] the unique most frequent element, otherwise false
def max_occurrences(arr)
  # No elements means no champion; this also avoids calling methods on nil.
  return false if arr.empty?

  counts = Hash.new(0)
  arr.each { |el| counts[el] += 1 } # O(n)
  top = counts.values.max # O(n), no sort needed
  leaders = counts.select { |_, count| count == top }.keys # O(n)
  leaders.length == 1 ? leaders.first : false
end
It's no snazzy one-liner, but it's readable and runs in less than O(n log n).

a = [1, 2, 2, 3, 3]
# Count how many times each element appears.
occurrences = a.inject(Hash.new(0)) { |h, el| h[el] += 1; h } # => {1=>1, 2=>2, 3=>2}
# max_by would return a single [element, count] pair, which cannot reveal a
# tie, so collect every element that reaches the highest count instead.
max_count = occurrences.values.max # => 2
leaders = occurrences.select { |_, count| count == max_count }.keys # => [2, 3]
winner = leaders.size == 1 ? leaders.first : false # => false

Related

Recursively setting hash keys from an array of keys

I want a function that can take an array like [:a, :b, :c] and recursively set hash keys, creating what it needs as it goes.
hash = {}
hash_setter(hash, [:a, :b, :c], 'value')
hash #=> {:a => {:b => {:c => 'value' } } }
hash_setter(hash, [:a, :b, :h], 'value2')
hash #=> {:a => {:b => {:c => 'value', :h => 'value2' } } }
I'm aware that Ruby 2.3's dig can be used for getting a value in this way, though that doesn't quite get you to an answer. If there were a setter equivalent of dig, that would be what I'm looking for.
Code
# Stores +v+ in +h+ under the nested path +keys+ and returns the result.
#
# When +h+ is empty, or +keys+ has a single element, a new hash is returned
# and +h+ is left untouched; otherwise +h+ itself is mutated and returned.
# Intermediate levels that are missing are created on the way down.
#
# @param keys [Array] path of keys, outermost first
# @param v [Object] value to store at the end of the path
# @param h [Hash] hash to extend (defaults to a fresh empty hash)
# @return [Hash] hash containing the new value
def nested_hash(keys, v, h = {})
  return subhash(keys, v) if h.empty?
  return h.merge(subhash(keys, v)) if keys.size == 1

  *parents, last_key = keys
  # `||=` creates a missing level instead of failing on nil.
  parents.reduce(h) { |node, k| node[k] ||= {} }[last_key] = v
  h
end

# Builds a fresh nested hash holding +v+ at the path +keys+.
#
# @param keys [Array] path of keys, outermost first
# @param v [Object] value stored under the last key
# @return [Hash] e.g. subhash([:a, :b], 1) #=> {:a=>{:b=>1}}
def subhash(keys, v)
  *first_keys, last_key = keys
  # Wrap from the innermost pair outwards.
  first_keys.reverse_each.reduce({ last_key => v }) { |inner, k| { k => inner } }
end
Examples
h = nested_hash([:a, :b, :c], 14) #=> {:a=>{:b=>{:c=>14}}}
i = nested_hash([:a, :b, :d], 25, h) #=> {:a=>{:b=>{:c=>14, :d=>25}}}
j = nested_hash([:a, :b, :d], 99, i) #=> {:a=>{:b=>{:c=>14, :d=>99}}}
k = nested_hash([:a, :e], 104, j) #=> {:a=>{:b=>{:c=>14, :d=>99}, :e=>104}}
nested_hash([:f], 222, k) #=> {:a=>{:b=>{:c=>14, :d=>99}, :e=>104}, :f=>222}
Observe that the value of :d is overridden in the calculation of j. Also note that:
subhash([:a, :b, :c], 12)
#=> {:a=>{:b=>{:c=>12}}}
This mutates the hash h. If that is not desired one could insert the line
f = Marshal.load(Marshal.dump(h))
after the line return subhash(keys, v) if h.empty? and change subsequent references to h to f. Methods from the Marshal module can be used to create a deep copy of a hash so that the original hash is not mutated.
Solved it with recursion:
# Recursively sets +val+ in +hash+ at the nested path +key_arr+, creating
# intermediate hashes as needed. An intermediate value that is not a Hash
# is replaced by a new Hash. The key array passed in is not modified.
#
# @param hash [Hash] hash to mutate
# @param key_arr [Array] path of keys, outermost first
# @param val [Object] value to store at the end of the path
# @return [Object] +val+
def hash_setter(hash, key_arr, val)
  # Destructure instead of shift so the caller's array stays intact.
  key, *rest = key_arr
  if rest.empty?
    hash[key] = val
  else
    hash[key] = {} unless hash[key].is_a?(Hash)
    hash_setter(hash[key], rest, val)
  end
end
# Sets +value+ in +initial+ at the nested +keypath+, creating missing
# intermediate hashes, and returns +initial+.
#
# @param initial [Hash] hash to mutate
# @param keypath [Array] path of keys, outermost first; must not be empty
# @param value [Object] value stored under the last key
# @return [Hash] +initial+
# @raise [ArgumentError] if +keypath+ is empty
def set_value_for_keypath(initial, keypath, value)
  raise ArgumentError, 'keypath must not be empty' if keypath.empty?

  *parents, leaf = keypath
  node = initial
  # Descend one level per key, creating the level when it is absent.
  parents.each { |key| node = (node[key] ||= {}) }
  node[leaf] = value
  initial
end
initial = {:a => {:b => {:c => 'value' } } }
set_value_for_keypath(initial, [:a, :b, :h], 'value2')
initial
Or if you prefer something more unreadable:
# Compact variant: walks every key except the last, creating missing
# levels, then assigns +value+ under the last key. The assigned value is
# the method's result.
def set_value_for_keypath(initial, keypath, value)
  parent_keys = keypath.first(keypath.size - 1)
  target = parent_keys.inject(initial) do |node, key|
    node[key] ||= {}
  end
  target[keypath.last] = value
end

RUBY: Combining 2 different arrays with duplicate values into hash

I have 2 Arrays.
product_name = ["Pomegranate", "Raspberry", "Miracle fruit", "Raspberry"]
product_quantity = [2, 4, 5, 5]
I'd like to know how to initialize a hash such that it becomes
product_hash = {"Pomegranate"=>2, "Raspberry"=>9, "Miracle fruit"=>5}
Use each_with_object:
# Pair every name with its quantity, then keep a running total per name:
# the first sighting stores the quantity, later ones add to it.
product_name.zip(product_quantity)
            .each_with_object({}) do |(name, quantity), totals|
  if totals[name]
    totals[name] += quantity
  else
    totals[name] = quantity
  end
end
#=> {"Pomegranate"=>2, "Raspberry"=>9, "Miracle fruit"=>5}
Or just use hash with default value:
# Hash.new(0) makes unseen names start at zero, so every pair can simply
# be added to its name's total.
product_name.zip(product_quantity)
            .each_with_object(Hash.new(0)) { |pair, totals| totals[pair.first] += pair.last }
#=> {"Pomegranate"=>2, "Raspberry"=>9, "Miracle fruit"=>5}
I would start with something like this:
# Group the [name, quantity] pairs by name, then add up the quantities
# collected for each name and turn the pairs back into a hash.
product_name.zip(product_quantity)
            .group_by { |name, _quantity| name }
            .map { |name, pairs| [name, pairs.map { |_name, quantity| quantity }.reduce(:+)] }
            .to_h
#=> { "Pomegranate" => 2, "Raspberry" => 9, "Miracle fruit" => 5}
I suggest looking up each method in Ruby's docs for Array and Hash, and checking in the console what each intermediate step returns.
This is but a slight variation of #llya's solution #2.
# Walk the indices of product_name and add the quantity found at the same
# index to that name's running total (Hash.new(0) starts each name at 0).
product_name.each_index.with_object(Hash.new(0)) do |i, h|
  h[product_name[i]] += product_quantity[i]
end
Couldn't we just do:
product_name.zip(product_quantity).to_h
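Seems to return the correct result for me? (Note: it does not for this input. to_h keeps only the last value seen for a duplicate key, so "Raspberry" ends up as 5 rather than the desired 9.)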

Can I iterate through an array of arrays and compare it to an array of integers

I have an array of arrays [[1,2,3],[4,5,6],[7,8,9]]. I also have an array of integers [3,4,5,6,8].
Is it possible for me to check if my integers match a complete array in the array of arrays?
So I have 4,5,6 in the int array, and it matches the middle array [4,5,6].
This should work
a = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
integers = [3, 4, 5, 6, 8]
# True when at least one sub-array has no element missing from integers.
a.any? do |sub_array|
  sub_array.none? { |item| !integers.include?(item) }
end
Try this:
array_1 = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
array_2 = [3, 4, 5, 6, 8]
# A sub-array matches when every one of its values occurs in array_2.
array_1.any? { |candidate| candidate.all? { |value| array_2.include?(value) } }
# => true
array1 = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
array2 = [4, 5, 6]
# Strip from every inner array the values that also occur in array2.
result = array1.map do |inner_array|
  inner_array.reject { |value| array2.include?(value) }
end
# => [[1, 2, 3], [], [7, 8, 9]]
# An inner array that ended up empty was fully contained in array2.
result.any?(&:empty?)
# => true
Assuming you expect a true or false and order doesn't matter, the following works:
require 'set'
a1 = [[1,2,3],[4,5,6],[7,8,9]]
a2 = [3,4,5,6,8]
# Build the lookup set once rather than once per inner array.
allowed = a2.to_set
a1.any? { |item| item.to_set.subset?(allowed) } #=> true
Assuming you want the index into a1 or nil
a1.index { |item| item.to_set.subset? a2.to_set }
Assuming you want the subset itself or nil
index = a1.index { |item| item.to_set.subset? a2.to_set }
index && a1[index]

How to merge two arrays of hashes by the same pair of key and value ruby

I'm new in ruby. I have two hashes:
f = { "server"=>[{ "hostname"=>"a1", "ip"=>"10" }, {"hostname"=>"b1", "ip"=>"10.1" }] }
g = { "admin" =>[{ "name"=>"adam", "mail"=>"any", "hostname"=>"a1" },
{ "name"=>"mike", "mail"=>"id", "hostname"=>"b1"}]}
and I want to get another hash like this:
{ "data" => [{"hostname"=>"a1", "ip"=>"10", "name" =>"adam", "mail"=>"any"},
{"hostname"=>"b1", "ip"=>"10.1", "name" =>"mike", "mail"=>"id"}]}
The pairs "hostname"=>"something" always matches in hashes of both arrays. I have tried something like this:
data = server.merge(admin)
but it isn't so easy and as you expect it doesn't work. Could you help me merge these hashes and explain for the future how you did it?
A quick way that i can think of right now will look like:
servers = { "server" => [{"hostname"=>"a1", "ip"=>"10"}, {"hostname"=>"b1", "ip"=>"10.1"}]}
# The admin records as given in the question (not the already-merged result).
admins = { "admin" => [{"name"=>"adam", "mail"=>"any", "hostname"=>"a1"}, {"name"=>"mike", "mail"=>"id", "hostname"=>"b1"}]}
# FYI: you can just use arrays for representing the above data, you don't necessarily need a hash.
# Put all server and admin records into one flat list ...
list_of_entries = (servers.values + admins.values).flatten(1)
# ... bucket them by hostname ...
grouped_by_hostname_entries = list_of_entries.group_by { |h| h['hostname'] }
# ... and merge each bucket's records into a single hash.
merged = grouped_by_hostname_entries.map { |_, values| values.inject({}, :merge) }
#=> [{"hostname"=>"a1", "ip"=>"10", "name"=>"adam", "mail"=>"any"}, {"hostname"=>"b1", "ip"=>"10.1", "name"=>"mike", "mail"=>"id"}]
# Wrap the list under "data" to get the shape the question asks for.
result = { "data" => merged }
As another variant you can try this
h1 = { "server" => [{"hostname"=>"a1", "ip"=>"10"}, {"hostname"=>"b1", "ip"=>"10.1"}]}
h2 = { "admin" => [{"name" =>"adam", "mail"=>"any", "hostname"=>"a1"}, {"name" =>"mike", "mail"=>"id", "hostname"=>"b1"}]}
# Merge each server with the admin stored at the same position.
h1['server'].each_with_index.map { |server, position| server.merge(h2['admin'][position]) }
#=> [{"hostname"=>"a1", "ip"=>"10", "name"=>"adam", "mail"=>"any"}, {"hostname"=>"b1", "ip"=>"10.1", "name"=>"mike", "mail"=>"id"}]
zip let us iterate through two or more arrays at the same time.
We use map to return result.
In map block ar would be equal
[{"hostname"=>"a1", "ip"=>"10"}, {"name"=>"adam", "mail"=>"any", "hostname"=>"a1"}]
[{"hostname"=>"b1", "ip"=>"10.1"}, {"name"=>"mike", "mail"=>"id", "hostname"=>"b1"}]
So ar.first would be {"hostname"=>"a1", "ip"=>"10"} and the ar.last would be {"name"=>"adam", "mail"=>"any", "hostname"=>"a1"}
Finally we use merge to combine two hashes.
Hope this will help.
Code and example
# Index the server records by hostname for direct lookup.
ff = f["server"].inject({}) { |by_host, server| by_host.merge(server["hostname"] => server) }
#=> {"a1"=>{"hostname"=>"a1", "ip"=>"10"}, "b1"=>{"hostname"=>"b1", "ip"=>"10.1"}}
# Merge each admin record with the server record of the same hostname.
{ "data"=>g["admin"].map { |admin| admin.merge(ff[admin["hostname"]]) } }
#=> {"data"=>[{"name"=>"adam", "mail"=>"any", "hostname"=>"a1", "ip"=>"10"},
# {"name"=>"mike", "mail"=>"id", "hostname"=>"b1", "ip"=>"10.1"}]}
Explanation
We want to produce a hash
{ "data"=>arr }
where
arr #=> [{ "name"=>"adam", "mail"=>"any", "hostname"=>"a1", "ip"=>"10" },
# { "name"=>"mike", "mail"=>"id", "hostname"=>"b1", "ip"=>"10.1" }]
so we need only compute arr.
First, we create the hash
ff = f["server"].each_with_object({}) { |g,h| h[g["hostname"]] = g }
#=> {"a1"=>{"hostname"=>"a1", "ip"=>"10"}, "b1"=>{"hostname"=>"b1", "ip"=>"10.1"}}
We have
enum = f["server"].each_with_object({})
#=> #<Enumerator: [{"hostname"=>"a1", "ip"=>"10"},
# {"hostname"=>"b1", "ip"=>"10.1"}]:each_with_object({})>
We can see the elements that will be generated by this enumerator (and passed to its block) by converting it to an array:
enum.to_a
#=> [[{"hostname"=>"a1", "ip"=>"10"}, {}],
# [{"hostname"=>"b1", "ip"=>"10.1"}, {}]]
Note
enum.each { |g,h| h[g["hostname"]] = g }
#=> {"a1"=>{"hostname"=>"a1", "ip"=>"10"},
# "b1"=>{"hostname"=>"b1", "ip"=>"10.1"}}
each passes the first element of enum to the block and assigns the block variables using parallel assignment (also called multiple assignment):
g,h = enum.next
#=> [{"hostname"=>"a1", "ip"=>"10"}, {}]
g #=> {"hostname"=>"a1", "ip"=>"10"}
h #=> {}
We may now perform the block calculation:
h[g["hostname"]] = g
#=> h["a1"] = {"hostname"=>"a1", "ip"=>"10"}
#=> {"hostname"=>"a1", "ip"=>"10"}
The return value is the new value of the block variable h. The second element of enum is then passed to the block and the block calculation is performed:
g,h = enum.next
#=> [{"hostname"=>"b1", "ip"=>"10.1"}, {"a1"=>{"hostname"=>"a1", "ip"=>"10"}}]
g #=> {"hostname"=>"b1", "ip"=>"10.1"}
h #=> {"a1"=>{"hostname"=>"a1", "ip"=>"10"}}
Notice that the hash h has been updated.
h[g["hostname"]] = g
#=> {"hostname"=>"b1", "ip"=>"10.1"}
So now
h #=> {"a1"=>{"hostname"=>"a1", "ip"=>"10"},
# "b1"=>{"hostname"=>"b1", "ip"=>"10.1"}}
and
ff #=> {"a1"=>{"hostname"=>"a1", "ip"=>"10"}, "b1"=>{"hostname"=>"b1", "ip"=>"10.1"}}
Now we can compute arr:
g["admin"].map { |h| h.merge(ff[h["hostname"]]) }
The first element of g["admin"] is passed to the block and assigned to the block variable:
h = g["admin"][0]
#=> {"name"=>"adam", "mail"=>"any", "hostname"=>"a1"}
and the block calculation is performed:
h.merge(ff[h["hostname"]])
#=> h.merge(ff["a1"])
#=> h.merge({"hostname"=>"a1", "ip"=>"10"})
#=> {"name"=>"adam", "mail"=>"any", "hostname"=>"a1", "ip"=>"10"}
Then
h = g["admin"][1]
#=> {"name"=>"mike", "mail"=>"id", "hostname"=>"b1"}
h.merge(ff[h["hostname"]])
#=> h.merge(ff["b1"])
#=> h.merge({"hostname"=>"b1", "ip"=>"10.1"})
#=> {"name"=>"mike", "mail"=>"id", "hostname"=>"b1", "ip"=>"10.1"}
Therefore,
arr
#=> [{"name"=>"adam", "mail"=>"any", "hostname"=>"a1", "ip"=>"10"},
#=> {"name"=>"mike", "mail"=>"id", "hostname"=>"b1", "ip"=>"10.1"}]
is returned by the block and we are finished.
f = { "server"=>[{ "hostname"=>"a1", "ip"=>"10" },
                 {"hostname"=>"b1", "ip"=>"10.1" }] }
g = { "admin" =>[{ "name"=>"adam", "mail"=>"any", "hostname"=>"a1" },
                 { "name"=>"mike", "mail"=>"id", "hostname"=>"b1"}]}
# manual way
host_admin_merge = []
host_admin_merge << f["server"].first.merge(g["admin"].first)
host_admin_merge << f["server"].last.merge(g["admin"].last)
# a bit more automated, iterate, test key's value, append to new array
host_admin_merge = []
f["server"].each do |host|
  g["admin"].each do |admin|
    # The hashes use string keys; a symbol lookup returns nil on both sides
    # and would wrongly pair every host with every admin.
    host_admin_merge << host.merge(admin) if admin["hostname"] == host["hostname"]
  end
end
# assign the array to a hash with "data" as the key
host_admin_hash = {}
host_admin_hash["data"] = host_admin_merge
p host_admin_hash

How to recursion this array of hashes

I'm wondering how to sum the "analytic" value from this array of hashes with recursion.
Input :
[{"id"=>"1234",
"id_data"=>
[{"segment"=>{"segment_name"=>"Android"},
"metrics"=>
{
"logins"=>[1000, 2000],
"sign_ups_conversion"=>{
"count"=>[500, 200],
"cost"=>[2, 4]
}
},
},
{"segment"=>{"segment_name"=>"iOS"},
"metrics"=>
{
"logins"=>[5000, 10000],
"sign_ups_conversion"=>{
"count"=>[100, 50],
"cost"=>[6, 8]
}
},
}
]
},
{"id"=>"5678",
"id_data"=>
[{"segment"=>{"segment_name"=>"Android"},
"metrics"=>
{
"logins"=>[3000, 2000],
"sign_ups_conversion"=>{
"count"=>[300, 400],
"cost"=>[2, 4]
}
},
},
{"segment"=>{"segment_name"=>"iOS"},
"metrics"=>
{
"logins"=>[5000, 10000],
"sign_ups_conversion"=>{
"count"=>[100, 50],
"cost"=>[6, 8]
}
},
}
]
}]
Output :
{
"Android"=>{
"ids" => ['1234','5678'],
"segment" => {"segment_name"=>"Android"},
"id_data" => [{
"logins" => [4000, 4000], # sum by index from 'Android' logins ("logins"=>[1000, 2000] & "logins"=>[3000, 2000]),
"sign_ups_conversion" => {
"count" => [800, 600], # sum by index from 'Android' sign ups count ("count"=>[500, 200] & "count"=>[300, 400])
"cost" => [4, 8] # sum by index from 'Android' sign ups cost ("cost"=>[2, 4] & "cost"=>[2, 4])
}
}]
},
"iOS"=>{
"ids" => ['1234','5678'],
"segment" => {"segment_name"=>"iOS"},
"id_data" => [{
"logins" => [10000, 20000], # sum by index from 'iOS' logins ("logins"=>[5000, 10000] & "logins"=>[5000, 10000]),
"sign_ups_conversion" => {
"count" => [200, 100], # sum by index from 'iOS' sign ups count ("count"=>[100, 50] & "count"=>[100, 50])
"cost" => [12, 16] # sum by index from 'iOS' sign ups cost ("cost"=>[6, 8] & "cost"=>[6, 8])
}
}]
}
}
I tried to solve it with the methods below, but they do not yet aggregate metrics given in hash format (sign_ups_conversion), and I am still figuring out how to make the result equal the expected output.
# Groups the per-id stats by segment name. For every segment it records the
# contributing ids, the segment hash, and the raw values of each metric that
# skip_metric? does not reject, then hands the result to sum_segments.
def aggregate_by_segments(stats_array)
  grouped = stats_array.each_with_object({}) do |stats, acc|
    stats['id_data'].each do |data|
      bucket = (acc[data['segment']['segment_name']] ||= {})
      (bucket['ids'] ||= []) << stats['id']
      bucket['segment'] ||= data['segment']
      collected = (bucket['id_data'] ||= [{}]).first
      data['metrics'].each do |metric, values|
        (collected[metric] ||= []) << values unless skip_metric?(values)
      end
    end
  end
  sum_segments(grouped)
end
# Replaces, in place, every collected metric of every segment with the
# result of sum_segment_metric and returns the same segments hash.
def sum_segments(segments)
  segments.each_value do |details|
    metrics = details['id_data'].first
    metrics.each_key do |name|
      metrics[name] = sum_segment_metric(metrics[name])
    end
  end
  segments
end
# Adds equally sized arrays together position by position,
# e.g. [[1, 2], [3, 4]] becomes [4, 6].
def sum_segment_metric(metric_value)
  columns = metric_value.transpose
  columns.map do |column|
    column.inject { |total, number| total + number }
  end
end
# Only Array metrics are aggregated for now; anything else (including the
# hash format) is skipped.
def skip_metric?(metric_values)
  return false if metric_values.is_a?(Array)

  true
end
############################################
# calls it with aggregate_by_segments(input)
############################################
I believe we should use recursion but i'm still figuring it out, anyone can help me?
Thanks in advance!
The problem here is how to access these data structures. One Ruby strategy is to iterate over the arrays with each and chain the hash keys to reach the nested values, like this:
Supposing that your structure is maintained:
Array[hash[array[hash]]
array_hash.each do |stats|
stats["id_data"].each do |h|
puts h["metrics"]["sign_ups_conversion"]
end
end
# => {"count"=>[500, 200], "cost"=>[2, 4]}
# => {"count"=>[100, 50], "cost"=>[6, 8]}
# => {"count"=>[300, 400], "cost"=>[2, 4]}
# => {"count"=>[100, 50], "cost"=>[6, 8]}
I solved it.
# Groups the per-id stats by segment name and sums every metric by index.
#
# Array metrics are summed position by position; Hash metrics are walked
# recursively (to any depth) and each Array found inside is summed the same
# way. Any other metric value is ignored.
#
# @param stats_array [Array<Hash>] records with 'id' and 'id_data' keys, where
#   each 'id_data' entry has a 'segment' hash (with 'segment_name') and a
#   'metrics' hash
# @return [Hash] segment name => { 'ids', 'segment', 'id_data' => [sums] }
def aggregate_by_segments(stats_array)
  results = {}
  stats_array.each do |stats|
    stats['id_data'].each do |data|
      segment = (results[data['segment']['segment_name']] ||= {})
      (segment['ids'] ||= []) << stats['id']
      segment['segment'] ||= data['segment']
      collected = (segment['id_data'] ||= [{}]).first
      data['metrics'].each { |metric, values| collect_metric(collected, metric, values) }
    end
  end
  sum_segments(results)
end

# Helper: records +values+ under +name+ in +container+. Arrays are appended to
# the list of samples, hashes are descended into, anything else is dropped.
def collect_metric(container, name, values)
  case values
  when Array then (container[name] ||= []) << values
  when Hash then hash_values(container, name, values)
  end
end

# Collects every Array (or nested Hash) entry of +hash_values+ into
# metrics[metric], creating that sub-hash only when something is collected.
def hash_values(metrics, metric, hash_values)
  hash_values.each do |k, v|
    # Scalars are skipped; hashes are not arrays but must still be descended.
    next if skip_metric?(v) && !v.is_a?(Hash)

    collect_metric(metrics[metric] ||= {}, k, v)
  end
end

# Replaces, in place, the collected samples of every segment with their sums.
#
# @return [Hash] the same +segments+ hash
def sum_segments(segments)
  segments.each_value do |segment_details|
    metrics = segment_details['id_data'][0]
    metrics.each do |metric, values|
      metrics[metric] = sum_segment_metric(values)
    end
  end
  segments
end

# Sums a list of equally sized arrays by index, or, for a Hash, sums each of
# its values recursively (updating the hash in place). Returns nil for any
# other input.
def sum_segment_metric(metric_value)
  case metric_value
  when Array
    metric_value.transpose.map { |x| x.reduce(:+) }
  when Hash
    # Only existing keys are reassigned, so iterating while updating is safe.
    metric_value.each { |k, v| metric_value[k] = sum_segment_metric(v) }
  end
end

# True for every value that is not an Array.
def skip_metric?(metric_values)
  !metric_values.is_a? Array
end
I know the code is pretty ugly. I will refactor it later :)
Thank you guys for visiting and commenting with constructive feedback.

Resources