Ruby — convert a nested hash to a multidimensional array

I have a hash which is named h. I want to store the contents in a multidimensional array named ar. I am getting the error no implicit conversion from nil to integer.
Here is my code:
h = {"bob" => {email: "abc" , tel: "123"} , "daisy" => {email: "cab" , tel: "123456"}}
keys = h.keys
l = h.length
# NOTE(review): the dimensions are swapped — the desired output needs
# 3 rows of length l, i.e. Array.new(3) { Array.new(l) }.
ar = Array.new(l) { Array.new(3) }
for i in 0..l-1
# BUG: `[2][i]` indexes the literal array [2], which yields nil whenever
# i > 0; `ar[nil]` then raises "no implicit conversion from nil to
# integer". The intended expression is ar[2][i] (likewise below).
ar[[2][i]] = keys[i]
ar[[1][i]] = h[keys[i]][:email]
ar[[0][i]] = h[keys[i]][:tel]
end
puts ar.to_s
The desired output is:
[[email_1, email_2, ..][tel_1, tel_2, ..][name_1, name_2, ..]]
For example:
[["abc", "cab"] , ["123", "123456"] , ["bob", "daisy"]]

This is the way I would handle this:
h.values.each_with_object({}) do |h,obj|
obj.merge!(h) { |_k,v1,v2| ([v1] << v2).flatten }
end.values << h.keys
#=> [["abc", "cab"], ["123", "123456"], ["bob", "daisy"]]
First grab all the values (as Hashes)
loop through them with an accumulator ({})
merge! the values into the accumulator and on conflict append them to an array
return the values from the accumulator
then append the original keys
This is less explicit than #mudasobwa's answer and relies on the order of the first value to determine the output. e.g. if :tel came before :email the first 2 elements would have a reversed order

[2][i] returns nil for i > 0. ar[nil] raises the exception.
Here is what you do:
arr = h.map { |k, v| [v[:email], v[:tel], k] }.reduce(&:zip)
To make your code work:
Change
ar = Array.new(l) { Array.new(3) }
To
ar = Array.new(3) { Array.new(l) }
Change
ar[[2][i]] = keys[i]
ar[[1][i]] = h[keys[i]][:email]
ar[[0][i]] = h[keys[i]][:tel]
To
ar[2][i] = keys[i]
ar[1][i] = h[keys[i]][:email]
ar[0][i] = h[keys[i]][:tel]

What you mostly should do is to stop writing PHP code with Ruby syntax. Here is how it’s done in Ruby:
h.map { |k, v| [v[:email], v[:tel], k] }.reduce(&:zip)
or, even better, if you are certain of elements order in nested hashes:
h.map { |k, v| [*v.values, k] }.reduce(&:zip).map(&:flatten)
All the methods map, reduce and zip are thoroughly described in the documentation.

h.map { |k, v| [*v.values_at(:email, :tel), k] }.transpose
#=> [["abc", "cab"], ["123", "123456"], ["bob", "daisy"]]
The intermediate calculation is as follows.
h.map { |k, v| [*v.values_at(:email, :tel), k] }
#=> [["abc", "123", "bob"], ["cab", "123456", "daisy"]]

Related

Get the average of numbers in an array which is the values of a hash

In a Ruby program I have a hash which has normal strings as keys and the values are arrays of numbers:
hash_1 = {"Luke"=> [2,3,4], "Mark"=>[3,5], "Jack"=>[2]}
And what I'm looking for is to have as result the same hash with the values that become the average of the numbers inside the arrays:
{"Luke"=> 3, "Mark"=>4, "Jack"=>2}
One way to make it to work can be to create a new empty hash_2, loop over hash_1 and within the block assign the keys to hash_2 and the average of the numbers as values.
hash_2 = {}
hash_1.each do |key, value|
hash_2[key] = value.sum / value.count
end
hash_2 = {"Luke"=> 3, "Mark"=>4, "Jack"=>2}
Is there a better way I could do this, for instance without having to create a new hash?
hash_1 = {"Luke"=> [2,3,4], "Mark"=>[3,5], "Jack"=>[2]}
You don't need another hash for the given below code.
p hash_1.transform_values!{|x| x.sum/x.count}
Result
{"Luke"=>3, "Mark"=>4, "Jack"=>2}
# Arithmetic mean of the numbers in +arr+ as a Float; nil for an empty array.
def avg(arr)
  arr.empty? ? nil : arr.sum.fdiv(arr.size)
end
h = { "Matthew"=>[2], "Mark"=>[3,6], "Luke"=>[2,3,4], "Jack"=>[] }
h.transform_values { |v| avg(v) }
#=> {"Matthew"=>2.0, "Mark"=>4.5, "Luke"=>3.0, "Jack"=>nil}
#Виктор
OK. How about this:
hash_1 = {"Luke"=> [2,3,4], "Mark"=>[3,5], "Jack"=>[2], "Bobby"=>[]}
hash_2 = hash_1.reduce(Hash.new(0)) do |acc, (k, v)|
v.size > 0 ? acc[k] = v.sum / v.size : acc[k] = 0
acc
end
p hash_2
This solution is different from the one that uses transform_values! because it returns a new Hash object.
hash_1.map { |k,v| [k, v.sum / v.size] }.to_h

Ruby - Print only the values of duplicate hash keys in an array of hashes

I have created an array of hashes from data I've pulled in from an XML file. The problem is, some of the hash keys in the array are duplicates and I'd like to pull just the values. For example, the code below outputs the following:
{"server_host"=>"hostone", "server_type"=>"redhat", "server_name"=>"RedhatOne"}
{"server_host"=>"hostone", "server_type"=>"windows", "server_name"=>"WinOne"}
and i'd like to be able print out this:
{"server_host"=>"hostone", "server_type"=>"redhat", "server_name"=>"RedhatOne"}
"server_type"=>"windows", "server_name"=>"WinOne"}
I think I need to create another array based on duplicate keys, but what I am trying below is not working:
# Parses <New>/<Server> nodes from an XML file into an array of hashes,
# then tries to keep only rows that share a host address.
# NOTE(review): as posted, the method is missing its final `end` — the
# `end` on the last line closes the `each` block, not the `def`.
def parse_xml_file(filename)
require 'nokogiri'
xmlSource = File.read(filename)
parsedXml = Nokogiri::XML(xmlSource)
hostArray = Array.new
parsedXml.xpath("/New/Server").each do |srvNode|
hostNode = srvNode.at_xpath("Host")
hostArray << {"server_name"=>srvNode["Name"],
"server_type"=>srvNode["Type"], "server_host"=>hostNode["Address"] }
# BUG: rows are built with *string* keys ("server_host"), so
# row[:server_host] is always nil — group_by lumps every row into one
# [nil] group. These two lines also run once per node; they presumably
# belong after the `each` loop.
grouped = hostArray.group_by{|row| [row[:server_host]]}
filtered = grouped.values.select { |a| a.size > 1 }.flatten
end
Assuming you have a variable hash_arr which contains your duplicated hashes, here is some code that should get you pretty close to where you want to be. It's not optimized, but it's simple enough to understand:
hash_arr.group_by { |h| h["server_host"] }.each do |host_name, values|
puts "Server Host: #{host_name}"
values.each do |val|
val.delete("server_host")
puts val
end
end
prints out:
Server Host: hostone
{"server_type"=>"redhat", "server_name"=>"RedhatOne"}
{"server_type"=>"windows", "server_name"=>"WinOne"}
Or if you just want the values per group without associating them across hashes:
# Collect every value seen for each key across the array of hashes,
# then de-duplicate each per-key value list in place.
hash_arr = [{"server_host"=>"hostone", "server_type"=>"redhat", "server_name"=>"RedhatOne"}, {"server_host"=>"hostone", "server_type"=>"windows", "server_name"=>"WinOne"}]
merged_hash = {}
hash_arr.each do |row|
  row.each { |key, value| (merged_hash[key] ||= []) << value }
end
merged_hash.values.each(&:uniq!)
And then the output:
[9] pry(main)> merged_hash
=> {"server_host"=>["hostone"], "server_type"=>["redhat", "windows"], "server_name"=>["RedhatOne", "WinOne"]}
This will get you the shared values:
shared = hash1.keep_if { |k, v| hash2.key? k }
And them you could print that however you like. Don't know if you want to print the keys, values, or both, but however you like:
shared.each_pair { |k, v| print k, v }
You could obviously merge these two snippets into one command, but for the sake of clarity, they are 2.
EDIT:
Just noticed you wanted as an array. If you wanted just values:
array = hash1.keep_if { |k, v| hash2.key? k }.values
Thanks for the advice — I've tried this:
# NOTE(review): `hostArray` must already be in scope (from parse_xml_file).
shared = Hash.new
# Same string-vs-symbol key issue as above: row[:server_host] is nil for
# these string-keyed hashes, so every row lands in the single [nil] group.
grouped = hostArray.group_by{|row| [row[:server_host]]}
filtered = grouped.values.select { |a| a.size > 1 }.flatten
filtered.each do |element|
element.each do |key, value|
# BUG: this compares `element` with itself — keep_if retains every pair
# because each key of `element` is trivially a key of `element`. The
# original answer compared two *different* hashes (hash1 vs hash2).
shared = element.keep_if { |k, v| element.key? k }
end
shared.each_pair { |k, v| print k," ", v, "\n" }
end
but this output is still incorrect — I think I've referenced 'hash2' wrongly? Is that correct?

Using gsub in array of hashes

I want to remove the spaces in the key value in the hashes
output = [
{"first name"=> "george", "country"=>"Australia"},
{"second name"=> "williams", "country"=>"South Africa"},
{"first name"=> "henry", "country"=>"US"}]
I was able to manage when only one hash was there inside the array with the following code
# Builds a single hash with space-free keys, then wraps it in an array.
# NOTE(review): Array.wrap is ActiveSupport (Rails), not core Ruby.
Array.wrap({}.tap do |hash|
# NOTE(review): `key` here is actually a whole hash from `output`, so
# later hashes overwrite earlier entries that share a key — this only
# works when `output` contains a single hash.
output.each do |key|
key.each do |k, v|
hash[k.gsub(" ","_")] = v
end
end
end)
Please help me to modify the array containing more than one hash.
Note: the output value is dynamic that we cannot hardcode the hash key in the code.
If hash is not nested - you can simply
output.map{|h| h.each_pair.map{|k,v| [k.gsub(' ', '_'), v]}.to_h }
Here's code that will change the spaces to underscores for each key in a hash:
output.flat_map { |h| h.map { |key, v| { key.gsub(" ", "_") => v } } }
=> [{"first_name"=>"george"}, {"country"=>"Australia"}, {"second_name"=>"williams"}, {"country"=>"South Africa"}, {"first_name"=>"henry"}, {"country"=>"US"}]
You cannot modify a hash's keys. You must remove the unwanted key and add a new one. Here's a way of doing both operations in one step (see the doc Hash#delete):
# Renames every key of +h+ that contains whitespace, replacing spaces
# with underscores. Mutates +h+ in place and returns it; keys without
# whitespace are left untouched.
def convert(h)
  h.keys.each do |key|
    next unless key =~ /\s/
    # Hash#delete returns the removed value, so this moves the pair
    # to the new underscored key in one step.
    h[key.tr(' ', '_')] = h.delete(key)
  end
  h
end
Hence:
output.map { |h| convert h }
#=> [{"country"=>"Australia", "first_name"=>"george"},
# {"country"=>"South Africa", "second_name"=>"williams"},
# {"country"=>"US", "first_name"=>"henry"}]
I've used the method String#tr to convert spaces to underscores, but you could use String#gsub as well. Also, you could write k.include?(' ') rather than k =~ /\s/.

Rejecting hash contents if they are not in array

I have this array:
array = ["1", "2", "3", "4"]
I have this array of hashes:
ah = [
{:id=>"1", :value=>"A"},
{:id=>"2", :value=>"B"},
{:id=>"3", :value=>"C"},
{:id=>"4", :value=>"D"},
{:id=>"5", :value=>"E"},
{:id=>"6", :value=>"F"},
{:id=>"7", :value=>"G"},
{:id=>"8", :value=>"H"},
]
I need to reject any hash in ah whose id is not in array.
What is the best way of achieving this?
You can select the inverse - the hashes whose id is in array by using this code:
ah.select{|el| array.include?(el[:id])}
If you prefer reject, you can use:
ah.reject{|el| !array.include?(el[:id])}
For more info: Array#reject, Array#select.
These methods create a new array, if you want to modify in place use Array#reject! or Array#select!.
For big pieces of data I would go with some preprocessing to avoid O(n*m) lookups.
array = ["1", "2", "3", "4"]
array_hash = array.each_with_object({}){ |i, h| h[i] = true }
ah.select{ |obj| array_hash[obj[:id]] }
I realize there is already an accepted answer but since all the answers here are in O(n*m), I thought I'd propose an alternative in O(n)*.
Here's a rough benchmark if the ah array has 100_000 items and we have 10_000 items in the sub array. I'm including fl00r's answer here and Cary's as we're all trying to avoid the O(n*m) scenario.
user system total real
select with include 34.610000 0.110000 34.720000 ( 34.924679)
reject with include 34.320000 0.100000 34.420000 ( 34.611992)
group and select 0.170000 0.010000 0.180000 ( 0.182358)
select by value 0.040000 0.000000 0.040000 ( 0.041073)
select with set 0.040000 0.000000 0.040000 ( 0.048331)
hashify then values 0.130000 0.010000 0.140000 ( 0.139686)
The code to reproduce this:
require 'benchmark'
require 'set'
list_size = 100_000
sub_list_size = 10_000
ah = Array.new(list_size) { |i| { id: i, value: "A" } }
array = []
sub_list_size.times { array << (0..list_size).to_a.sample }
# Keeps only the hashes in +ah+ whose :id appears in +array+, using a
# group_by index to avoid the O(n*m) Array#include? scan.
def group_than_select(ah, array)
  grouped = ah.group_by { |x| x[:id] }
  # BUG FIX: the original computed `grouped.keys - array` (ids NOT in
  # array), the inverse of what the other benchmarked methods select.
  good_keys = grouped.keys & array
  # flat_map instead of map(...).flatten — same result, one pass.
  good_keys.flat_map { |i| grouped[i] }
end
# Filters +ah+ to hashes whose :id is in +array+, using a prebuilt
# lookup hash instead of repeated Array#include? calls.
def select_by_fl00r(ah, array)
  lookup = {}
  array.each { |id| lookup[id] = true }
  ah.select { |entry| lookup[entry[:id]] }
end
# Same membership filter, but backed by a Set for O(1) lookups.
def select_with_set(ah, array)
  members = Set.new(array)
  ah.select { |entry| members.include?(entry[:id]) }
end
# Indexes the hashes by :id (assumed unique; last one wins on duplicates)
# and pulls out only the ids that also appear in +array+.
def hashify_then_values_at(ah, array)
  by_id = ah.each_with_object({}) { |entry, acc| acc[entry[:id]] = entry }
  by_id.values_at(*(by_id.keys & array))
end
Benchmark.bm(25) do |x|
x.report("select with include") do
ah.select{|el| array.include?(el[:id])}
end
x.report("reject with include") do
ah.reject{|e| !array.include?(e[:id])}
end
x.report("group and select") do
group_than_select(ah, array)
end
x.report("select by value") do
select_by_fl00r(ah, array)
end
x.report("select with set") do
select_with_set(ah, array)
end
x.report("hashify then values") do
hashify_then_values_at(ah, array)
end
end
Hash maps are typically O(1) search though O(n) worst case is possible.
A better solution than rejecting those ids that are not in the array is to only accept the ones that do:
ah.select { |hash| array.include?(hash[:id]) }
Here are two more possibilities.
array = ["1", "2", "3", "4", "99999999"]
#1
I expect the include? solutions would be considerably faster if array were first converted to a set:
require 'set'
# Converts +array+ to a Set once so each membership check is O(1),
# then keeps the hashes whose :id is a member.
def select_with_set(ah, array)
  id_set = array.to_set
  ah.select { |row| id_set.include?(row[:id]) }
end
select_with_set(ah, array)
#=> [{:id=>"1", :value=>"A"}, {:id=>"2", :value=>"B"},
# {:id=>"3", :value=>"C"}, {:id=>"4", :value=>"D"}]
#2
If, as in the example, the hash elements of ah have distinct values for :id, one could do this:
# Builds an :id => hash index (ids assumed distinct), then selects the
# entries whose ids also occur in +array+.
def hashify_then_values_at(ah, array)
  index = {}
  ah.each { |entry| index[entry[:id]] = entry }
  index.values_at(*(index.keys & array))
end
hashify_then_values_at(ah, array)
#=> [{:id=>"1", :value=>"A"}, {:id=>"2", :value=>"B"},
# {:id=>"3", :value=>"C"}, {:id=>"4", :value=>"D"}]

Turning a multi-dimensional array into a hash without overwriting values

I have a multi-dimensional array such as:
array = [["stop", "halt"],["stop", "red"],["go", "green"],["go","fast"],["caution","yellow"]]
And I want to turn it into a hash like this:
hash = {"stop" => ["halt","red"], "go" => ["green","fast"], "caution" => "yellow"}
However, when I array.to_h , the values overwrite one another and I get:
hash = {"stop" => "red", "go" => "fast", "caution" => "yellow"}
How do I get the desired array?
This is one way. It uses Enumerable#each_with_object and the form of Hash#update (aka merge!) that employs a block to determine the values of keys that are present in both hashes being merged.
array << ["stop", "or I'll fire!"]
array.each_with_object({}) { |(f,l),h|
h.update(f=>l) { |_,ov,nv| ov.is_a?(Array) ? ov << nv : [ov, nv] } }
#=> {"stop"=>["halt", "red", "or I'll fire!"],
# "go"=>["green", "fast"],
# "caution"=>"yellow"}
The code is simplified if you want all values in the returned hash to be arrays (i.e., "caution"=>["yellow"]), which is generally more convenient for subsequent calculations:
array.each_with_object({}) { |(f,l),h| h.update(f=>[l]) {|_,ov,nv| ov+nv }}
#=> {"stop"=>["halt", "red", "or I'll fire!"],
# "go"=>["green", "fast"],
# "caution"=>["yellow"]}
One way to do it:
array.inject({}) {|r, (k, v)| r[k] &&= [*r[k], v]; r[k] ||= v; r }
That's pretty messy though. Written out, it looks like this:
# Builds a hash from [key, value] pairs without losing duplicates:
# a key seen once maps to its bare value; a key seen again has its
# values collected into an array.
def to_hash_with_duplicates(arr)
  result = {}
  arr.each do |key, value|
    # Key already present: splat the existing value(s) into an array
    # and append the new one.
    result[key] &&= [*result[key], value]
    # Key absent (or holding nil/false): store the bare value.
    result[key] ||= value
  end
  result
end
Edit: Thinking a bit more, #cary-swoveland's update-with-block solution is better, because it handles nil and false values correctly.

Resources