Delete multidimensional hash in a loop - loops

I'm doing a hierarchical clustering and I need to remove the clustered elements before moving on to the next step. I did the code for a single-dimensional hash and it ran fine. Now I have a two-dimensional hash, I'm unable to delete the elements.
use strict;
use Data::Dumper;
my %hash = (
'S1' => {
'A1' => 10,
'A2' => 11,
'A3' => 5,
},
'S2' => {
'A1' => 6,
'A2' => 8,
'A3' => 3,
},
'S3' => {
'A1' => 20,
'A2' => 21,
'A3' => 15,
},
'S4' => {
'A1' => 7,
'A2' => 6,
'A3' => 4,
},
'S5' => {
'A1' => 3,
'A2' => 2,
'A3' => 10,
},
);
my #array = ('A1', 'A2', 'A3');
my %distances;
for my $key_1 (sort keys %hash) {
for my $key_2 (sort keys %hash) {
if ($key_1 ne $key_2) {
my $deviation_vectors;
foreach (#array) {
$deviation_vectors += ($hash{$key_1}{$_} - $hash{$key_2}{$_}) ** 2;
};
$distances{$key_1}{$key_2} = $deviation_vectors ** 0.5 unless $distances{$key_2}{$key_1};
};
};
};
my #values;
while (my ($key, $element) = each %distances) {
while (my ($element, $value) = each %{$element}) {
push #values, $value;
};
};
my $min = (sort {$a <=> $b} #values)[0];
for my $key_1 (sort keys %hash) {
for my $key_2 (sort keys %hash) {
if ($key_1 ne $key_2) {
my $deviation_vectors;
foreach (#array) {
$deviation_vectors += ($hash{$key_1}{$_} - $hash{$key_2}{$_}) ** 2;
};
if ($min == $deviation_vectors ** 0.5) {
my $new_key = "$key_1,$key_2";
foreach (#array) {
$hash{$new_key}{$_} = mean($hash{$key_1}{$_}, $hash{$key_2}{$_});
};
# Problem here
# Delete doesn't completely remove the element, it returns a hash with an empty key element
delete $hash{$key_1};
delete $hash{$key_2};
};
};
};
};
print Dumper \%hash;
sub mean {
my #data = #_;
my $sum;
foreach (#data) {
$sum += $_;
};
return ($sum / #data)
};
This is the result I got...
$VAR1 = {
'S4' => {},
'S2' => {},
'S3' => {
'A1' => 20,
'A3' => 15,
'A2' => 21
},
'S2,S4' => {
'A2' => 7,
'A1' => '6.5',
'A3' => '3.5'
},
'S1' => {
'A3' => 5,
'A1' => 10,
'A2' => 11
},
'S5' => {
'A3' => 10,
'A1' => 3,
'A2' => 2
}
};
'S2' and 'S4' need to be completely removed from the hash.

Please inspect following code which is based on provided code with some modification to remove excessive loop cycles with introduction of two indexes.
Perhaps hash %distances in this algorithm is excessive, it is kept for demonstration purpose only as it can be useful to OP.
NOTE: the code is provided for an demonstration purpose in an attempt to improve code readability
INFO: $distance ** 0.5 is better written as sqrt($distance), documentation sqrt
use strict;
use warnings;
use feature 'say';
use Data::Dumper;
my %hash = (
'S1' => {
'A1' => 10,
'A2' => 11,
'A3' => 5,
},
'S2' => {
'A1' => 6,
'A2' => 8,
'A3' => 3,
},
'S3' => {
'A1' => 20,
'A2' => 21,
'A3' => 15,
},
'S4' => {
'A1' => 7,
'A2' => 6,
'A3' => 4,
},
'S5' => {
'A1' => 3,
'A2' => 2,
'A3' => 10,
},
);
my(%distances, $deviation, #array, #keys);
#array = qw(A1 A2 A3);
#keys = sort keys %hash;
for my $index_1 (0..$#keys) {
for my $index_2 (1+$index_1..$#keys) {
my($distance, $key_1, $key_2) = (0, $keys[$index_1], $keys[$index_2]);
$distance += ( $hash{$key_1}{$_} - $hash{$key_2}{$_} ) ** 2 for #array;
$distance = $distance ** 0.5;
$distances{$key_1}{$key_2} = $distance;
$deviation->{min} = $distance unless $deviation->{min};
if( $deviation->{min} > $distance ) {
$deviation->{min} = $distance;
$deviation->{keys} = [$key_1, $key_2];
}
}
}
my($key_1, $key_2) = $deviation->{keys}->#*;
$hash{"$key_1,$key_2"}{$_} = mean($hash{$key_1}{$_}, $hash{$key_2}{$_}) for #array;
delete #hash{($key_1, $key_2)};
say Dumper(\%hash);
exit 0;
sub mean {
my #data = #_;
my $sum;
$sum += $_ for #data;
return $sum / #data;
}
Output sample
$VAR1 = {
'S2,S4' => {
'A1' => '6.5',
'A2' => '7',
'A3' => '3.5'
},
'S5' => {
'A1' => 3,
'A3' => 10,
'A2' => 2
},
'S3' => {
'A1' => 20,
'A3' => 15,
'A2' => 21
},
'S1' => {
'A2' => 11,
'A3' => 5,
'A1' => 10
}
};

So this is the subroutine I've made from #Polar Bear solution. It has three parameters, the first one will be the input data, the second one will be the array of sub-elements, then the last one will be the threshold where we want to stop the subroutine.
...
sub agglomerative_clustering {
my %data = %{$_[0]};
my #array = #{$_[1]};
my $threshold = $_[2];
my $size = keys %data;
my %clusters;
for (my $i = 1; $i < $size; $i++) {
my (%distances, $find, #keys);
#keys = sort keys %data;
for my $index_1 (0 .. $#keys) {
for my $index_2 (1 + $index_1 .. $#keys) {
my ($distance, $key_1, $key_2) = (0, $keys[$index_1], $keys[$index_2]);
$distance += ($data{$key_1}{$_} - $data{$key_2}{$_}) ** 2 foreach #array;
$distance = sqrt($distance);
$distances{$key_1}{$key_2} = $distance;
$find->{min} = $distance unless $find->{min};
$find->{key} = [$key_1, $key_2] unless $find->{key};
if ($find->{min} > $distance) {
$find->{min} = $distance;
$find->{key} = [$key_1, $key_2];
};
};
};
my ($key_1, $key_2) = $find->{key}->#*;
$data{"$key_1,$key_2"}{$_} = mean($data{$key_1}{$_}, $data{$key_2}{$_}) foreach #array;
delete #data{($key_1, $key_2)};
last if $find->{min} >= $threshold;
%clusters = %data;
};
return %clusters;
};
sub mean {
my #data = #_;
my $sum;
$sum += $_ for #data;
return $sum / #data;
};
...

Related

Check data structure and disregard if hash or array

I have a bunch of Hashes inside of an array. When checking my keys and values I get the expected output except for some special cases as they refer to more Arrays/Hashes.
Think of something like this:
#AoH = ( { 'husband' => "homer", 'wife' => "marge" },
{ 'people' => [{'Bob'=> 24, 'Lukas'=> 37}] },
{ 'vegetables' => { 'tomato' => "red", 'carrot' => "orange"} });
My function iterates through the array and displays my keys and values as in the following:
sub function(...){
print "$key => $value\n";
}
husband => homer
wife => marge
people => ARRAY(0x6b0d80)
Bob => 24
Lukas => 37
vegetables => HASH(0x2570d38)
tomato => red
carrot => orange
Now I want to access my keys and values, but when getting something like ARRAY or HASH as value, I want to disregard that hash and not print it.
Is there some kind of way to only access Values with type scalar?
So far I tried this:
if ($value eq 'ARRAY') {
}
elsif ($value eq ref {}) {
}
else {
print "$key => $value\n";
}
But, it ends up printing exactly the same as above and does not disregard the other data structures.
For an arbitrary data structure like yours, you can use Data::Traverse:
use warnings;
use strict;
use Data::Traverse qw(traverse);
my #AoH = ( { 'husband' => "homer", 'wife' => "marge" },
{ 'people' => [{'Bob'=> 24, 'Lukas'=> 37}] },
{ 'vegetables' => { 'tomato' => "red", 'carrot' => "orange"} });
traverse { print "$a => $b\n" if /HASH/ } \#AoH;
Output:
wife => marge
husband => homer
Bob => 24
Lukas => 37
carrot => orange
tomato => red
Following demo code does not utilize external modules, provided for educational purpose.
use strict;
use warnings;
use feature 'say';
my #AoH = ( { 'husband' => "homer", 'wife' => "marge" },
{ 'people' => [{'Bob'=> 24, 'Lukas'=> 37}] },
{ 'vegetables' => { 'tomato' => "red", 'carrot' => "orange"} });
drill_in( \#AoH );
sub drill_in {
my $data = shift;
if( ref $data eq 'ARRAY' ) {
drill_in($_) for #$data;
} elsif ( ref $data eq 'HASH' ) {
while( my($k, $v ) = each %{$data} ) {
(ref $v eq 'ARRAY' or ref $v eq 'HASH') ? drill_in($v) : say "$k => $v";
}
}
}
Output
husband => homer
wife => marge
Lukas => 37
Bob => 24
tomato => red
carrot => orange

What's the better way to get highest couple of values from an array of keys in Perl?

What's the better way to get the highest value from an array of hashes? I want to get highest ID value from each file, content in my array (keys are file name and ID).
my #array contains these values
[
{ file => "messages0.0", id => "1", },
{ file => "messages0.1", id => "2", },
{ file => "messages0.3", id => "3", },
{ file => "messages1.0", id => "1", },
{ file => "messages1.1", id => "2", },
{ file => "messages2.0", id => "1", },
{ file => "messages2.1", id => "1", }
]
If I use
my #new_array = sort { $b->{id} <=> $a->{id} } #array;
If I have value greater than 10 then sort function doesn't works correctly
messages0.0.log;1
messages1.0.log;1
messages2.0.log;1
messages2.1.log;1
messages1.0.log;10
messages1.0.log;11
Here is my array content (with field separated by ; for a better view
messages1.0.log;12
messages1.0.log;11
messages1.0.log;10
messages1.0.log;9
messages0.0.log;8
messages1.0.log;8
messages0.0.log;7
messages1.0.log;7
messages0.0.log;6
messages1.0.log;6
messages0.0.log;5
messages1.0.log;5
messages2.0.log;5
messages2.1.log;5
messages0.0.log;4
messages1.0.log;4
messages2.0.log;4
messages2.1.log;4
messages2.0.log;3
messages2.1.log;3
messages0.0.log;3
messages0.2.log;3
messages0.3.log;3
messages1.0.log;3
messages2.0.log;3
messages2.1.log;3
messages0.3.log;2
messages0.2.log;2
messages0.0.log;2
messages1.0.log;2
messages2.0.log;2
messages2.1.log;2
messages0.0.log;1
messages0.2.log;1
messages0.3.log;1
messages1.0.log;1
messages1.1.log;1
messages2.0.log;1
messages2.1.log;1
My desired output is
messages1.0.log;12
messages0.0.log;8
messages2.0.log;5
messages2.1.log;5
messages0.2.log;3
messages0.3.log;3
messages1.1.log;1
#!/usr/bin/perl
use strict;
use warnings;
my $STAT = ".logstatistics";
open( STAT, '>', $STAT ) or die $!;
my #new_array = sort { $b->{id} <=> $a->{id} } #array;
# Print Log statistics
foreach my $entry ( #new_array ) {
print STAT join ';', $entry->{file}, "$entry->{id}\n";
}
close( STAT );
To help me with the analysis I've written the following code to load the array from a file
open( STAT, $STAT );
while ( <STAT> ) {
my #lines = split /\n/;
my ( $file, $id ) = $lines[0] =~ /\A(.\w.*);(\d.*)/;
push #array, { file => $file, id => $id, };
}
close( STAT );
I've solved my problem with an if statement into data loading into #array.
if the old value of the file name is the same as the current value it is skipped.
In this way, I have only one value for each file.
This seems to do what you want.
#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';
# This seems to be the data structure that you are working with
my #data = ( {
file => 'messages1.0.log', id => 12,
}, {
file => 'messages1.0.log', id => 11,
}, {
file => 'messages1.0.log', id => 10,
}, {
file => 'messages1.0.log', id => 9,
}, {
file => 'messages0.0.log', id => 8,
}, {
file => 'messages1.0.log', id => 8,
}, {
file => 'messages0.0.log', id => 7,
}, {
file => 'messages1.0.log', id => 7,
}, {
file => 'messages0.0.log', id => 6,
}, {
file => 'messages1.0.log', id => 6,
}, {
file => 'messages0.0.log', id => 5,
}, {
file => 'messages1.0.log', id => 5,
}, {
file => 'messages2.0.log', id => 5,
}, {
file => 'messages2.1.log', id => 5,
}, {
file => 'messages0.0.log', id => 4,
}, {
file => 'messages1.0.log', id => 4,
}, {
file => 'messages2.0.log', id => 4,
}, {
file => 'messages2.1.log', id => 4,
}, {
file => 'messages2.0.log', id => 3,
}, {
file => 'messages2.1.log', id => 3,
}, {
file => 'messages0.0.log', id => 3,
}, {
file => 'messages0.2.log', id => 3,
}, {
file => 'messages0.3.log', id => 3,
}, {
file => 'messages1.0.log', id => 3,
}, {
file => 'messages2.0.log', id => 3,
}, {
file => 'messages2.1.log', id => 3,
}, {
file => 'messages0.3.log', id => 2,
}, {
file => 'messages0.2.log', id => 2,
}, {
file => 'messages0.0.log', id => 2,
}, {
file => 'messages1.0.log', id => 2,
}, {
file => 'messages2.0.log', id => 2,
}, {
file => 'messages2.1.log', id => 2,
}, {
file => 'messages0.0.log', id => 1,
}, {
file => 'messages0.2.log', id => 1,
}, {
file => 'messages0.3.log', id => 1,
}, {
file => 'messages1.0.log', id => 1,
}, {
file => 'messages1.1.log', id => 1,
}, {
file => 'messages2.0.log', id => 1,
}, {
file => 'messages2.1.log', id => 1,
});
my %stats;
# Walk your input data, making a note of the highest
# id associated with every file.
for (#data) {
if (($stats{$_->{file}} // 0) < $_->{id}) {
$stats{$_->{file}} = $_->{id};
}
}
# Walk the %stats hash in sorted order, printing
# the file and the maximum associated id.
for ( sort my_clever_sort keys %stats) {
say join ';', $_, $stats{$_};
}
# (Slightly) clever sorting algorithm
sub my_clever_sort {
# Extract the floating point numbers from the filenames
my ($str_num_a) = $a =~ /(\d+\.\d+)/;
my ($str_num_b) = $b =~ /(\d+\.\d+)/;
# Sort by id (descending) and then filename (ascending)
return ($stats{$b} <=> $stats{$a}) || ($str_num_a <=> $str_num_b);
}
Instead of
my #new_array = sort { $a->{id} cmp $b->{id} } #array;
try this
my #new_array = sort { $a->{id} <=> $b->{id} } #array;
The <=> operator treats the fields to compare as numbers instead of strings. It will treat 10 as greater than 3, so it will treat 10 as greater than 03.
The cmp operator treats your values as strings, so it will sort 21 before 3 just as it would sort BA before C.

Combine hash properties from array of hashes

I'm parsing an excel spreadsheet and trying to combine data rows by id across tables and files. This is a condensed, simplified version of what I have. With a PHP/JS background, I prefer conceptualizing hashes as objects, so %aoo means array of objects instead of array of hashes...
#!/usr/bin/env perl
use v5.10.0;
use strict;
use warnings;
use Data::Dump;
use Data::Dumper;
# Array of objects
# Each object is a row from a table
my $aoo1 = [
{
"id" => 1,
"name" => "Dan",
"team" => "red"
},
{
"id" => 2,
"name" => "Arnold",
"team" => "red"
},
{
"id" => 3,
"name" => "Kristen",
"team" => "red"
}
];
my #aoo2 = (
{
"id" => 1,
"position" => "web developer",
},
{
"id" => 2,
"position" => "CEO",
},
{
"id" => 3,
"position" => "Secretary",
}
);
my #aoo3 = (
{
"id" => 1,
"tenure" => "1yr",
},
{
"id" => 2,
"tenure" => "25yr",
},
{
"id" => 3,
"tenure" => "5yr",
}
);
# object of arrays
# each property is a table name from spreadsheet
my %ooa;
%ooa = (
"People List" => $aoo1,
"Position List" => \#aoo2,
"Tenure List" => \#aoo3
);
# dd \%ooa;
while (my ($list_name, $aoo) = each %ooa)
{
# $aoo reftype is array | [ %object, %object, %object ]
# Do something to look into other objects for same id...
}
I want to be able to create a new object for each unique row in the file, so I can filter the values and then write it to a CSV file.
Ex. of the end result
%complete_row = (
'id' => 1,
'name' => 'Dan',
'team' => 'red',
'position => 'Web Dev',
'tenure' => '1yr'
);
Put the 2nd and 3rd arrays into hashes mapping ID to the hash. Then loop through the people and use the ID to get the data from position and tenure hashes.
use strict;
use warnings;
use Data::Dumper;
my $people = [
{
id => 1,
name => "Dan",
team => "red"
},
{
id => 2,
name => "Arnold",
team => "red"
},
{
id => 3,
name => "Kristen",
team => "red"
}
];
my $positions = [
{
id => 1,
position => "web developer",
},
{
id => 2,
position => "CEO",
},
{
id => 3,
position => "Secretary",
}
];
my $tenures = [
{
id => 1,
tenure => "1yr",
},
{
id => 2,
tenure => "25yr",
},
{
id => 3,
tenure => "5yr",
}
];
# hash each by ID
my %position_hash = map { $_->{id} => $_ } #$positions;
my %tenure_hash = map { $_->{id} => $_ } #$tenures;
# combine
my $complete = [];
foreach my $person (#$people) {
my $id = $person->{id};
my $complete_row = {
%$person,
position => $position_hash{$id}->{position},
tenure => $tenure_hash{$id}->{tenure},
};
push #$complete, $complete_row
}
print "complete = " . Dumper($complete);
This should work:
my %newHash;
foreach my $arrRef(map {$ooa{$_}} keys %ooa) { #reading all values of ooa hash, each value is an array ref
foreach my $hashRef(#$arrRef) { #reading each array element, each array element is a hash ref
foreach my $key(keys %{$hashRef}) { #reading all keys of each internal hash
$newHash{$hashRef->{'id'}}{$key} = $hashRef->{$key}; #building new hash of hashes with id as key and value as hash ref
}
}
}
my #newArray = map {$newHash{$_}} keys %newHash; #converting hash of hashes into array of hashes

Generate array from reference hash

I'm trying to generate array from hash reference, created by joining all keys of hashes with sorting.
Consider I have dynamic hash reference like
my $hash_ref = {
'A1' => {
'B2' => {
'C1' => {
'D1' => {},
'D2' => {},
'D3' => {}
}
},
'B3' => {
'C1' => {
'D2' => {},
'D1' => {},
'D3' => {}
}
},
'B1' => {
'C1' => {
'D1' => {},
'D2' => {}
}
}
}
};
how to create array from above hash like
#arr = qw/A1B1C1D1 A1B1C1D2 A1B2C1D1 ..../;
below is the code I tried(which is not working)
my $out = hash_walk($hash_ref);
say Dumper $out;
sub hash_walk {
my $hash = shift;
my $array_ref;
my $temp_arr;
my #temp_arr2;
foreach my $k ( sort keys %$hash ) {
$v = $$hash{$k};
if ( ref($v) eq 'HASH' ) {
# Recurse.
$temp_arr = hash_walk( $v);
}
push #$array_ref, $k if $k;
my (#lvlfirst, #lvlnext );
if ($array_ref && $temp_arr){
#lvlfirst = #$array_ref;
#lvlnext = #$temp_arr;
}
for ( my $i = 0 ; $i <= $#lvlfirst ; $i++ ) {
for ( my $j = 0 ; $j <= $#lvlnext ; $j++ ) {
push #temp_arr2, "$lvlfirst[$i]$lvlnext[$j]"; ##Trying to join here
}
}
}
return \#temp_arr2;
}
XML is:
<root>
<class1 name="A1">
<class2 name="B1">
<class3 name="C1">
<class4 name="D1"></class4>
<class4 name="D2"></class4>
</class3>
</class2>
<class2 name="B2">
<class3 name="C1">
<class4 name="D1"></class4>
</class3>
</class2>
<class2 name="B3">
<class3 name="C1">
<class4 name="D1"></class4>
<class4 name="D2"></class4>
<class4 name="D3"></class4>
</class3>
</class2>
</class1>
</root>
You should really make some effort yourself before coming to SO for help. We're far more likely to help you fix broken code than just give you an answer.
But I'm feeling generous and I have a couple of minutes to spare.
The brute force approach would be to walk through every key at every level in the hash.
#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Data::Dumper;
my $hash_ref = {
'A1' => {
'B2' => {
'C1' => {
'D1' => {},
'D2' => {},
'D3' => {}
}
},
'B3' => {
'C1' => {
'D2' => {},
'D1' => {},
'D3' => {}
}
},
'B1' => {
'C1' => {
'D1' => {},
'D2' => {}
}
}
}
};
my #arr;
for my $l1 (sort keys %$hash_ref) {
for my $l2 (sort keys %{$hash_ref->{$l1}}) {
for my $l3 (sort keys %{$hash_ref->{$l1}{$l2}}) {
for my $l4 (sort keys %{$hash_ref->{$l1}{$l2}{$l3}}) {
push #arr, "$l1$l2$l3$l4";
}
}
}
}
say Dumper \#arr;
This produces the output:
$VAR1 = [
'A1B1C1D1',
'A1B1C1D2',
'A1B2C1D1',
'A1B2C1D2',
'A1B2C1D3',
'A1B3C1D1',
'A1B3C1D2',
'A1B3C1D3'
];
Update: Here's a recursive solution:
#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Data::Dumper;
my $hash_ref = {
'A1' => {
'B2' => {
'C1' => {
'D1' => {},
'D2' => {},
'D3' => {}
}
},
'B3' => {
'C1' => {
'D2' => {},
'D1' => {},
'D3' => {}
}
},
'B1' => {
'C1' => {
'D1' => {},
'D2' => {}
}
}
}
};
my #arr = walk_hash($hash_ref, '');
say Dumper \#arr;
sub walk_hash {
my ($hash_ref, $prefix) = #_;
return $prefix unless keys %$hash_ref;
return map { walk_hash($hash_ref->{$_}, "$prefix$_") } sort keys %$hash_ref;
}
I would tackle this differently - as this is XML, I would skip the intermediate 'mangle the XML into a hash' step, and just work with it directly.
Something like this does what you want:
#!/usr/bin/env perl
use strict;
use warnings 'all';
use XML::Twig;
use Data::Dumper;
my $twig = XML::Twig -> new -> parsefile ('your.xml');
my #node_keys;
#find all the nodes with a name attribute.
#then grep out the ones that have child nodes.
foreach my $elt ( grep { not $_ -> descendants } $twig -> get_xpath('//*[#name]') ){
my $path = $elt -> att('name');
my $cursor = $elt;
#recurse upwards through 'parent' nodes with a 'name' attribute.
while ( $cursor -> parent -> att('name') ) {
$path = $cursor -> parent -> att('name') . $path;
$cursor = $cursor -> parent;
}
push #node_keys, $path;
}
print Dumper \#node_keys;
Gives output:
$VAR1 = [
'A1B1C1D1',
'A1B1C1D2',
'A1B2C1D1',
'A1B3C1D1',
'A1B3C1D2',
'A1B3C1D3'
];
Note - because it's walking in 'XML order' it's preserving the same ordering as source. That might be called a feature, or you can sort it afterwards.
But I would question perhaps, what you're trying to accomplish by making these compounds of 'name' attributes - it may be that you can solve the task more effectively through XML parsing and xpath queries.

perl hash of hashes or array of arrays example

How come I never see examples like these where you declare the hash, and then put then inside another hash?
my %hash1={};
$hash1{'key1'}='1-111';
$hash1{'key2'}='1-222';
$hash1{'key3'}='1-333';
my %hash2={};
$hash2{'key1'}='2-111';
$hash2{'key2'}='2-222';
$hash2{'key3'}='2-333';
my %main_hash1={%hash1, %hash2};
I've only seen examples like these where they put the hashes inside the hash, instead of a variable for the hash:
my %main_hash2=( 'hash1' => {
'key1' => '1-111',
'key2' => '1-222',
'key3' => '1-333'
},
'hash2' => {
'key1' => '2-111',
'key2' => '2-222',
'key3' => '2-333'
}
);
(similar with arrays also)
You can't store a hash in a hash, you can store a hashref in a hash though:
my %main_hash1 = ( hash1 => \%hash1, hash2 => \%hash2 );
The same goes with arrays:
my #main_array1 = ( \#array1, \#array2 );
And with mixes:
my #array_of_hrefs = ( \%hash1, \%hash2 );
my %hash_of_arefs = ( arr1 => \#arr1, arr2 => \#arr2 );
This is done all the time; I don't know why you haven't seen it and I doubt anyone on SO would know that answer.
Also, this does not initialize a hash:
my %hash1={}; ## should be my %hash1; or my %hash1 = ();
See the following example :
The Perl code :
my %hash1;
$hash1{'key1'}='1-111';
$hash1{'key2'}='1-222';
$hash1{'key3'}='1-333';
my %hash2;
$hash2{'key1'}='2-111';
$hash2{'key2'}='2-222';
$hash2{'key3'}='2-333';
my %main_hash = ( hash1 => \%hash1, hash2 => \%hash2 );
use Data::Dumper;
print Dumper %main_hash;
The output :
$VAR1 = 'hash2';
$VAR2 = {
'key2' => '2-222',
'key1' => '2-111',
'key3' => '2-333'
};
$VAR3 = 'hash1';
$VAR4 = {
'key2' => '1-222',
'key1' => '1-111',
'key3' => '1-333'
};
That use references, see http://perldoc.perl.org/perlreftut.html & if needed : http://perldoc.perl.org/perlref.html
There's a ref trick that makes things a bit magic :
my $hash_ref = {}; # reference to a blank hash
my %h = ( foo => "1", bar => "2" );
push #{$hash_ref->{'1st_level'}->{'level-2'}->{'level_3'}->{'arr'}}, 123;
push #{$hash_ref->{'1st_level'}->{'level-2'}->{'level_3'}->{'arr'}}, 456;
$hash_ref->{'1st_level'}->{'level-2'}->{'level_3'}->{'arr'}->[2] = \%h;
use Data::Dumper;
print Dumper $hash_ref;
The output :
$VAR1 = {
'1st_level' => {
'level-2' => {
'level_3' => {
'arr' => [
123,
456,
{
'bar' => '2',
'foo' => '1'
}
]
}
}
}
};

Resources