I'm trying to generate array from hash reference, created by joining all keys of hashes with sorting.
Consider I have dynamic hash reference like
my $hash_ref = {
'A1' => {
'B2' => {
'C1' => {
'D1' => {},
'D2' => {},
'D3' => {}
}
},
'B3' => {
'C1' => {
'D2' => {},
'D1' => {},
'D3' => {}
}
},
'B1' => {
'C1' => {
'D1' => {},
'D2' => {}
}
}
}
};
how to create array from above hash like
#arr = qw/A1B1C1D1 A1B1C1D2 A1B2C1D1 ..../;
below is the code I tried(which is not working)
my $out = hash_walk($hash_ref);
say Dumper $out;
sub hash_walk {
my $hash = shift;
my $array_ref;
my $temp_arr;
my #temp_arr2;
foreach my $k ( sort keys %$hash ) {
$v = $$hash{$k};
if ( ref($v) eq 'HASH' ) {
# Recurse.
$temp_arr = hash_walk( $v);
}
push #$array_ref, $k if $k;
my (#lvlfirst, #lvlnext );
if ($array_ref && $temp_arr){
#lvlfirst = #$array_ref;
#lvlnext = #$temp_arr;
}
for ( my $i = 0 ; $i <= $#lvlfirst ; $i++ ) {
for ( my $j = 0 ; $j <= $#lvlnext ; $j++ ) {
push #temp_arr2, "$lvlfirst[$i]$lvlnext[$j]"; ##Trying to join here
}
}
}
return \#temp_arr2;
}
XML is:
<root>
<class1 name="A1">
<class2 name="B1">
<class3 name="C1">
<class4 name="D1"></class4>
<class4 name="D2"></class4>
</class3>
</class2>
<class2 name="B2">
<class3 name="C1">
<class4 name="D1"></class4>
</class3>
</class2>
<class2 name="B3">
<class3 name="C1">
<class4 name="D1"></class4>
<class4 name="D2"></class4>
<class4 name="D3"></class4>
</class3>
</class2>
</class1>
</root>
You should really make some effort yourself before coming to SO for help. We're far more likely to help you fix broken code than just give you an answer.
But I'm feeling generous and I have a couple of minutes to spare.
The brute force approach would be to walk through every key at every level in the hash.
#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Data::Dumper;
my $hash_ref = {
'A1' => {
'B2' => {
'C1' => {
'D1' => {},
'D2' => {},
'D3' => {}
}
},
'B3' => {
'C1' => {
'D2' => {},
'D1' => {},
'D3' => {}
}
},
'B1' => {
'C1' => {
'D1' => {},
'D2' => {}
}
}
}
};
my #arr;
for my $l1 (sort keys %$hash_ref) {
for my $l2 (sort keys %{$hash_ref->{$l1}}) {
for my $l3 (sort keys %{$hash_ref->{$l1}{$l2}}) {
for my $l4 (sort keys %{$hash_ref->{$l1}{$l2}{$l3}}) {
push #arr, "$l1$l2$l3$l4";
}
}
}
}
say Dumper \#arr;
This produces the output:
$VAR1 = [
'A1B1C1D1',
'A1B1C1D2',
'A1B2C1D1',
'A1B2C1D2',
'A1B2C1D3',
'A1B3C1D1',
'A1B3C1D2',
'A1B3C1D3'
];
Update: Here's a recursive solution:
#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use Data::Dumper;
my $hash_ref = {
'A1' => {
'B2' => {
'C1' => {
'D1' => {},
'D2' => {},
'D3' => {}
}
},
'B3' => {
'C1' => {
'D2' => {},
'D1' => {},
'D3' => {}
}
},
'B1' => {
'C1' => {
'D1' => {},
'D2' => {}
}
}
}
};
my #arr = walk_hash($hash_ref, '');
say Dumper \#arr;
sub walk_hash {
my ($hash_ref, $prefix) = #_;
return $prefix unless keys %$hash_ref;
return map { walk_hash($hash_ref->{$_}, "$prefix$_") } sort keys %$hash_ref;
}
I would tackle this differently - as this is XML, I would skip the intermediate 'mangle the XML into a hash' step, and just work with it directly.
Something like this does what you want:
#!/usr/bin/env perl
use strict;
use warnings 'all';
use XML::Twig;
use Data::Dumper;
my $twig = XML::Twig -> new -> parsefile ('your.xml');
my #node_keys;
#find all the nodes with a name attribute.
#then grep out the ones that have child nodes.
foreach my $elt ( grep { not $_ -> descendants } $twig -> get_xpath('//*[#name]') ){
my $path = $elt -> att('name');
my $cursor = $elt;
#recurse upwards through 'parent' nodes with a 'name' attribute.
while ( $cursor -> parent -> att('name') ) {
$path = $cursor -> parent -> att('name') . $path;
$cursor = $cursor -> parent;
}
push #node_keys, $path;
}
print Dumper \#node_keys;
Gives output:
$VAR1 = [
'A1B1C1D1',
'A1B1C1D2',
'A1B2C1D1',
'A1B3C1D1',
'A1B3C1D2',
'A1B3C1D3'
];
Note - because it's walking in 'XML order' it's preserving the same ordering as source. That might be called a feature, or you can sort it afterwards.
But I would question perhaps, what you're trying to accomplish by making these compounds of 'name' attributes - it may be that you can solve the task more effectively through XML parsing and xpath queries.
Related
I'm doing a hierarchical clustering and I need to remove the clustered elements before moving on to the next step. I did the code for a single-dimensional hash and it ran fine. Now I have a two-dimensional hash, I'm unable to delete the elements.
use strict;
use Data::Dumper;
my %hash = (
'S1' => {
'A1' => 10,
'A2' => 11,
'A3' => 5,
},
'S2' => {
'A1' => 6,
'A2' => 8,
'A3' => 3,
},
'S3' => {
'A1' => 20,
'A2' => 21,
'A3' => 15,
},
'S4' => {
'A1' => 7,
'A2' => 6,
'A3' => 4,
},
'S5' => {
'A1' => 3,
'A2' => 2,
'A3' => 10,
},
);
my #array = ('A1', 'A2', 'A3');
my %distances;
for my $key_1 (sort keys %hash) {
for my $key_2 (sort keys %hash) {
if ($key_1 ne $key_2) {
my $deviation_vectors;
foreach (#array) {
$deviation_vectors += ($hash{$key_1}{$_} - $hash{$key_2}{$_}) ** 2;
};
$distances{$key_1}{$key_2} = $deviation_vectors ** 0.5 unless $distances{$key_2}{$key_1};
};
};
};
my #values;
while (my ($key, $element) = each %distances) {
while (my ($element, $value) = each %{$element}) {
push #values, $value;
};
};
my $min = (sort {$a <=> $b} #values)[0];
for my $key_1 (sort keys %hash) {
for my $key_2 (sort keys %hash) {
if ($key_1 ne $key_2) {
my $deviation_vectors;
foreach (#array) {
$deviation_vectors += ($hash{$key_1}{$_} - $hash{$key_2}{$_}) ** 2;
};
if ($min == $deviation_vectors ** 0.5) {
my $new_key = "$key_1,$key_2";
foreach (#array) {
$hash{$new_key}{$_} = mean($hash{$key_1}{$_}, $hash{$key_2}{$_});
};
# Problem here
# Delete doesn't completely remove the element, it returns a hash with an empty key element
delete $hash{$key_1};
delete $hash{$key_2};
};
};
};
};
print Dumper \%hash;
sub mean {
my #data = #_;
my $sum;
foreach (#data) {
$sum += $_;
};
return ($sum / #data)
};
This is the result I got...
$VAR1 = {
'S4' => {},
'S2' => {},
'S3' => {
'A1' => 20,
'A3' => 15,
'A2' => 21
},
'S2,S4' => {
'A2' => 7,
'A1' => '6.5',
'A3' => '3.5'
},
'S1' => {
'A3' => 5,
'A1' => 10,
'A2' => 11
},
'S5' => {
'A3' => 10,
'A1' => 3,
'A2' => 2
}
};
'S2' and 'S4' need to be completely removed from the hash.
Please inspect following code which is based on provided code with some modification to remove excessive loop cycles with introduction of two indexes.
Perhaps hash %distances in this algorithm is excessive, it is kept for demonstration purpose only as it can be useful to OP.
NOTE: the code is provided for an demonstration purpose in an attempt to improve code readability
INFO: $distance ** 0.5 is better written as sqrt($distance), documentation sqrt
use strict;
use warnings;
use feature 'say';
use Data::Dumper;
my %hash = (
'S1' => {
'A1' => 10,
'A2' => 11,
'A3' => 5,
},
'S2' => {
'A1' => 6,
'A2' => 8,
'A3' => 3,
},
'S3' => {
'A1' => 20,
'A2' => 21,
'A3' => 15,
},
'S4' => {
'A1' => 7,
'A2' => 6,
'A3' => 4,
},
'S5' => {
'A1' => 3,
'A2' => 2,
'A3' => 10,
},
);
my(%distances, $deviation, #array, #keys);
#array = qw(A1 A2 A3);
#keys = sort keys %hash;
for my $index_1 (0..$#keys) {
for my $index_2 (1+$index_1..$#keys) {
my($distance, $key_1, $key_2) = (0, $keys[$index_1], $keys[$index_2]);
$distance += ( $hash{$key_1}{$_} - $hash{$key_2}{$_} ) ** 2 for #array;
$distance = $distance ** 0.5;
$distances{$key_1}{$key_2} = $distance;
$deviation->{min} = $distance unless $deviation->{min};
if( $deviation->{min} > $distance ) {
$deviation->{min} = $distance;
$deviation->{keys} = [$key_1, $key_2];
}
}
}
my($key_1, $key_2) = $deviation->{keys}->#*;
$hash{"$key_1,$key_2"}{$_} = mean($hash{$key_1}{$_}, $hash{$key_2}{$_}) for #array;
delete #hash{($key_1, $key_2)};
say Dumper(\%hash);
exit 0;
sub mean {
my #data = #_;
my $sum;
$sum += $_ for #data;
return $sum / #data;
}
Output sample
$VAR1 = {
'S2,S4' => {
'A1' => '6.5',
'A2' => '7',
'A3' => '3.5'
},
'S5' => {
'A1' => 3,
'A3' => 10,
'A2' => 2
},
'S3' => {
'A1' => 20,
'A3' => 15,
'A2' => 21
},
'S1' => {
'A2' => 11,
'A3' => 5,
'A1' => 10
}
};
So this is the subroutine I've made from #Polar Bear solution. It has three parameters, the first one will be the input data, the second one will be the array of sub-elements, then the last one will be the threshold where we want to stop the subroutine.
...
sub agglomerative_clustering {
my %data = %{$_[0]};
my #array = #{$_[1]};
my $threshold = $_[2];
my $size = keys %data;
my %clusters;
for (my $i = 1; $i < $size; $i++) {
my (%distances, $find, #keys);
#keys = sort keys %data;
for my $index_1 (0 .. $#keys) {
for my $index_2 (1 + $index_1 .. $#keys) {
my ($distance, $key_1, $key_2) = (0, $keys[$index_1], $keys[$index_2]);
$distance += ($data{$key_1}{$_} - $data{$key_2}{$_}) ** 2 foreach #array;
$distance = sqrt($distance);
$distances{$key_1}{$key_2} = $distance;
$find->{min} = $distance unless $find->{min};
$find->{key} = [$key_1, $key_2] unless $find->{key};
if ($find->{min} > $distance) {
$find->{min} = $distance;
$find->{key} = [$key_1, $key_2];
};
};
};
my ($key_1, $key_2) = $find->{key}->#*;
$data{"$key_1,$key_2"}{$_} = mean($data{$key_1}{$_}, $data{$key_2}{$_}) foreach #array;
delete #data{($key_1, $key_2)};
last if $find->{min} >= $threshold;
%clusters = %data;
};
return %clusters;
};
sub mean {
my #data = #_;
my $sum;
$sum += $_ for #data;
return $sum / #data;
};
...
I have a bunch of Hashes inside of an array. When checking my keys and values I get the expected output except for some special cases as they refer to more Arrays/Hashes.
Think of something like this:
#AoH = ( { 'husband' => "homer", 'wife' => "marge" },
{ 'people' => [{'Bob'=> 24, 'Lukas'=> 37}] },
{ 'vegetables' => { 'tomato' => "red", 'carrot' => "orange"} });
My function iterates through the array and displays my keys and values as in the following:
sub function(...){
print "$key => $value\n";
}
husband => homer
wife => marge
people => ARRAY(0x6b0d80)
Bob => 24
Lukas => 37
vegetables => HASH(0x2570d38)
tomato => red
carrot => orange
Now I want to access my keys and values, but when getting something like ARRAY or HASH as value, I want to disregard that hash and not print it.
Is there some kind of way to only access Values with type scalar?
So far I tried this:
if ($value eq 'ARRAY') {
}
elsif ($value eq ref {}) {
}
else {
print "$key => $value\n";
}
But, it ends up printing exactly the same as above and does not disregard the other data structures.
For an arbitrary data structure like yours, you can use Data::Traverse:
use warnings;
use strict;
use Data::Traverse qw(traverse);
my #AoH = ( { 'husband' => "homer", 'wife' => "marge" },
{ 'people' => [{'Bob'=> 24, 'Lukas'=> 37}] },
{ 'vegetables' => { 'tomato' => "red", 'carrot' => "orange"} });
traverse { print "$a => $b\n" if /HASH/ } \#AoH;
Output:
wife => marge
husband => homer
Bob => 24
Lukas => 37
carrot => orange
tomato => red
Following demo code does not utilize external modules, provided for educational purpose.
use strict;
use warnings;
use feature 'say';
my #AoH = ( { 'husband' => "homer", 'wife' => "marge" },
{ 'people' => [{'Bob'=> 24, 'Lukas'=> 37}] },
{ 'vegetables' => { 'tomato' => "red", 'carrot' => "orange"} });
drill_in( \#AoH );
sub drill_in {
my $data = shift;
if( ref $data eq 'ARRAY' ) {
drill_in($_) for #$data;
} elsif ( ref $data eq 'HASH' ) {
while( my($k, $v ) = each %{$data} ) {
(ref $v eq 'ARRAY' or ref $v eq 'HASH') ? drill_in($v) : say "$k => $v";
}
}
}
Output
husband => homer
wife => marge
Lukas => 37
Bob => 24
tomato => red
carrot => orange
I have a list like this:
my $myV3VersionOfData = ["ZG","ZB","CXLDN",...];
and I want to convert it into a dictionary like this:
my $entries = {
'ZG' => {
'value' => 'ZG'
},
'ZB' => {
'value' => 'ZB'
},
'CXLDN' => {
'value' => 'CXLDN'
},
...
};
I tried this so far, but it doesn't work and gives me an error:
Can't use string ("ZG") as a HASH ref while "strict refs" in use at..
I understand this is occurring since I'm trying to assign the key value from the list, but how do I convert this list into a dictionary shown above?
my %genericHash;
for my $entry (#$myV3VersionOfData) {
$genericHash{ $entry->{key} } = $entry->{value};
}
How can I achieve this? I am new to Perl, and I have tried a bunch of things but it doesn't seem to work. Can anyone please help with this?
You were close. Here is one way to create a hash reference from an array reference:
use warnings;
use strict;
use Data::Dumper;
$Data::Dumper::Sortkeys=1;
my $myV3VersionOfData = ["ZG","ZB","CXLDN"];
my $entries;
for (#{ $myV3VersionOfData }) {
$entries->{$_} = {value => $_};
}
print Dumper($entries);
Output:
$VAR1 = {
'CXLDN' => {
'value' => 'CXLDN'
},
'ZB' => {
'value' => 'ZB'
},
'ZG' => {
'value' => 'ZG'
}
};
If you want to go through every element, to compute a new element, then you can use the map function. As map, can return multiple values, you return two values for each entry. And those can be converted to a hash.
my $array = ["ZG","ZB","CXLDN"];
my %hash = map { $_ => { value => $_ } } #$array;
my $hashref = { map { $_ => { value => $_ } } #$array };
Here's how I've done it for over 10 years.
#! /usr/bin/perl
use warnings;
use strict;
use Data::Dumper qw(Dumper);
my %entries;
my #myV3VersionOfData = ("ZG","ZB","CXLDN");
foreach (#myV3VersionOfData) {
$entries{$_}{'value'} = $_;
}
print Dumper \%entries;
We want
'ZG' => { 'value' => 'ZG' } # Copied literally from the Question
But the ZG part is variable, so we use
$_ => { 'value' => $_ }
Now loop!
my %genericHash = map { $_ => { 'value' => $_ } } #$myV3VersionOfData;
my $entries = { map { $_ => { 'value' => $_ } } #$myV3VersionOfData };
It's not clear which one you want.
Of course, it could also be done using a foreach loop.
my %genericHash;
for (#$myV3VersionOfData) {
$genericHash{$_} = { 'value' => $_ };
}
Please study following code snippet for compliance with your problem.
use strict;
use warnings;
use feature 'say';
use Data::Dumper;
my $myV3VersionOfData = ['ZG','ZB','CXLDN'];
my $hashref;
$hashref->{$_}{value} = $_ for #$myV3VersionOfData;
say Dumper($hashref);
Output
$VAR1 = {
'CXLDN' => {
'value' => 'CXLDN'
},
'ZB' => {
'value' => 'ZB'
},
'ZG' => {
'value' => 'ZG'
}
};
I have recursively put together an array of hashes for perl, which looks something like this :
[
{
'Default' => {
'Elect' => { 'P' => 1 }
}
},
{
'Default' => {
'Elect' => { 'A' => 1 }
}
},
{
'Default' => {
'Elect' => { 'M' => 1 }
}
},
{
'Default' => {
'Elect' => { 'I' => 1 }
}
},
{
'Default' => {
'Picker' => { 'L' => 1 }
}
},
]
My aim is to make this more condensed and look like a single hash, as compared to array of hashes. Is there anyway in which i can make this array of hashes look like a hash:
{
'Default' =>{
'Elect' =>{
'P' => 1,
'A' => 1,
'M' => 1,
'I' => 1,
},
'Picker' => {
'L' => 1
}
}
}
Well, here is a simple recursive procedure to merge two hash references:
sub merge {
my ($xs, $ys) = #_;
while (my ($k, $v) = each %$ys) {
if ('HASH' eq ref $v) {
merge($xs->{$k} //= {}, $v);
}
else {
$xs->{$k} = $v;
}
}
}
Then:
my $data = ...; # your input data structure
my $acc = {};
merge($acc, $_) for #$data;
which produces the result you desire in $acc.
There is also the Hash::Merge module, with that:
use Hash::Merge 'merge';
my $acc = {};
$acc = merge $acc, $_ for #$data;
How come I never see examples like these where you declare the hash, and then put then inside another hash?
my %hash1={};
$hash1{'key1'}='1-111';
$hash1{'key2'}='1-222';
$hash1{'key3'}='1-333';
my %hash2={};
$hash2{'key1'}='2-111';
$hash2{'key2'}='2-222';
$hash2{'key3'}='2-333';
my %main_hash1={%hash1, %hash2};
I've only seen examples like these where they put the hashes inside the hash, instead of a variable for the hash:
my %main_hash2=( 'hash1' => {
'key1' => '1-111',
'key2' => '1-222',
'key3' => '1-333'
},
'hash2' => {
'key1' => '2-111',
'key2' => '2-222',
'key3' => '2-333'
}
);
(similar with arrays also)
You can't store a hash in a hash, you can store a hashref in a hash though:
my %main_hash1 = ( hash1 => \%hash1, hash2 => \%hash2 );
The same goes with arrays:
my #main_array1 = ( \#array1, \#array2 );
And with mixes:
my #array_of_hrefs = ( \%hash1, \%hash2 );
my %hash_of_arefs = ( arr1 => \#arr1, arr2 => \#arr2 );
This is done all the time; I don't know why you haven't seen it and I doubt anyone on SO would know that answer.
Also, this does not initialize a hash:
my %hash1={}; ## should be my %hash1; or my %hash1 = ();
See the following example :
The Perl code :
my %hash1;
$hash1{'key1'}='1-111';
$hash1{'key2'}='1-222';
$hash1{'key3'}='1-333';
my %hash2;
$hash2{'key1'}='2-111';
$hash2{'key2'}='2-222';
$hash2{'key3'}='2-333';
my %main_hash = ( hash1 => \%hash1, hash2 => \%hash2 );
use Data::Dumper;
print Dumper %main_hash;
The output :
$VAR1 = 'hash2';
$VAR2 = {
'key2' => '2-222',
'key1' => '2-111',
'key3' => '2-333'
};
$VAR3 = 'hash1';
$VAR4 = {
'key2' => '1-222',
'key1' => '1-111',
'key3' => '1-333'
};
That use references, see http://perldoc.perl.org/perlreftut.html & if needed : http://perldoc.perl.org/perlref.html
There's a ref trick that makes things a bit magic :
my $hash_ref = {}; # reference to a blank hash
my %h = ( foo => "1", bar => "2" );
push #{$hash_ref->{'1st_level'}->{'level-2'}->{'level_3'}->{'arr'}}, 123;
push #{$hash_ref->{'1st_level'}->{'level-2'}->{'level_3'}->{'arr'}}, 456;
$hash_ref->{'1st_level'}->{'level-2'}->{'level_3'}->{'arr'}->[2] = \%h;
use Data::Dumper;
print Dumper $hash_ref;
The output :
$VAR1 = {
'1st_level' => {
'level-2' => {
'level_3' => {
'arr' => [
123,
456,
{
'bar' => '2',
'foo' => '1'
}
]
}
}
}
};