Converting and printing array of hashes - Perl - arrays

I really don't know how to do this, so I ended up here.
I want to convert this input:
# Input: a flat list of slash-separated file paths.
my @sack_files_1 = (
    'mgenv/1_2_3/parent.dx_environment',
    'mgenv/1_2_3/doc/types.dat',
    'u5env/1_2_3/parent.dx_environment',
    'u5env/1_2_3/doc/types.dat',
);
To this:
my $sack_tree_1 = {
'mgenv' => {
'1_2_3' => [ 'parent.dx_environment', 'doc/types.dat' ],
},
'u5env' => {
'1_2_3' => [ 'parent.dx_environment', 'doc/types.dat' ],
}
};

Something like this should do the trick:
use strict;
use warnings;
use Data::Dumper;

# Paths to be grouped by their first two components.
my @sack_files_1 = (
    'mgenv/1_2_3/parent.dx_environment',
    'mgenv/1_2_3/doc/types.dat',
    'u5env/1_2_3/parent.dx_environment',
    'u5env/1_2_3/doc/types.dat',
);

# Result: first component => second component => [ rest of path, ... ]
my %sack_tree_1;
foreach (@sack_files_1) {
    # With no string argument, split works on $_.
    my ( $env, $number, @everything_else ) = split('/');

    # The nested hash and the array ref spring into existence on first use.
    # The remaining components are re-joined so that 'doc/types.dat' stays a
    # single entry.
    push( @{ $sack_tree_1{$env}{$number} }, join( "/", @everything_else ) );
}
print Dumper \%sack_tree_1;

This will do as you ask. It uses File::Spec::Functions to split each path into its components.
The first two components of each path are used directly as hash keys, relying on autovivification to create the necessary hash elements.
A simple push to an implied array reference also autovivifies the lowest-level hash element.
I have used Data::Dump to display the resulting hash. It is not part of the core Perl installation and you may need to install it, but it is much superior to Data::Dumper.
use strict;
use warnings;

use File::Spec::Functions qw/ splitdir catfile /;

my @sack_files_1 = (
    'mgenv/1_2_3/parent.dx_environment',
    'mgenv/1_2_3/doc/types.dat',
    'u5env/1_2_3/parent.dx_environment',
    'u5env/1_2_3/doc/types.dat',
);

# Result: first component => second component => [ rest of path, ... ]
my %paths;
for my $path (@sack_files_1) {
    my ($p1, $p2, @path) = splitdir $path;

    # catfile re-joins the remaining components using the current platform's
    # directory separator, so the stored strings are not guaranteed to
    # contain '/'.
    push @{ $paths{$p1}{$p2} }, catfile @path;
}

use Data::Dump;
dd \%paths;
output
{
mgenv => { "1_2_3" => ["parent.dx_environment", "doc\\types.dat"] },
u5env => { "1_2_3" => ["parent.dx_environment", "doc\\types.dat"] },
}

# Build a nested hash in which each leading directory component is one hash
# level and the last directory maps to the file name.
# NOTE: the final step is a plain assignment, so when several files share the
# same last directory only the last one processed is kept.
my $sack_tree_1 = {};
foreach my $data (@sack_files_1) {
    my @path = split '/', $data;

    # Take the file name, then the last directory, off the end of the path.
    # These are two separate statements on purpose: in
    #     my ($file, $last_part) = pop @path, pop @path;
    # the assignment binds tighter than the comma, so $last_part would stay
    # undef and the second pop's value would be thrown away.
    my $file      = pop @path;
    my $last_part = pop @path;

    my $hash_part = $sack_tree_1;
    foreach my $path (@path) {               # For every element in the remaining part of the path
        $hash_part->{$path} //= {};          # Make sure we have a hash ref to play with
        $hash_part = $hash_part->{$path};    # Move down the hash past the current path element
    }
    $hash_part->{$last_part} = $file;        # Add the file name to the last part of the path
}
This handles all path lengths of 2 or more

Related

Trying to figure out how to push specific links contained in each link of separate list of links into an array

GENERAL IDEA
Here is a snippet of what I'm working with:
# For every page in @blarg_links: fetch it, find the <foo class="bar">
# elements, and collect the href found in each one into @collector.
# `get` is presumably LWP::Simple::get -- the import is not shown here.
my $url_temp;
my $page_temp;
my $p_temp;
my @temp_stuff;
my @collector;
foreach (@blarg_links) {
    $url_temp  = $_;
    $page_temp = get( $url_temp ) or die $!;
    $p_temp    = HTML::TreeBuilder->new_from_content( $page_temp );
    @temp_stuff = $p_temp->look_down(
        _tag  => 'foo',
        class => 'bar'
    );
    # Runs once per page, while @temp_stuff still holds that page's elements.
    foreach (@temp_stuff) {
        push(@collector, "http://www.foobar.sx" . $1) if $_->as_HTML =~ m/href="(.*?)"/;
    };
};
Hopefully it is clear that what I'm hopelessly trying to do is push the link endings found in each of a list of links into an array called @temp_stuff. So the first link in @blarg_links, when visited, has one or more foo tags with an associated bar class that, when acted on by as_HTML, will match something I want in the href attribute, which I then push into an array of links that have the data I'm really after... Does that make sense?
ACTUAL DATA
# Step 1: fetch the year page and collect the per-day calendar links.
# `get` is presumably LWP::Simple::get -- the import is not shown here.
my $url2  = 'http://www.chemistry.ucla.edu/calendar-node-field-date/year';
my $page2 = get( $url2 ) or die $!;
my $p2    = HTML::TreeBuilder->new_from_content( $page2 );
my @stuff2 = $p2->look_down(
    _tag  => 'div',
    class => 'year mini-day-on'
);
my @chem_links;
foreach (@stuff2) {
    push(@chem_links, $1) if $_->as_HTML =~ m/(http:\/\/www\.chemistry\.ucla\.edu\/calendar-node-field-date\/day\/[0-9]{4}-[0-9]{2}-[0-9]{2})/;
};

# Step 2: visit every day page and look up its <span class="field-content">
# elements.
my $url_temp;
my $page_temp;
my $p_temp;
my @temp_stuff;
my @collector;
foreach (@chem_links) {
    $url_temp  = $_;
    $page_temp = get( $url_temp ) or die $!;
    $p_temp    = HTML::TreeBuilder->new_from_content( $page_temp );
    @temp_stuff = $p_temp->look_down(
        _tag  => 'span',
        class => 'field-content'
    );
};

# NOTE: this loop sits after the loop above has finished, and @temp_stuff is
# overwritten on every pass of that loop, so only the elements found on the
# last day page are examined here.
foreach (@temp_stuff) {
    push(@collector, "http://www.chemistry.ucla.edu" . $1) if $_->as_HTML =~ m/href="(.*?)"/;
};
n.b. - I want to use HTML::TreeBuilder. I'm aware of alternatives.
This is a rough attempt at what I think you want.
It fetches all the links on the first page and visits each of them in turn, printing the link in each <span class="field-content"> element.
use strict;
use warnings;
use 5.010;

use IO::Handle;    # makes STDOUT->autoflush available on perls older than 5.14
use HTML::TreeBuilder;

STDOUT->autoflush;

my $url  = 'http://www.chemistry.ucla.edu/calendar-node-field-date/year';
my $tree = HTML::TreeBuilder->new_from_url($url);

# Collect the link inside every <div> whose class list contains 'mini-day-on'.
my @chem_links;
for my $div ( $tree->look_down( _tag => 'div', class => qr{\bmini-day-on\b} ) ) {
    my ($anchor) = $div->look_down(_tag => 'a', href => qr{http://www\.chemistry\.ucla\.edu});
    next unless $anchor;    # no matching <a> in this div: skip it rather than die on undef
    push @chem_links, $anchor->attr('href');
}

# Visit each collected page, print its URL followed by the links found in
# its <span class="field-content"> elements, and accumulate those links.
my @collector;
for my $url (@chem_links) {
    say $url;
    my $tree = HTML::TreeBuilder->new_from_url($url);
    my @seminars;
    for my $span ( $tree->look_down( _tag => 'span', class => 'field-content' ) ) {
        my ($anchor) = $span->look_down(_tag => 'a', href => qr{/});
        next unless $anchor;    # span without a link
        push @seminars, 'http://www.chemistry.ucla.edu'.$anchor->attr('href');
    }
    say " $_" for @seminars;
    say '';
    push @collector, @seminars;
}
For a more modern framework for parsing webpages, I would suggest you take a look at Mojo::UserAgent and Mojo::DOM. Instead of having to manually march through each section of your html tree, you can use the power of css selectors to zero in on the specific data that you want. There's a nice 8 minute introductory video on the framework at Mojocast Episode 5.
# Parses the UCLA Chemistry Calendar and displays all seminar links
use strict;
use warnings;

use Mojo::UserAgent;
use URI;

my $url = 'http://www.chemistry.ucla.edu/calendar-node-field-date/year';
my $ua  = Mojo::UserAgent->new;
my $dom = $ua->get($url)->res->dom;

# The href of each matched element is extracted with an explicit map
# callback. Calling ->attr('href') directly on the collection depends on the
# collection forwarding element methods, which newer Mojolicious releases
# reportedly no longer do; the callback form does not depend on that.
for my $dayhref ($dom->find('div.mini-day-on > a[href*="/day/"]')->map(sub { $_->attr('href') })->each) {
    # Resolve a possibly relative link against the page it came from.
    my $dayurl = URI->new($dayhref)->abs($url);
    print $dayurl, "\n";

    my $daydom = $ua->get($dayurl->as_string)->res->dom;
    for my $seminarhref ($daydom->find('span.field-content > a[href]')->map(sub { $_->attr('href') })->each) {
        my $seminarurl = URI->new($seminarhref)->abs($dayurl);
        print " $seminarurl\n";
    }
    print "\n";
}
Output is identical to that of Borodin's solution using HTML::TreeBuilder:
http://www.chemistry.ucla.edu/calendar-node-field-date/day/2014-01-06
http://www.chemistry.ucla.edu/seminars/nano-rheology-enzymes
http://www.chemistry.ucla.edu/calendar-node-field-date/day/2014-01-09
http://www.chemistry.ucla.edu/seminars/imaging-approach-biology-disease-through-chemistry
http://www.chemistry.ucla.edu/calendar-node-field-date/day/2014-01-10
http://www.chemistry.ucla.edu/seminars/arginine-methylation-%E2%80%93-substrates-binders-function
http://www.chemistry.ucla.edu/seminars/special-inorganic-chemistry-seminar
http://www.chemistry.ucla.edu/calendar-node-field-date/day/2014-01-13
http://www.chemistry.ucla.edu/events/robert-l-scott-lecture-0
...

Perl Issue putting data into array reference (with perl URI)

I have the following code:
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;    # provides Dumper(), used at the end of the script
use URI qw( );

my @insert_words = qw( HELLO );
my $newURLs;
while ( my $baseURL = <DATA>) {
    chomp $baseURL;
    my $url = URI->new($baseURL);
    my $path = $url->path();
    for (@insert_words) {
        # Use package vars to communicate with /(?{})/ blocks.
        local our $insert_word = $_;
        local our @paths;

        # The trailing (?!) always fails, forcing the regex engine to
        # backtrack through every way of splitting the path; the code block
        # records the candidate paths for each split it passes through.
        $path =~ m{
            ^(.*[/])([^/]*)((?:[/].*)?)\z
            (?{
                push @paths, "$1$insert_word$2$3";
                if (length($2)) {
                    push @paths, "$1$insert_word$3";
                    push @paths, "$1$2$insert_word$3";
                }
            })
            (?!)
        }x;
        for (@paths) {
            $url->path($_);
            print "$url\n"; #THIS PRINTS THE CORRECT URLS I WANT IN THE ARRAY REF
            # NOTE: $url is one URI object per base URL and is modified in
            # place above, so every element pushed here is a reference to
            # that same object.
            push( @{ $newURLs->{$baseURL} }, $url ); #TO PUT EACH URL INTO AN ARRAYREF BUT ITS NOT WORKING
        }
    }
}
print "\n"; #for testing only
print Dumper($newURLs); #for testing only
print "\n"; #for testing only
__DATA__
http://www.stackoverflow.com/dog/cat/rabbit/
http://www.superuser.co.uk/dog/cat/rabbit/hamster/
http://10.15.16.17/dog/cat/rabbit/
The problem I am having:
When I do print "$url\n"; as shown in the code above, it prints the correct URLs that I want to put in the array ref, but when I do push( @{ $newURLs->{$baseURL} }, $url ); I get the following in the data structure:
$VAR1 = {
'http://www.stackoverflow.com/dog/cat/rabbit/' => [
bless( do{\(my $o = 'http://www.stackoverflow.com/dogHELLO/cat/rabbit/')}, 'URI::http' ),
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0]
],
When what I should be getting is the following
$VAR1 = {
'http://www.stackoverflow.com/dog/cat/rabbit/' => [
http://www.stackoverflow.com/dog/cat/rabbit/HELLO
http://www.stackoverflow.com/dog/cat/HELLOrabbit/
http://www.stackoverflow.com/dog/cat/HELLO/
http://www.stackoverflow.com/dog/cat/rabbitHELLO/
http://www.stackoverflow.com/dog/HELLOcat/rabbit/
http://www.stackoverflow.com/dog/HELLO/rabbit/
http://www.stackoverflow.com/dog/catHELLO/rabbit/
http://www.stackoverflow.com/HELLOdog/cat/rabbit/
http://www.stackoverflow.com/HELLO/cat/rabbit/
http://www.stackoverflow.com/dogHELLO/cat/rabbit/
],
Is it something obvious that I am overlooking or doing wrong? Your help with this will be much appreciated, many thanks
$url is an object. To get its stringification, you can let it interpolate:
push @{ $newURLs->{$baseURL} }, "$url";    # interpolation stores the string, not the object
or call the as_string method:
push @{ $newURLs->{$baseURL} }, $url->as_string;    # explicit stringification
try
push( @{ $newURLs->{$baseURL} }, "".$url );    # concatenation with '' yields a plain string

Perl: Comparing 2 hash of arrays with another array

I have written the code below in Perl, but it's not giving the desired output. I am dealing with a comparison between one array and two hashes of arrays.
Given sample input files:
1) file1.txt
A6416 A2318
A84665 A88
2) hashone.pl
%hash1=(
A6416=>['E65559', 'C11162.1', 'c002gnj.3',],
A88=>['E77522', 'M001103', 'C1613.1', 'c001hyf.2',],
A84665=>['E138347', 'M032578', 'C7275.1', 'c009xpt.3',],
A2318=>['E128591', 'C43644.1', 'C47705.1', 'c003vnz.4',],
);
3) hashtwo.pl
%hash2=(
15580=>['C7275.1', 'E138347', 'M032578', 'c001jnm.3', 'c009xpt.2'],
3178=>['C1613.1', 'E77522','M001103', 'c001hyf.2', 'c001hyg.2'],
24406=>['C11162.1', 'E65559', 'M003010', 'c002gnj.2'],
12352=>['C43644.1', 'C47705.1', 'E128591','M001458', 'c003vnz.3'],
);
My aim is to achieve the task described:
From file1.txt, I have to locate the corresponding ID in %hash1. For instance, A6416 (file1.txt) is a key in %hash1. Next, I have to find the values of A6416 ['E65559', 'C11162.1', 'c002gnj.3',] in %hash2. If a majority (more than 50%) of the values are found in %hash2, I replace A6416 with the corresponding key from %hash2.
Example:
A6416 A2318
A84665 A88
Output:
24406 12352
15580 3178
Please note that the keys for %hash1 and %hash2 are different (they don't overlap). But the values are the same (they overlap).
#!/usr/bin/perl -w
use strict;
use warnings;

# Three-argument open with a lexical handle. 'or' is used instead of '||'
# because '||' would bind to the file name string, so a failed open would
# never reach die.
open my $fh, '<', 'file1.txt' or die "Error\n";

# The leading './' makes do() load the files from the current directory
# instead of searching @INC (which no longer contains '.' on perl 5.26+).
my %hash1 = do './hashone.pl';
my %hash2 = do './hashtwo.pl';

chomp(my @array=<$fh>);
foreach my $amp (@array)
{
    # NOTE: \d+ captures digits only (e.g. '6416' from 'A6416'), and without
    # /g only the first ID on each line is examined.
    if ($amp =~ /(\d+)(\s?)/)
    {
        if (exists ($hash1{$1}))
        {
            for my $key (keys %hash2)
            {
                for my $i ( 0 .. $#{ $hash2{$key} } )
                {
                    # NOTE: eq puts the array in scalar context, so this
                    # compares the element count of $hash1{$1} with one
                    # string from %hash2.
                    if ((@{$hash1{$1}}) eq ($hash2{$key}[$i]))
                    {
                        print "$key";
                    }
                }
            }
        }
    }
}
close $fh;
1;
Any guidance on this problem is highly appreciated. Thank you!
I think you should invert %hash2 into this structure:
# Inverted form of %hash2: every value becomes a key that maps back to the
# numeric key it was listed under.
$hash2{'C7275.1'} = $hash2{'E138347'} = $hash2{'M032578'}
    = $hash2{'c001jnm.3'} = $hash2{'c009xpt.2'} = 15580;
$hash2{'C1613.1'} = $hash2{'E77522'} = $hash2{'M001103'}
    = $hash2{'c001hyf.2'} = $hash2{'c001hyg.2'} = 3178;
$hash2{'C11162.1'} = $hash2{'E65559'}
    = $hash2{'M003010'} = $hash2{'c002gnj.2'} = 24406;
# These five values are listed under 12352 in the original %hash2 (not 3178).
$hash2{'C43644.1'} = $hash2{'C47705.1'} = $hash2{'E128591'}
    = $hash2{'M001458'} = $hash2{'c003vnz.3'} = 12352;
So that you can perform these look-ups much more effectively, rather than having to iterate over every element of every element of %hash2.
Building on the responses from ruakh and zock here is the code you need to build the look-up table for hash2
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;

my %hash2=(
    15580=>['C7275.1', 'E138347', 'M032578', 'c001jnm.3', 'c009xpt.2'],
    3178=>['C1613.1', 'E77522','M001103', 'c001hyf.2', 'c001hyg.2'],
    24406=>['C11162.1', 'E65559', 'M003010', 'c002gnj.2'],
    12352=>['C43644.1', 'C47705.1', 'E128591','M001458', 'c003vnz.3'],
);

# Build LUT for hash2: each value in each array maps back to the key whose
# array contains it.
my %hash2_lut;
foreach my $key (keys %hash2)
{
    foreach my $val (@{$hash2{$key}})
    {
        $hash2_lut{$val} = $key;
    }
}
print Dumper(\%hash2_lut);
Please select ruakh's post as the answer, just trying to clarify the code for you. Use Data::Dumper...it is your friend.
Here is the output:
$VAR1 = {
'C47705.1' => '12352',
'M032578' => '15580',
'E138347' => '15580',
'E77522' => '3178',
'C7275.1' => '15580',
'c001jnm.3' => '15580',
'E65559' => '24406',
'C1613.1' => '3178',
'M001458' => '12352',
'c002gnj.2' => '24406',
'c009xpt.2' => '15580',
'c001hyf.2' => '3178',
'C43644.1' => '12352',
'E128591' => '12352',
'c001hyg.2' => '3178',
'M003010' => '24406',
'c003vnz.3' => '12352',
'C11162.1' => '24406',
'M001103' => '3178'
};

Perl CGI scrolling list wont print array of Hash

I am trying to write a Perl CGI script that prints /etc/passwd users but when I open my CGI my scrolling list just prints out multiple lines of this:
"HASH(0x27836d8)"
Here is my code where I grab etc passwd and print it to the scrolling list. Can anyone help me out with printing this correctly to the scrolling list?
# Walk the password database and offer the matching accounts in a
# scrolling list.
setpwent();
while (@list = getpwent())
{
    ($LOGIN,$PASSWORD,$UID,$GID,$QUOTA,$COMMENT,$GECOS,$HOMEDIR,$SHELL) = @list[0,1,2,3,4,5,6,7,8];
    if( $UID >= 1001 )
    {
        # NOTE: each element pushed here is a hash reference, not a string.
        push @users, { login => "$LOGIN"};
    }
}
endpwent();
print scrolling_list(-name=>'user_list',
                     -values=>[@users],
                     -size=>15);
You gave the list a bunch of hash references, so that's what got displayed. Change
push @users, { login => "$LOGIN"};
to
push @users, $LOGIN;
use strict;
use warnings;
# ... (rest of the CGI script omitted)

# Login names to offer in the list.
my @users;

setpwent();
while (my @list = getpwent()) {
    # getpwent returns (name, passwd, uid, gid, ...): the login name is
    # element 0 and the numeric uid is element 2.
    my ($user, $uid) = @list[0, 2];
    push @users, $user
        if $uid >= 1001;
}
endpwent();

print scrolling_list(
    -name   => 'user_list',
    -values => \@users,
    -size   => 15,
);
As documented in the CGI perldoc, the thing you pass with -values should be a list ref, but you've created a list ref to a list of hashes (due to your use of curly braces above). Here's a fixed version:
# Collect plain login names and hand the list to scrolling_list by reference.
setpwent();
while (@list = getpwent())
{
    ($LOGIN,$PASSWORD,$UID,$GID,$QUOTA,$COMMENT,$GECOS,$HOMEDIR,$SHELL) = @list[0,1,2,3,4,5,6,7,8];
    if( $UID >= 1001 )
    {
        push @users, $LOGIN;
    }
}
endpwent();
print scrolling_list(-name=>'user_list',
                     -values=>\@users,
                     -size=>15);

How do I pass all elements of "array of hashes" into function as an array

How do I pass an element of an "array of hashes" into a function as an array?
say for instance I wanted to pass all $link->{text} as an array into the sort() function.
#!/usr/bin/perl
use strict; use warnings;

# NOTE(review): the sample text below contains no <a href="..."> markup, so
# the pattern further down finds nothing in it as written; the anchor tags
# appear to have been lost from the sample.
my $field = <<EOS;
Baboon
Antelope
dog
cat
EOS
#/ this comment is to unconfuse the SO syntax highlighter.

# One { url, text } hash per <a ... href="...">...</a> found in $field.
my @array_of_links;
while ($field =~ m{<a.*?href="(.*?)".*?>(.*?)</a>}g) {
    push @array_of_links, { url => $1, text => $2 };
}
for my $link (@array_of_links) {
    print qq("$link->{text}" goes to -> "$link->{url}"\n);
}
If you want to sort your links by text,
my @sorted_links = sort { $a->{text} cmp $b->{text} } @array_of_links;
If you actually just want to get and sort the text,
my @text = sort map $_->{text}, @array_of_links;
Better to err on the side of caution and use an HTML parser to parse HTML:
use strict; use warnings;
use HTML::TokeParser::Simple;

my $field = <<EOS;
Baboon
Antelope
dog
cat
EOS

my $parser = HTML::TokeParser::Simple->new(string => $field);

# One { url, text } hash per <a> start tag that carries an href attribute.
my @urls;
while ( my $tag = $parser->get_tag ) {
    next unless $tag->is_start_tag('a');
    next unless defined(my $url = $tag->get_attr('href'));
    my $text = $parser->get_text('/a');
    push @urls, { url => $url, text => $text };
}

# Order by link text, falling back to the URL when the texts are equal
# (cmp binds tighter than ||, so no parentheses are needed).
@urls = sort {
    $a->{text} cmp $b->{text} ||
    $a->{url}  cmp $b->{url}
} @urls;

use YAML;
print Dump \@urls;

Resources