I have the following code:
#!/usr/bin/perl
# For every base URL read from DATA, build variants of its path with each
# insert word spliced around every path segment, print each variant, and
# collect the variants per base URL in $newURLs.
use strict;
use warnings;
use URI qw( );
use Data::Dumper;    # provides Dumper(), used for the test output below

my @insert_words = qw( HELLO );
my $newURLs;         # hashref: base URL => arrayref of generated URLs

while ( my $baseURL = <DATA> ) {
    chomp $baseURL;
    my $url  = URI->new($baseURL);
    my $path = $url->path();

    for (@insert_words) {
        # Use package vars to communicate with /(?{})/ blocks.
        local our $insert_word = $_;
        local our @paths;

        # The trailing (?!) always fails, so the engine backtracks through
        # every way of splitting the path; the code block runs once per
        # split and records the candidate paths for it.
        $path =~ m{
            ^(.*[/])([^/]*)((?:[/].*)?)\z
            (?{
                push @paths, "$1$insert_word$2$3";
                if (length($2)) {
                    push @paths, "$1$insert_word$3";
                    push @paths, "$1$2$insert_word$3";
                }
            })
            (?!)
        }x;

        for (@paths) {
            $url->path($_);
            print "$url\n"; #THIS PRINTS THE CORRECT URLS I WANT IN THE ARRAY REF
            # NOTE: $url is a URI object, and the very same object is pushed
            # on every pass, so all array elements refer to one object.
            # Push the string form ("$url") to store distinct URLs.
            push( @{ $newURLs->{$baseURL} }, $url ); #TO PUT EACH URL INTO AN ARRAYREF BUT ITS NOT WORKING
        }
    }
}

print "\n"; #for testing only
print Dumper($newURLs); #for testing only
print "\n"; #for testing only
__DATA__
http://www.stackoverflow.com/dog/cat/rabbit/
http://www.superuser.co.uk/dog/cat/rabbit/hamster/
http://10.15.16.17/dog/cat/rabbit/
The problem I am having:
When I do print "$url\n"; as shown in the code above, it prints the correct URLs that I want to put in the array ref, but when I do push( @{ $newURLs->{$baseURL} }, $url ); I get the following in the data structure:
$VAR1 = {
'http://www.stackoverflow.com/dog/cat/rabbit/' => [
bless( do{\(my $o = 'http://www.stackoverflow.com/dogHELLO/cat/rabbit/')}, 'URI::http' ),
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0],
$VAR1->{'http://www.stackoverflow.com/dog/cat/rabbit/'}[0]
],
When what I should be getting is the following
$VAR1 = {
'http://www.stackoverflow.com/dog/cat/rabbit/' => [
http://www.stackoverflow.com/dog/cat/rabbit/HELLO
http://www.stackoverflow.com/dog/cat/HELLOrabbit/
http://www.stackoverflow.com/dog/cat/HELLO/
http://www.stackoverflow.com/dog/cat/rabbitHELLO/
http://www.stackoverflow.com/dog/HELLOcat/rabbit/
http://www.stackoverflow.com/dog/HELLO/rabbit/
http://www.stackoverflow.com/dog/catHELLO/rabbit/
http://www.stackoverflow.com/HELLOdog/cat/rabbit/
http://www.stackoverflow.com/HELLO/cat/rabbit/
http://www.stackoverflow.com/dogHELLO/cat/rabbit/
],
Is it something obvious that I am overlooking or doing wrong? Your help with this will be much appreciated, many thanks
$url is an object. To get its stringification, you can let it interpolate:
push @{ $newURLs->{$baseURL} }, "$url";
or call the as_string method:
push @{ $newURLs->{$baseURL} }, $url->as_string;
try
push( @{ $newURLs->{$baseURL} }, "".$url );
Related
I'm trying to extract data from our JSON data based on given output fields, but I'm not getting a good result.
e.g.
Given fields that I want:
Array
(
[0] => id
[1] => name
[2] => email
[3] => optin_email
)
Those fields exist in my datastring, I want to export those to a CSV.
I can do this, hardcoded
foreach ($jsonString as $value) {
$row = [
$value->id,
$value->name,
$value->email,
$value->phone
];
print_r($row);
}
The above will give me the list/file I need. BUT, I want to make that dynamic based on the data in the array, so, for example, when this is the Array:
Array
(
[0] => id
[1] => name
)
This should be my output:
foreach ($jsonString as $value) {
$row = [
$value->id,
$value->name
];
print_r($row);
}
So I need to dynamically create the
$value->{var}
I have been trying forever, but I am not seeing it straight anymore.
Tried this:
// Builds a plain string such as '$value->id,$value->name'. The single quotes
// keep '$value->' literal, so no object property is ever read here.
$rowFields = '';
foreach ($export_datafields AS $v) {
$rowFields .= '$value->' . $v . ',';
}
// Drop the trailing comma left by the loop above.
$trimmed_row_fields = rtrim($rowFields, ',');
foreach ($jsonString as $value) {
// $row receives that same string on every iteration; $value itself is unused.
$row = $trimmed_row_fields;
print_r($row);
}
And several variations of that:
foreach ($jsonString as $value) {
$row = [$trimmed_row_fields];
print_r($row);
}
Question is: how can I get
$value->VAR
as a valid array key when I only know the VAR name and need the prefixed $value-> object.
I ended up using the following code which works for me. If anybody still has the answer to my original question, please shoot. Always good to know it all.
// Stream the selected fields of every decoded record as a CSV download.
header("Content-type: application/csv");
header("Content-Disposition: attachment; filename=$csvFileName");
header("Pragma: no-cache");
header("Expires: 0");

// fputcsv() quotes values that contain commas, quotes or newlines; joining
// the values with ',' by hand would silently corrupt such rows.
// Delimiter, enclosure and escape are passed explicitly so the call does not
// depend on version-specific defaults.
$out = fopen('php://output', 'w');
fputcsv($out, $export_datafields, ',', '"', '\\');   // header row

foreach ($jsonString as $value) {
    $row = [];
    foreach ($export_datafields as $field) {
        // Variable property access: reads $value->id, $value->name, ...
        // A record lacking the field contributes an empty cell.
        $row[] = isset($value->$field) ? $value->$field : '';
    }
    fputcsv($out, $row, ',', '"', '\\');
}

fclose($out);
my csv input file
Chapter,Content
A,1
B,3
C,1
C,2
C,3
D,5
My current perl script
# Read the chapter/content CSV named by $input, collect the content values of
# chapters A-D into four arrays, then print one "content =" line per chapter.
open(INFILE, '<', $input) || die "can't open the file";
while (<INFILE>) {
    chomp;
    my @line = split /,/, $_;

    # Compare the chapter column exactly: a bare /C/ match on the whole line
    # would also hit the "Chapter,Content" header row.
    if ($line[0] eq 'A') {
        push @con1, $line[1];
    }
    elsif ($line[0] eq 'B') {
        push @con2, $line[1];
    }
    elsif ($line[0] eq 'C') {
        push @con3, $line[1];
    }
    elsif ($line[0] eq 'D') {
        push @con4, $line[1];
    }
}
close INFILE;

print "content =", (join ", ", @con1),"\n";
print "content =", (join ", ", @con2),"\n";
print "content =", (join ", ", @con3),"\n";
print "content =", (join ", ", @con4),"\n";
My current code can work but I wanna make my code shorter & more efficient.
How do I make @con more automated? Something like using a for loop and $i to pick the @con array, so that the overall code gets shorter.
Why it's stupid to `use a variable as a variable name'.
You should use an AoA instead.
# Group the second CSV column by the chapter in the first column, using an
# array of arrays indexed by each chapter's position in @foos.
use Text::CSV_XS qw( );

my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 2 });
open(my $fh, "<", $qfn)
    or die("Can't open \"$qfn\": $!\n");

my @foos = qw( A B C D );
my $num_eles = 0;
my %index_by_foo = map { $_ => $num_eles++ } @foos;    # A => 0, B => 1, ...
my @cons_by_foo  = map { [] } 1..$num_eles;

while (my $row = $csv->getline($fh)) {
    # Index 0 (chapter "A") is a valid slot, so test for definedness rather
    # than truth. Rows whose first column is not in @foos are ignored.
    my $i = $index_by_foo{ $row->[0] };
    next if !defined($i);
    push @{ $cons_by_foo[$i] }, $row->[1];
}

# Start at 0 so the first chapter is printed as well.
for my $i (0..$num_eles-1) {
    print("content =", join(", ", @{ $cons_by_foo[$i] }), "\n");
}
That said, a HoA seems a far better fit.
# Group the second CSV column by the chapter in the first column, using a
# hash of arrays keyed by chapter name.
use Text::CSV_XS qw( );

my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 2 });
open(my $fh, "<", $qfn)
    or die("Can't open \"$qfn\": $!\n");

# Discard the "Chapter,Content" header row so it is not grouped as data.
$csv->getline($fh);

my %cons_by_foo;    # chapter => arrayref of its content values, in file order
while (my $row = $csv->getline($fh)) {
    push @{ $cons_by_foo{ $row->[0] } }, $row->[1];
}

for my $foo (sort keys %cons_by_foo) {
    print("content $foo =", join(", ", @{ $cons_by_foo{$foo} }), "\n");
}
This is what a hash is great for IMO (hashtable or dictionary in other languages).
note: I haven't tested this code, it's from memory.
my %con;    # chapter => its content values joined with ", "
while (<INFILE>)
{
    chomp;
    my @line = split /,/, $_;
    # '.' concatenates strings in Perl; '+' would add the operands as numbers.
    # The first value for a chapter is stored as-is to avoid a leading ", ".
    $con{$line[0]} = defined $con{$line[0]}
        ? $con{$line[0]} . ", " . $line[1]
        : $line[1];
}
# sort takes the key list, so it must wrap keys(), not the other way round.
foreach my $str (sort keys %con)
{
    print "content $con{$str}\n";
}
edit: much better and tested code here.
# Group the Content values of input.txt by Chapter and print one line per
# chapter, in sorted chapter order.
open(my $in, '<', "input.txt") || die "can't open the file";
my %con;    # chapter => arrayref of its content values, in file order
while (<$in>)
{
    next if /^Chapter/;    # skip the "Chapter,Content" header row
    chomp;
    my @line = (split ',' , $_);
    push @{$con{$line[0]}}, $line[1];
}
close($in);
foreach my $str (sort keys %con)
{
    my $tmp = join ', ', @{$con{$str}};
    print "content $str = $tmp\n";
}
output is:
content A = 1
content B = 3
content C = 1, 2, 3
content D = 5
I know that isn't your output, but it seems more useful than what you had.
my %cc; #chapter contents
# Three-argument open keeps a mode character in $filename from being
# interpreted as part of the open mode.
open my $FH, '<', $filename or die "Cannot open $filename";
# Read line by line instead of slurping the whole file into a list first.
while (my $line = <$FH>) {
    chomp $line;
    my ($chapter, $content) = split /,/, $line;
    push @{ $cc{$chapter} }, $content;
}
close $FH;
print "Chapter = $_ Content = " . join(", ", @{ $cc{$_} }) . "\n" for sort keys %cc;
Output:
Chapter = A Content = 1
Chapter = B Content = 3
Chapter = C Content = 1, 2, 3
Chapter = D Content = 5
I really dont know how to do it so I ended up here.
I want to convert this input:
my @sack_files_1 = (
    'mgenv/1_2_3/parent.dx_environment',
    'mgenv/1_2_3/doc/types.dat',
    'u5env/1_2_3/parent.dx_environment',
    'u5env/1_2_3/doc/types.dat',
);
To this:
my $sack_tree_1 = {
'mgenv' => {
'1_2_3' => [ 'parent.dx_environment', 'doc/types.dat' ],
},
'u5env' => {
'1_2_3' => [ 'parent.dx_environment', 'doc/types.dat' ],
}
};
Something like this should do the trick:
# Turn a flat list of "env/version/rest/of/path" strings into
# %sack_tree_1{env}{version} = [ "rest/of/path", ... ] and dump the result.
use strict;
use warnings;
use Data::Dumper;

my @sack_files_1 = (
    'mgenv/1_2_3/parent.dx_environment',
    'mgenv/1_2_3/doc/types.dat',
    'u5env/1_2_3/parent.dx_environment',
    'u5env/1_2_3/doc/types.dat',
);

my %sack_tree_1;
foreach my $file (@sack_files_1) {
    # The first two components become hash keys; whatever is left is joined
    # back together as the path stored under them.
    my ( $env, $number, @everything_else ) = split '/', $file;
    push( @{ $sack_tree_1{$env}{$number} }, join( "/", @everything_else ) );
}

print Dumper \%sack_tree_1;
This will do as you ask. It uses File::Spec::Functions to split each path into its components.
The first two elements of the hash are used directly as hash keys, relying on autovivication to create the necessary hash elements.
A simple push to an implied array reference also autovivifies the lowest-level hash element.
I have used Data::Dump to display the resulting hash. It is not part of the core Perl installation and you may need to install it, but it is much superior to Data::Dumper.
# Turn a flat list of paths into %paths{first}{second} = [ remaining path, ... ]
# using File::Spec::Functions to split and re-join the components.
use strict;
use warnings;
use File::Spec::Functions qw/ splitdir catfile /;

my @sack_files_1 = (
    'mgenv/1_2_3/parent.dx_environment',
    'mgenv/1_2_3/doc/types.dat',
    'u5env/1_2_3/parent.dx_environment',
    'u5env/1_2_3/doc/types.dat',
);

my %paths;
for my $path (@sack_files_1) {
    my ($p1, $p2, @path) = splitdir $path;
    # catfile joins with the platform's own separator, so the stored path
    # uses a backslash on Windows and a forward slash on Unix-like systems.
    push @{ $paths{$p1}{$p2} }, catfile @path;
}

use Data::Dump;
dd \%paths;
output
{
mgenv => { "1_2_3" => ["parent.dx_environment", "doc\\types.dat"] },
u5env => { "1_2_3" => ["parent.dx_environment", "doc\\types.dat"] },
}
# Build a nested hash from each path: every leading component becomes a hash
# level, and the file name is stored under the last directory component.
my $sack_tree_1 = {};
foreach my $data (@sack_files_1) {
    my @path = split '/', $data;

    # The right-hand side must be parenthesised: assignment binds tighter
    # than the comma operator, so without the parens only $file was set and
    # the second pop was evaluated and thrown away.
    my ($file, $last_part) = (pop @path, pop @path); # get the file name and last part of the path

    my $hash_part = $sack_tree_1;
    foreach my $path (@path) { # For every element in the remaining part of the path
        $hash_part->{$path} //= {}; # Make sure we have a hash ref to play with
        $hash_part = $hash_part->{$path}; # Move down the hash past the current path element
    }

    # NOTE(review): a component that is both the parent of a file and an
    # intermediate directory of a deeper path (e.g. '1_2_3' in the sample
    # data) already holds a file-name string when the deeper path arrives,
    # so the descent above then meets a string instead of a hash ref.
    # Confirm the intended structure for such inputs.
    $hash_part->{$last_part} = $file; # Add the file name to the last part of the path
}
This handles all path lengths of 2 or more
GENERAL IDEA
Here is a snippet of what I'm working with:
# For each link in @blarg_links: fetch the page, find every "foo" element with
# class "bar", and collect the absolute form of the first href in each one.
my $url_temp;
my $page_temp;
my $p_temp;
my @temp_stuff;
my @collector;
foreach my $link (@blarg_links) {
    $url_temp = $link;
    # Report the URL that failed; $! is not guaranteed to describe a failed fetch.
    # NOTE(review): get() is presumably LWP::Simple::get - confirm.
    $page_temp = get( $url_temp ) or die "Unable to fetch $url_temp\n";
    $p_temp = HTML::TreeBuilder->new_from_content( $page_temp );
    @temp_stuff = $p_temp->look_down(
        _tag  => 'foo',
        class => 'bar'
    );
    foreach my $element (@temp_stuff) {
        push(@collector, "http://www.foobar.sx" . $1) if $element->as_HTML =~ m/href="(.*?)"/;
    }
}
Hopefully it is clear that what I'm hopelessly trying to do is push the link endings found in each of a list of links into an array called @temp_stuff. So the first link in @blarg_links, when visited, has greater than or equal to 1 foo tag with an associated bar class that when acted on by as_HTML will match something I want in the href equality to then pump into an array of links which have the data I'm really after... Does that make sense?
ACTUAL DATA
# Step 1: fetch the year calendar and collect the per-day page links.
my $url2 = 'http://www.chemistry.ucla.edu/calendar-node-field-date/year';
# Report the URL that failed; $! is not guaranteed to describe a failed fetch.
# NOTE(review): get() is presumably LWP::Simple::get - confirm.
my $page2 = get( $url2 ) or die "Unable to fetch $url2\n";
my $p2 = HTML::TreeBuilder->new_from_content( $page2 );
my @stuff2 = $p2->look_down(
    _tag  => 'div',
    class => 'year mini-day-on'
);
my @chem_links;
foreach my $day_div (@stuff2) {
    push(@chem_links, $1) if $day_div->as_HTML =~ m/(http:\/\/www\.chemistry\.ucla\.edu\/calendar-node-field-date\/day\/[0-9]{4}-[0-9]{2}-[0-9]{2})/;
}

# Step 2: visit every day page and collect the links found in its
# "field-content" spans.
my $url_temp;
my $page_temp;
my $p_temp;
my @temp_stuff;
my @collector;
foreach my $day_link (@chem_links) {
    $url_temp = $day_link;
    $page_temp = get( $url_temp ) or die "Unable to fetch $url_temp\n";
    $p_temp = HTML::TreeBuilder->new_from_content( $page_temp );
    @temp_stuff = $p_temp->look_down(
        _tag  => 'span',
        class => 'field-content'
    );
    # Harvest inside the page loop: @temp_stuff is overwritten on every
    # iteration, so a loop placed after this one only sees the last page.
    foreach my $span (@temp_stuff) {
        push(@collector, "http://www.chemistry.ucla.edu" . $1) if $span->as_HTML =~ m/href="(.*?)"/;
    }
}
n.b. - I want to use HTML::TreeBuilder. I'm aware of alternatives.
This is a rough attempt at what I think you want.
It fetches all the links on the first page and visits each of them in turn, printing the link in each <span class="field-content"> element.
# Fetch the year calendar, follow every highlighted day link, and print the
# seminar links found on each day page; all seminar links end up in @collector.
use strict;
use warnings;
use 5.010;
use HTML::TreeBuilder;
STDOUT->autoflush;

my $url = 'http://www.chemistry.ucla.edu/calendar-node-field-date/year';
my $tree = HTML::TreeBuilder->new_from_url($url);

my @chem_links;
for my $div ( $tree->look_down( _tag => 'div', class => qr{\bmini-day-on\b} ) ) {
    my ($anchor) = $div->look_down(_tag => 'a', href => qr{http://www\.chemistry\.ucla\.edu});
    # A div without a matching link leaves $anchor undefined; skip it rather
    # than calling a method on undef.
    next unless $anchor;
    push @chem_links, $anchor->attr('href');
}

my @collector;
for my $url (@chem_links) {
    say $url;
    my $tree = HTML::TreeBuilder->new_from_url($url);
    my @seminars;
    for my $span ( $tree->look_down( _tag => 'span', class => 'field-content' ) ) {
        my ($anchor) = $span->look_down(_tag => 'a', href => qr{/});
        next unless $anchor;    # span without a link
        push @seminars, 'http://www.chemistry.ucla.edu'.$anchor->attr('href');
    }
    say " $_" for @seminars;
    say '';
    push @collector, @seminars;
}
For a more modern framework for parsing webpages, I would suggest you take a look at Mojo::UserAgent and Mojo::DOM. Instead of having to manually march through each section of your html tree, you can use the power of css selectors to zero in on the specific data that you want. There's a nice 8 minute introductory video on the framework at Mojocast Episode 5.
# Parses the UCLA Chemistry Calendar and displays all seminar links
use strict;
use warnings;
use Mojo::UserAgent;
use URI;

my $url = 'http://www.chemistry.ucla.edu/calendar-node-field-date/year';
my $ua = Mojo::UserAgent->new;
my $dom = $ua->get($url)->res->dom;

# find() returns a Mojo::Collection of elements. map(attr => 'href') calls
# ->attr('href') on each of them; calling ->attr directly on the collection
# relied on method autoloading that current Mojolicious no longer provides.
for my $dayhref ($dom->find('div.mini-day-on > a[href*="/day/"]')->map(attr => 'href')->each) {
    # Resolve the link against the calendar URL in case it is relative.
    my $dayurl = URI->new($dayhref)->abs($url);
    print $dayurl, "\n";

    my $daydom = $ua->get($dayurl->as_string)->res->dom;
    for my $seminarhref ($daydom->find('span.field-content > a[href]')->map(attr => 'href')->each) {
        my $seminarurl = URI->new($seminarhref)->abs($dayurl);
        print " $seminarurl\n";
    }
    print "\n";
}
Output is identical to that of Borodin's solution using HTML::TreeBuilder:
http://www.chemistry.ucla.edu/calendar-node-field-date/day/2014-01-06
http://www.chemistry.ucla.edu/seminars/nano-rheology-enzymes
http://www.chemistry.ucla.edu/calendar-node-field-date/day/2014-01-09
http://www.chemistry.ucla.edu/seminars/imaging-approach-biology-disease-through-chemistry
http://www.chemistry.ucla.edu/calendar-node-field-date/day/2014-01-10
http://www.chemistry.ucla.edu/seminars/arginine-methylation-%E2%80%93-substrates-binders-function
http://www.chemistry.ucla.edu/seminars/special-inorganic-chemistry-seminar
http://www.chemistry.ucla.edu/calendar-node-field-date/day/2014-01-13
http://www.chemistry.ucla.edu/events/robert-l-scott-lecture-0
...
How do I pass an element of an "array of hashes" into a function as an array?
say for instance I wanted to pass all $link->{text} as an array into the sort() function.
#!/usr/bin/perl
# Extract every anchor's href and text from $field into an array of hashes,
# then print each pair.
use strict; use warnings;
# NOTE(review): as shown, the heredoc below contains no anchor tags, so the
# match loop finds nothing - the markup was presumably lost when the snippet
# was copied.
my $field = <<EOS;
Baboon
Antelope
dog
cat
EOS
#/ this comment is to unconfuse the SO syntax highlighter.
my @array_of_links;    # each element: { url => ..., text => ... }
while ($field =~ m{<a.*?href="(.*?)".*?>(.*?)</a>}g) {
    push @array_of_links, { url => $1, text => $2 };
}
for my $link (@array_of_links) {
    print qq("$link->{text}" goes to -> "$link->{url}"\n);
}
If you want to sort your links by text,
my @sorted_links = sort { $a->{text} cmp $b->{text} } @array_of_links;
If you actually just want to get and sort the text,
my @text = sort map $_->{text}, @array_of_links;
Better to err on the side of caution and use an HTML parser to parse HTML:
# Extract every anchor's href and text from $field with an HTML parser, sort
# the results by text and then by URL, and dump them as YAML.
use strict; use warnings;
use HTML::TokeParser::Simple;
my $field = <<EOS;
Baboon
Antelope
dog
cat
EOS
my $parser = HTML::TokeParser::Simple->new(string => $field);
my @urls;    # each element: { url => ..., text => ... }
while ( my $tag = $parser->get_tag ) {
    next unless $tag->is_start_tag('a');
    # Anchors without an href attribute are skipped.
    next unless defined(my $url = $tag->get_attr('href'));
    my $text = $parser->get_text('/a');
    push @urls, { url => $url, text => $text };
}
# Order by link text; ties are broken by URL.
@urls = sort {
    $a->{text} cmp $b->{text} ||
    $a->{url} cmp $b->{url}
} @urls;
use YAML;
print Dump \@urls;