I am charged with making a Perl module more efficient. It takes log entries which are currently uniquely identified by date and server name and inserts them into our mysql database for later processing.
I've posted the original file and my attempt below. My code doesn't insert into the database. I'm sure its a simple array handling problem but not sure.
Original file
package UlsaSpectrumAnalyser;
use strict;
use warnings;
use Data::Dumper;
use EnmServiceGroup;
use StatsDB;
use DBI;
use StatsTime;
sub new
{
my $klass = shift;
my $self = bless {}, $klass;
return $self;
}
sub init($$$$)
{
my ($self,$r_cliArgs,$r_incr,$dbh) = #_;
$self->{'site'} = $r_cliArgs->{'site'};
$self->{'siteId'} = $r_cliArgs->{'siteId'};
$self->{'date'} = $r_cliArgs->{'date'};
if ( exists $r_incr->{'UlsaSpectrumAnalyser'} )
{
$self->{'r_UlsaSpectrumAnalyser'} = $r_incr->{'UlsaSpectrumAnalyser'}->{'r_UlsaSpectrumAnalyser'};
}
else
{
$self->{'r_UlsaSpectrumAnalyser'} = {};
}
my #subscriptions = ();
$self->{'serverMap'} = {};
foreach my $service( "pmservice", "saservice" ) {
my $r_serverMap = enmGetServiceGroupInstances($self->{'site'}, $self->{'date'},$service);
while ( my ($server,$serverId) = each %{$r_serverMap} ) {
push ( #subscriptions, {'server' => $server, 'prog' => 'JBOSS'} );
$self->{'serverMap'}->{$server} = $serverId;
}
}
return \#subscriptions;
}
sub handle($$$$$$$)
{
my ($self,$timestamp,$host,$program,$severity,$message,$messageSize) = #_;
if ( $::DEBUG > 9 ) { print "UlsaSpectrumAnalyser::handle got message from $host $program : $message\n"; }
# Skip any warnings/errors
if ( $severity ne 'info' ) {
return;
}
my ($time)=$timestamp=~/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}).*/;
my ($epochtime) = getDateTimeInMilliSeconds($timestamp);
if ( $::DEBUG > 3 ) { print "UlsaSpectrumAnalyser::handle got message from $time $host $program : $message\n"; }
#Sample Log
#2017-09-08 14:55:36,028 INFO [com.X.Y.itpf.EVENT_LOGGER] (ajp-executor-threads - 18) [administrator, Spectrum Analyzer, DETAILED, #ULSA_COMPONENT_FFT, MeContext=lienb4003, Samples=65510; FileParsingTime(ms)=50; FastFourierTime(ms)=370; PostProcessingTime(ms)=3; #ChartScalingTime(ms)=1; TotalTime(ms)=424]
if( $message =~ /.*ULSA_COMPONENT_FFT,\s+(\S+),\s+Samples=(\d+);\s+FileParsingTime\S+=(\d+);\s+FastFourierTime\S+=(\d+);\s+PostProcessingTime\S+=(\d+);\s+ChartScalingTime\S+=(\d+);\s+TotalTime\S+=(\d+)]/ ) {
my $activity = $epochtime . '##' . $host; #TODO Handle array instead of hash. Working on separately
my $serverid = $self->{'serverMap'}->{$host};
$self->{'r_UlsaSpectrumAnalyser'}->{$activity} = {
'time' => $time,
'epochtime' => $epochtime,
'serverid' => $serverid,
'source' => $1,
'sample' => $2,
'file_parsing_time' => $3,
'fast_fourier_time' => $4,
'post_processing_time' => $5,
'chart_scaling_time' => $6,
'total_time' => $7};
}
}
sub handleExceeded($$$)
{
my ($self, $host, $program) = #_;
}
sub done($$$)
{
my ($self,$dbh,$r_incr) = #_;
my $tmpDir = '/data/tmp';
my $date=$self->{'date'};
if (exists $ENV{'TMP_DIR'})
{
$tmpDir = $ENV{'TMP_DIR'};
}
my $bcpFileUlsaAnalyserLogs = "$tmpDir/ulsa_spectrum_analyser_logs.bcp";
open (BCP, "> $bcpFileUlsaAnalyserLogs") or die "Failed to open $bcpFileUlsaAnalyserLogs";
foreach my $activity (sort keys %{$self->{'r_UlsaSpectrumAnalyser'}}) {
print BCP $self->{'siteId'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'serverid'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'time'} . "\t" .
$self->{'r_UlsaSpiUectrumAnalyser'}->{$activity}->{'epochtime'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'source'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'sample'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'file_parsing_time'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'fast_fourier_time'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'post_processing_time'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'chart_scaling_time'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'total_time'} . "\n";
}
close BCP;
dbDo( $dbh, "DELETE FROM enm_ulsa_spectrum_analyser_logs WHERE siteid = $self->{'siteId'} AND time BETWEEN '$date 00:00:00' AND '$date 23:59:59'" )
or die "Failed to delete from enm_ulsa_spectrum_analyser_logs" . $dbh->errstr;
dbDo( $dbh, "LOAD DATA INFILE '$bcpFileUlsaAnalyserLogs' INTO TABLE enm_ulsa_spectrum_analyser_logs" )
or die "Failed to load new data from '$bcpFileUlsaAnalyserLogs' file to 'enm_ulsa_spectrum_analyser_logs' table" . $dbh->errstr;
unlink($bcpFileUlsaAnalyserLogs);
$r_incr->{'UlsaSpectrumAnalyser'} = {
'r_UlsaSpectrumAnalyser' => $self->{'r_UlsaSpectrumAnalyser'}
};
}
1;
My attempt
package UlsaSpectrumAnalyser;
use strict;
use warnings;
use Data::Dumper;
use StatsDB;
use DBI;
use StatsTime;
sub new
{
my $klass = shift;
my $self = bless {}, $klass;
return $self;
}
sub init($$$$)
{
my ($self,$r_cliArgs,$r_incr,$dbh) = #_;
$self->{'site'} = $r_cliArgs->{'site'};
$self->{'siteId'} = $r_cliArgs->{'siteId'};
$self->{'date'} = $r_cliArgs->{'date'};
$self->{'r_instrDataEvent'}->{'ulsaStats'} = [];
my #subscriptions = ();
$self->{'serverMap'} = {};
foreach my $service( "pmservice", "saservice" ) {
my $r_serverMap = enmGetServiceGroupInstances($self->{'site'}, $self->{'date'},$service);
while ( my ($server,$serverId) = each %{$r_serverMap} ) {
push ( #subscriptions, {'server' => $server, 'prog' => 'JBOSS'} );
$self->{'serverMap'}->{$server} = $serverId;
}
}
return \#subscriptions;
}
sub handle($$$$$$$)
{
my ($self,$timestamp,$host,$program,$severity,$message,$messageSize) = #_;
if ( $::DEBUG > 9 ) { print "UlsaSpectrumAnalyser::handle got message from $host $program : $message\n"; }
# Skip any warnings/errors
if ( $severity ne 'info' ) {
return;
}
my ($time)=$timestamp=~/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}).*/;
my ($epochtime) = getDateTimeInMilliSeconds($timestamp);
if ( $::DEBUG > 3 ) { print "UlsaSpectrumAnalyser::handle got message from $time $host $program : $message\n"; }
#Sample Log
#2017-09-08 14:55:36,028 INFO [com.X.y.itpf.EVENT_LOGGER] (ajp-executor-threads - 18) [administrator, Spectrum Analyzer, DETAILED, #ULSA_COMPONENT_FFT, MeContext=lienb4003, Samples=65510; FileParsingTime(ms)=50; FastFourierTime(ms)=370; PostProcessingTime(ms)=3; #ChartScalingTime(ms)=1; TotalTime(ms)=424]
if( $message =~ /.*ULSA_COMPONENT_FFT,\s+(\S+),\s+Samples=(\d+);\s+FileParsingTime\S+=(\d+);\s+FastFourierTime\S+=(\d+);\s+PostProcessingTime\S+=(\d+);\s+ChartScalingTime\S+=(\d+);\s+TotalTime\S+=(\d+)]/ ) {
my $serverid = $self->{'r_instrDataEvent'}->{'serverid'};
my %event = (
'time' => $time,
'epochtime' => $epochtime,
'serverid' => $serverid,
'source' => $1,
'sample' => $2,
'file_parsing_time' => $3,
'fast_fourier_time' => $4,
'post_processing_time' => $5,
'chart_scaling_time' => $6,
'total_time' => $7
);
push #{$self->{'r_instrDataEvent'}->{'ulsaStats'}}, \%event;
}
}
sub handleExceeded($$$)
{
my ($self, $host, $program) = #_;
}
sub done($$$)
{
my ($self,$dbh,$r_incr) = #_;
my $bcpFileUlsaAnalyserLogs = getBcpFilename("enm_ulsa_spectrum_analyser_logs ");
open (BCP, "> $bcpFileUlsaAnalyserLogs") or die "Failed to open $bcpFileUlsaAnalyserLogs";
foreach my $activity (#{$self->{'r_instrDataEvent'}->{'ulsaStats'}}) {
print BCP $self->{'siteId'} . "\t" .
$activity->{'serverid'} . "\t" .
$activity->{'time'} . "\t" .
$activity->{'epochtime'} . "\t" .
$activity->{'source'} . "\t" .
$activity->{'sample'} . "\t" .
$activity->{'file_parsing_time'} . "\t" .
$activity->{'fast_fourier_time'} . "\t" .
$activity->{'post_processing_time'} . "\t" .
$activity->{'chart_scaling_time'} . "\t" .
$activity->{'total_time'} . "\n";
}
close BCP;
dbDo( $dbh, sprintf("DELETE FROM enm_ulsa_spectrum_analyser_logs WHERE siteid = %d AND time BETWEEN '%s' AND '%s'",
$self->{'siteId'}, $self->{'r_instrDataEvent'}->{'ulsaStats'}->[0]->{'epochtime'},$self->{'r_instrDataEvent'}->{'ulsaStats'}->[$#{$self->{'r_instrDataEvent'}->{'ulsaStats'}}]->{'epochtime'}))
or die "Failed to delete from enm_ulsa_spectrum_analyser_logs" . $dbh->errstr;#'
dbDo( $dbh, "LOAD DATA INFILE '$bcpFileUlsaAnalyserLogs' INTO TABLE enm_ulsa_spectrum_analyser_logs" )
or die "Failed to load new data from '$bcpFileUlsaAnalyserLogs' file to 'enm_ulsa_spectrum_analyser_logs' table" . $dbh->errstr;
}
1;
I would say that the problem is your
my $bcpFileUlsaAnalyserLogs = getBcpFilename( "enm_ulsa_spectrum_analyser_logs " )
I doubt if the file name really has a space at the end!
I'm surprised that your dbDo or at least your
or die "Failed to load new data ... "
didn't catch this and report it. I don't like wrapping DBI code inside subroutines, especially when I can't see those subroutines.
Note also that you should be using placeholders
dbDo( $dbh, sprintf("DELETE FROM enm_ulsa_spectrum_analyser_logs WHERE siteid = %d AND time BETWEEN '%s' AND '%s'",
$self->{'siteId'}, $self->{'r_instrDataEvent'}->{'ulsaStats'}->[0]->{'epochtime'},$self->{'r_instrDataEvent'}->{'ulsaStats'}->[$#{$self->{'r_instrDataEvent'}->{'ulsaStats'}}]->{'epochtime'}))
or die "Failed to delete from enm_ulsa_spectrum_analyser_logs" . $dbh->errstr;#'
would be much better as
my $sth = $dbh->prepare(<<END_SQL);
DELETE FROM enm_ulsa_spectrum_analyser_logs
WHERE siteid = ? AND time BETWEEN ? AND ?
END_SQL
my $ulsa_stats = $self->{r_instrDataEvent}{ulsaStats};
$sth->execute(
$self->{siteId},
$ulsa_stats->[0]{epochtime},
$ulsa_stats->[-1]{epochtime}
) or die "Failed to delete from enm_ulsa_spectrum_analyser_logs: " . $dbh->errstr;
Related
my csv input file
Chapter,Content
A,1
B,3
C,1
C,2
C,3
D,5
My current perl script
open(INFILE,$input)||die "can't open the file";
#line = (split/,/,$_);
#line_last = (split/\n/,$line[1]);
if ($_ =~ /A/){
push #con1, $line[1];
}
elsif ($_ =~ /B/){
push #con2, $line[1];
}
elsif ($_ =~ /C/){
push #con3, $line[1];
}
elsif ($_ =~ /D/){
push #con4, $line[1];
}
close INFILE;
chomp #con1, #con2, #con3, #con4;
print "content =", (join ", ", #con1),"\n";
print "content =", (join ", ", #con2),"\n";
print "content =", (join ", ", #con3),"\n";
print "content =", (join ", ", #con4),"\n";
My current code can work but I wanna make my code shorter & more efficient.
How do I make #con more automate? something like using For loop & use $i to assign to #con to make overall code shorter.
Why it's stupid to `use a variable as a variable name'.
You should use an AoA instead.
use Text::CSV_XS qw( );
my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 2 });
open my $fh, "<", $qfn)
or die("Can't open \"$qfn\": $!\n");
my #foos = qw( A B C D );
my $num_eles = 0;
my %index_by_foo = map { $_ => $num_eles++ } #foos;
my #cons_by_foo = map { [] } 1..$num_eles;
while (my $row = $csv->getline($fh)) {
if ( my $i = $index_by_foo{ $row->[0] } ) {
push #{ $cons_by_foo[$i] }, $row->[1];
}
}
for my $i (1..$num_eles-1) {
print("content =", join(", ", #{ $cons_by_foo[$i] }), "\n");
}
That said, a HoA seems a far better fit.
use Text::CSV_XS qw( );
my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 2 });
open my $fh, "<", $qfn)
or die("Can't open \"$qfn\": $!\n");
my %cons_by_foo;
while (my $row = $csv->getline($fh)) {
push #{ $cons_by_foo{ $row->[0] } }, $row->[1];
}
for my $foo (sort keys %cons_by_foo) {
print("content $foo =", join(", ", #{ $cons_by_foo{$foo} }), "\n");
}
This is what a hash is great for IMO (hashtable or dictionary in other languages).
note: I haven't tested this code, it's from memory.
my %con;
while (<INFILE>)
{
my #line_last = (split/\n/,$line[1]);
$con{$line[0]} = $con{$line[0]} + ", " + $line[1];
}
foreach my $str (keys sort %con)
{
print "content $con{$str}\n";
}
edit: much better and tested code here.
open(INFILE,"input.txt")||die "can't open the file";
my %con;
while (<INFILE>)
{
next if /^Chapter/;
chomp;
my #line = (split ',' , $_);
push #{$con{$line[0]}}, $line[1];
}
foreach my $str (sort keys %con)
{
my $tmp = join ', ', #{$con{$str}};
print "content $str = $tmp\n";
}
output is:
content A = 1
content B = 3
content C = 1,2,3
content D = 5
I know that isn't your output, but it seems more useful than what you had.
my %cc; #chapter contents
open my $FH, $filename or die "Cannot open $filename";
push #{ $cc{ $$_[0] } }, $$_[1] for map {chomp;[split/,/]} <$FH>;
close $FH;
print "Chapter = $_ Content = " . join(", ", #{ $cc{$_} }) . "\n" for sort keys %cc;
Output:
Chapter = A Content = 1
Chapter = B Content = 3
Chapter = C Content = 1, 2, 3
Chapter = D Content = 5
I'm trying to save error codes by:
#global space
my #retCodes;
#main
sub BuildInit {
my $actionStr = "";
my $compStr = "";
my #component_dirs;
my #compToBeBuilt;
foreach my $comp (#compList) {
#component_dirs = GetDirs($comp); #populates #component_dirs
}
print "Printing Action List: #actionList\n";
#---------------------------------------
#---- Setup Worker Threads ----------
for ( 1 .. NUM_WORKERS ) {
async {
while ( defined( my $job = $q->dequeue() ) ) {
worker($job);
}
};
}
#-----------------------------------
#---- Enqueue The Work ----------
for my $action (#actionList) {
my $sem = Thread::Semaphore->new(0);
$q->enqueue( [ $_, $action, $sem ] ) for #component_dirs;
$sem->down( scalar #component_dirs );
print "\n------>> Waiting for prior actions to finish up... <<------\n";
}
# Nothing more to do - notify the Queue that we're not adding anything else
$q->end();
$_->join() for threads->list();
return 0;
}
#worker
sub worker {
my ($job) = #_;
my ( $component, $action, $sem ) = #$job;
Build( $component, $action );
$sem->up();
}
#builder method
sub Build {
my ( $comp, $action ) = #_;
my $cmd = "$MAKE $MAKE_INVOCATION_PATH/$comp ";
my $retCode = -1;
given ($action) {
when ("depend") { $cmd .= "$action >nul 2>&1" } #suppress output
when ("clean") { $cmd .= $action }
when ("build") { $cmd .= 'l1' }
when ("link") { $cmd .= '' } #add nothing; default is to link
default { die "Action: $action is unknown to me." }
}
print "\n\t\t*** Performing Action: \'$cmd\' on $comp ***" if $verbose;
if ( $action eq "link" ) {
# hack around potential race conditions -- will only be an issue during linking
my $tries = 1;
until ( $retCode == 0 or $tries == 0 ) {
last if ( $retCode = system($cmd) ) == 2; #compile error; stop trying
$tries--;
}
}
else {
$retCode = system($cmd);
}
push( #retCodes, ( $retCode >> 8 ) );
#testing
if ( $retCode != 0 ) {
print "\n\t\t*** ERROR IN $comp: $# !! ***\n";
print "\t\t*** Action: $cmd -->> Error Level: " . ( $retCode >> 8 ) . "\n";
#exit(-1);
}
return $retCode;
}
Error that gets displayed:
Use of uninitialized value $maxReturnCode in concatenation (.) or
string at C:\script.pl line 66, line 415.
I can see from the first line of output though, that I get things like: Return Code: 0 Return Code: 0 Return Code: 2 ..
The issue here is that the code isn't sharing the array between threads; so because of that, each thread is modifying it's local copy of the array, not the global array as expected. The fix for this problem is to share the variable, and lock it before accessing it during the thread processing:
my #retCodes;
share(#retCodes);
...
#during the thread sub
lock(#retCodes);
push(#retCodes, ($retCode>>8));
Here's a stubbed-out runnable version that you should be able to modify a bit to do what you need:
#!/usr/bin/perl
use strict;
use warnings;
use List::Util 'max';
use threads;
#global space
my #retCodes = ();
share(#retCodes);
sub builder {
my ($comp, $cmd) = ('builder', 'test');
for my $retCode (qw/0 0 256/) {
print "\n\t\tReturn Code: " . ($retCode >>8) . "\n";
lock(#retCodes);
push(#retCodes, ($retCode>>8));
}
}
#main
builder();
# other threads started...
# wait for threads to complete...
printf "Codes: %s\n", join(', ', #retCodes);
my $maxReturnCode = max(#retCodes);
print "Highest Error Code: $maxReturnCode\n"; #<-- crashes with error below
exit($maxReturnCode);
I have this perl script:
my %perMpPerMercHash;
foreach my $sheet () { #proper ranges specified
foreach my $row ( ) { #proper ranges specified
#required variables declared.
push(#{$perMpPerMercHash{join("-", $mercId, $mpId)}}, $mSku);
}
}
#Finally 'perMpPerMercHash' will be a hash of array`
foreach my $perMpPerMerc ( keys %perMpPerMercHash ) {
&genFile($perMpPerMerc, $perMpPerMercHash{$perMpPerMerc});
}
sub genFile {
my ( $outFileName, #skuArr ) = #_;
my $output = new IO::File(">$outFileName");
my $writer = new XML::Writer( OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 2);
#mpId is generated.
&prepareMessage($writer, $mpId, #skuArr);
}
sub prepareMessage {
my ( $writer, $mpId, #skuArr ) = #_;
my $count = 1;
print Dumper \#skuArr; #Printing correctly, 8-10 values.
foreach my $sku ( #skuArr ) { #not iterating.
print "loop run" , $sku, "\n"; #printed only once.
}
}
Can somebody please help why this is happening. I am new to perl and could not understand this anomaly.
EDIT:
output of Dumper:
$VAR1 = [
'A',
'B',
'C',
];
When you do
&genFile($perMpPerMerc, $perMpPerMercHash{$perMpPerMerc});
You're passing a reference to an array.
So in
sub genFile {
my ( $outFileName, #skuArr ) = #_;
You have to do :
sub genFile {
my ( $outFileName, $skuArr ) = #_;
and then use #$skuArr.
Have a look at references
The modified genFile sub will be:
sub genFile {
my ( $outFileName, $skuArr ) = #_;
my $output = new IO::File(">$outFileName");
my $writer = new XML::Writer( OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 2);
#mpId is generated.
&prepareMessage($writer, $mpId, #$skuArr);
}
And the other sub don't need to be modified.
Or you can pass always skuArr by reference:
&genFile($perMpPerMerc, $perMpPerMercHash{$perMpPerMerc});
...
sub genFile {
my ( $outFileName, $skuArr ) = #_;
...
&prepareMessage($writer, $mpId, $skuArr);
}
sub prepareMessage {
my ( $writer, $mpId, $skuArr ) = #_;
my $count = 1;
print Dumper $skuArr;
foreach my $sku ( #$skuArr ) {
print "loop run" , $sku, "\n";
}
}
With of these two approaches would you prefer: trying to find the right data-types or simply using always varchar?
# ...
use HTML::TableExtract;
my $te = HTML::TableExtract->new( headers => [ 'some headers', 'one', 'two' ], keep_headers => 1 );
$te->parse( $html_string );
die $te->tables if $te->tables != 1;
( my $grid ) = $te->tables;
use DBI;
my $dbh = DBI->connect( ... ) or die $DBI::errstr;
my $table = 'my_test_table';
my #rows = $grid->rows;
my $header_row = shift #rows;
##### version 1 ####
use Data::Types qw(:all);
my #create_row;
for my $col ( 0 .. $#$header_row ) {
my ( $count, $int, $float ) = ( 0, 0, 0 );
my $longest = 0;
for my $row ( #rows ) {
$longest = length $row->[$col] if length $row->[$col] > $longest;
$int++ if is_int( $row->[$col] );
$float++ if is_float( $row->[$col] );
$count++;
}
if ( $int == $count ) {
$create_row[$col] = $dbh->quote( $header_row->[$col] ) . ' int';
}
elsif ( $float == $count ) {
$create_row[$col] = $dbh->quote( $header_row->[$col] ) . ' float';
}
else {
$create_row[$col] = $dbh->quote( $header_row->[$col] ) . " char($longest)";
}
}
$sql = sprintf "CREATE TABLE $table ( %s )",
join( ', ', #create_row );
$dbh->do( $sql );
$sql = sprintf "INSERT INTO $table ( %s ) VALUES( %s )",
join( ',', map { $dbh->quote( $_ ) } #$header_row ), join( ',', ('?') x #$header_row );
my $sth = $dbh->prepare( $sql );
#### version 2 ####
# always varchar
$sql = sprintf "CREATE TABLE $table ( %s )",
join( ', ', map { "'$_' varchar(60)" } #$header_row );
$dbh->do( $sql );
$sql = sprintf "INSERT INTO $table ( %s ) VALUES( %s )",
join( ',', map { $dbh->quote( $_ ) } #$header_row ), join( ',', ('?') x #$header_row );
my $sth = $dbh->prepare( $sql );
If the table you're processing will not change, and if the column will only be used for that single table's data, then it is safe to guess a data type that seems to fit (version 1).
However, if you plan to add any more data to that column then you'd need to keep everything as varchars in case there's some data of a different type in future (version 2).
I can't seem to get these arrays to work. I'm getting errors along the following lines:
Global symbol '$href_array' requires specific package name
What does this mean? Here's part of my code:
sub scrape {
my ( $self, $DBhost, $DBuser, $DBpass, $DBname ) = #_;
my ($dbh, $query, $result, $array);
my $DNS = "dbi:mysql:$DBname:$DBhost:3306";
$dbh = DBI->connect($DNS, $DBuser, $DBpass ) or die $DBI::errstr;
if( defined( $self->{_process_image} ) && ( -e 'href_w_' . $self->{_process_image} . ".txt" ) ) {
open ERROR_W, "error_w_" . $self->{_process_image} . ".txt";
open M_HREF_W, "m_href_w_" . $self->{_process_image} . ".txt";
open HREF_W, "href_w_" . $self->{_process_image} . ".txt";
my #m_error_array = ( split( '|||', <ERROR_W> ) );
my #m_href_array = ( split( '|||', <M_HREF_W> ) );
my #href_array = ( split( '|||', <HREF_W> ) );
close ( ERROR_W );
close ( M_HREF_W );
close ( HREF_W );
}else{
my #m_error_array;
my #m_href_array;
my #href_array = ( $self->{_url} );
}
my $z = 0;
while( $href_array ){
if( defined( $self->{_x_more} ) && $z == $self->{_x_more} ) {
break;
}
if( defined( $self->{_process_image} ) ) {
$self->write( 'm_href_w', #m_href_array );
$self->write( 'href_w', #href_array );
$self->write( 'error_w', #m_error_array );
}
$self->{_link_count} = scalar #m_href_array;
}
}
FIXED:
#!/usr/bin/perl
use strict;
use URI;
use File::Basename;
use DBI;
package Crawler;
sub new {
my $class = shift;
my $self = {
_url => shift,
_max_link => 0,
_local => 1
};
bless $self, $class;
return $self;
}
sub process_image {
my ($self, $process_image) = #_;
$self->{_process_image} = $process_image;
}
sub local {
my ($self, $local) = #_;
$self->{_local} = $local;
}
sub max_link {
my ($self, $max_link) = #_;
$self->{_max_link} = $max_link;
}
sub x_more {
my ($self, $x_more) = #_;
$self->{_x_more} = $x_more;
}
sub resolve_href {
my ($base, $href) = #_;
my $uri = URI->new($href);
return $uri->rel($base);
}
sub write {
my ( $self, $ref, $data ) = #_;
open FILE, '>>' . $ref . '_' . $self->{_process_image} . '.txt';
print FILE join( '|||', $data );
close( FILE );
}
sub scrape {
my #m_error_array;
my #m_href_array;
my #href_array;
my ( $self, $DBhost, $DBuser, $DBpass, $DBname ) = #_;
my ($dbh, $query, $result, $array);
my $DNS = "dbi:mysql:$DBname:$DBhost:3306";
$dbh = DBI->connect($DNS, $DBuser, $DBpass ) or die $DBI::errstr;
if( defined( $self->{_process_image} ) && ( -e 'href_w_' . $self->{_process_image} . ".txt" ) ) {
open ERROR_W, "error_w_" . $self->{_process_image} . ".txt";
open M_HREF_W, "m_href_w_" . $self->{_process_image} . ".txt";
open HREF_W, "href_w_" . $self->{_process_image} . ".txt";
my #m_error_array = ( split( '|||', <ERROR_W> ) );
my #m_href_array = ( split( '|||', <M_HREF_W> ) );
my #href_array = ( split( '|||', <HREF_W> ) );
close ( ERROR_W );
close ( M_HREF_W );
close ( HREF_W );
}else{
#href_array = ( $self->{_url} );
}
my $z = 0;
while( #href_array ){
if( defined( $self->{_x_more} ) && $z == $self->{_x_more} ) {
break;
}
if( defined( $self->{_process_image} ) ) {
$self->write( 'm_href_w', #m_href_array );
$self->write( 'href_w', #href_array );
$self->write( 'error_w', #m_error_array );
}
$self->{_link_count} = scalar #m_href_array;
}
}
1;
#$query = "SELECT * FROM `actwebdesigns.co.uk` ORDER BY ID DESC";
#$result = $dbh->prepare($query);
#$result->execute();
#while( $array = $result->fetchrow_hashref() ) {
# print $array->{'URL'} . "\n";
#}
Your code probably won't work the way you think it will. Take, for example, this block:
}else{
my #m_error_array;
my #m_href_array;
my #href_array = ( $self->{_url} );
}
What this does is declare three arrays. That's all fine and good, but because you've used my, they "disappear" when they go out of scope at the end of the else{} block. You probably want to declare them up at the top, before your if...else block and remove the duplicated declarations from both blocks in the if...else section.
Then, you're probably trying to iterate over the #href_array list. You'll want to change your while() to something like:
foreach my $href_array ( #href_array ) {
...
}
All that said, kudos for putting use strict in your code (it must be there, or else you wouldn't have received the warning you did). That's an important thing that programmers new to Perl greatly benefit from, because it warns them about exactly this problem!
It means you are using an undeclared variable $href_array. You need to rewrite this line:
while( $href_array ){
to something like:
foreach my $element (#href_array) {
...although a bit more context is needed in this code to be able to understand what it is doing -- for example, you open a database connection in the scrape method ($dbh = ...), but never use it.
Your code includes while($href_array) but you don't declare a variable called $href_array.
You may have meant #href_array (these are not the same variable), although you don't seem to modify this array inside the loop.
In all these answers no one bothers to tell you that the strict pragma enforces some more stringent coding practices in Perl. See its docs for the details. We explain it fully in Learning Perl too :)
When you get a warning you don't understand, you can get more information with the diagnostics. Here's an example:
use strict;
use diagnostics;
while( $href ) { 1 };
Here's the extended error message:
Execution of /Users/brian/Desktop/t aborted due to compilation errors (#1)
(F) You've said "use strict" or "use strict vars", which indicates
that all variables must either be lexically scoped (using "my" or "state"),
declared beforehand using "our", or explicitly qualified to say
which package the global variable is in (using "::").