DBI: How to find the right data-types for unknown data? - database

With of these two approaches would you prefer: trying to find the right data-types or simply using always varchar?
# ...
use HTML::TableExtract;
my $te = HTML::TableExtract->new( headers => [ 'some headers', 'one', 'two' ], keep_headers => 1 );
$te->parse( $html_string );
die $te->tables if $te->tables != 1;
( my $grid ) = $te->tables;
use DBI;
my $dbh = DBI->connect( ... ) or die $DBI::errstr;
my $table = 'my_test_table';
my #rows = $grid->rows;
my $header_row = shift #rows;
##### version 1 ####
use Data::Types qw(:all);
my #create_row;
for my $col ( 0 .. $#$header_row ) {
my ( $count, $int, $float ) = ( 0, 0, 0 );
my $longest = 0;
for my $row ( #rows ) {
$longest = length $row->[$col] if length $row->[$col] > $longest;
$int++ if is_int( $row->[$col] );
$float++ if is_float( $row->[$col] );
$count++;
}
if ( $int == $count ) {
$create_row[$col] = $dbh->quote( $header_row->[$col] ) . ' int';
}
elsif ( $float == $count ) {
$create_row[$col] = $dbh->quote( $header_row->[$col] ) . ' float';
}
else {
$create_row[$col] = $dbh->quote( $header_row->[$col] ) . " char($longest)";
}
}
$sql = sprintf "CREATE TABLE $table ( %s )",
join( ', ', #create_row );
$dbh->do( $sql );
$sql = sprintf "INSERT INTO $table ( %s ) VALUES( %s )",
join( ',', map { $dbh->quote( $_ ) } #$header_row ), join( ',', ('?') x #$header_row );
my $sth = $dbh->prepare( $sql );
#### version 2 ####
# always varchar
$sql = sprintf "CREATE TABLE $table ( %s )",
join( ', ', map { "'$_' varchar(60)" } #$header_row );
$dbh->do( $sql );
$sql = sprintf "INSERT INTO $table ( %s ) VALUES( %s )",
join( ',', map { $dbh->quote( $_ ) } #$header_row ), join( ',', ('?') x #$header_row );
my $sth = $dbh->prepare( $sql );

If the table you're processing will not change, and if the column will only be used for that single table's data, then it is safe to guess a data type that seems to fit (version 1).
However, if you plan to add any more data to that column then you'd need to keep everything as varchars in case there's some data of a different type in future (version 2).

Related

Converting code using hash to array

I am charged with making a Perl module more efficient. It takes log entries which are currently uniquely identified by date and server name and inserts them into our mysql database for later processing.
I've posted the original file and my attempt below. My code doesn't insert into the database. I'm sure its a simple array handling problem but not sure.
Original file
package UlsaSpectrumAnalyser;
use strict;
use warnings;
use Data::Dumper;
use EnmServiceGroup;
use StatsDB;
use DBI;
use StatsTime;
sub new
{
my $klass = shift;
my $self = bless {}, $klass;
return $self;
}
sub init($$$$)
{
my ($self,$r_cliArgs,$r_incr,$dbh) = #_;
$self->{'site'} = $r_cliArgs->{'site'};
$self->{'siteId'} = $r_cliArgs->{'siteId'};
$self->{'date'} = $r_cliArgs->{'date'};
if ( exists $r_incr->{'UlsaSpectrumAnalyser'} )
{
$self->{'r_UlsaSpectrumAnalyser'} = $r_incr->{'UlsaSpectrumAnalyser'}->{'r_UlsaSpectrumAnalyser'};
}
else
{
$self->{'r_UlsaSpectrumAnalyser'} = {};
}
my #subscriptions = ();
$self->{'serverMap'} = {};
foreach my $service( "pmservice", "saservice" ) {
my $r_serverMap = enmGetServiceGroupInstances($self->{'site'}, $self->{'date'},$service);
while ( my ($server,$serverId) = each %{$r_serverMap} ) {
push ( #subscriptions, {'server' => $server, 'prog' => 'JBOSS'} );
$self->{'serverMap'}->{$server} = $serverId;
}
}
return \#subscriptions;
}
sub handle($$$$$$$)
{
my ($self,$timestamp,$host,$program,$severity,$message,$messageSize) = #_;
if ( $::DEBUG > 9 ) { print "UlsaSpectrumAnalyser::handle got message from $host $program : $message\n"; }
# Skip any warnings/errors
if ( $severity ne 'info' ) {
return;
}
my ($time)=$timestamp=~/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}).*/;
my ($epochtime) = getDateTimeInMilliSeconds($timestamp);
if ( $::DEBUG > 3 ) { print "UlsaSpectrumAnalyser::handle got message from $time $host $program : $message\n"; }
#Sample Log
#2017-09-08 14:55:36,028 INFO [com.X.Y.itpf.EVENT_LOGGER] (ajp-executor-threads - 18) [administrator, Spectrum Analyzer, DETAILED, #ULSA_COMPONENT_FFT, MeContext=lienb4003, Samples=65510; FileParsingTime(ms)=50; FastFourierTime(ms)=370; PostProcessingTime(ms)=3; #ChartScalingTime(ms)=1; TotalTime(ms)=424]
if( $message =~ /.*ULSA_COMPONENT_FFT,\s+(\S+),\s+Samples=(\d+);\s+FileParsingTime\S+=(\d+);\s+FastFourierTime\S+=(\d+);\s+PostProcessingTime\S+=(\d+);\s+ChartScalingTime\S+=(\d+);\s+TotalTime\S+=(\d+)]/ ) {
my $activity = $epochtime . '##' . $host; #TODO Handle array instead of hash. Working on separately
my $serverid = $self->{'serverMap'}->{$host};
$self->{'r_UlsaSpectrumAnalyser'}->{$activity} = {
'time' => $time,
'epochtime' => $epochtime,
'serverid' => $serverid,
'source' => $1,
'sample' => $2,
'file_parsing_time' => $3,
'fast_fourier_time' => $4,
'post_processing_time' => $5,
'chart_scaling_time' => $6,
'total_time' => $7};
}
}
sub handleExceeded($$$)
{
my ($self, $host, $program) = #_;
}
sub done($$$)
{
my ($self,$dbh,$r_incr) = #_;
my $tmpDir = '/data/tmp';
my $date=$self->{'date'};
if (exists $ENV{'TMP_DIR'})
{
$tmpDir = $ENV{'TMP_DIR'};
}
my $bcpFileUlsaAnalyserLogs = "$tmpDir/ulsa_spectrum_analyser_logs.bcp";
open (BCP, "> $bcpFileUlsaAnalyserLogs") or die "Failed to open $bcpFileUlsaAnalyserLogs";
foreach my $activity (sort keys %{$self->{'r_UlsaSpectrumAnalyser'}}) {
print BCP $self->{'siteId'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'serverid'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'time'} . "\t" .
$self->{'r_UlsaSpiUectrumAnalyser'}->{$activity}->{'epochtime'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'source'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'sample'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'file_parsing_time'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'fast_fourier_time'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'post_processing_time'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'chart_scaling_time'} . "\t" .
$self->{'r_UlsaSpectrumAnalyser'}->{$activity}->{'total_time'} . "\n";
}
close BCP;
dbDo( $dbh, "DELETE FROM enm_ulsa_spectrum_analyser_logs WHERE siteid = $self->{'siteId'} AND time BETWEEN '$date 00:00:00' AND '$date 23:59:59'" )
or die "Failed to delete from enm_ulsa_spectrum_analyser_logs" . $dbh->errstr;
dbDo( $dbh, "LOAD DATA INFILE '$bcpFileUlsaAnalyserLogs' INTO TABLE enm_ulsa_spectrum_analyser_logs" )
or die "Failed to load new data from '$bcpFileUlsaAnalyserLogs' file to 'enm_ulsa_spectrum_analyser_logs' table" . $dbh->errstr;
unlink($bcpFileUlsaAnalyserLogs);
$r_incr->{'UlsaSpectrumAnalyser'} = {
'r_UlsaSpectrumAnalyser' => $self->{'r_UlsaSpectrumAnalyser'}
};
}
1;
My attempt
package UlsaSpectrumAnalyser;
use strict;
use warnings;
use Data::Dumper;
use StatsDB;
use DBI;
use StatsTime;
sub new
{
my $klass = shift;
my $self = bless {}, $klass;
return $self;
}
sub init($$$$)
{
my ($self,$r_cliArgs,$r_incr,$dbh) = #_;
$self->{'site'} = $r_cliArgs->{'site'};
$self->{'siteId'} = $r_cliArgs->{'siteId'};
$self->{'date'} = $r_cliArgs->{'date'};
$self->{'r_instrDataEvent'}->{'ulsaStats'} = [];
my #subscriptions = ();
$self->{'serverMap'} = {};
foreach my $service( "pmservice", "saservice" ) {
my $r_serverMap = enmGetServiceGroupInstances($self->{'site'}, $self->{'date'},$service);
while ( my ($server,$serverId) = each %{$r_serverMap} ) {
push ( #subscriptions, {'server' => $server, 'prog' => 'JBOSS'} );
$self->{'serverMap'}->{$server} = $serverId;
}
}
return \#subscriptions;
}
sub handle($$$$$$$)
{
my ($self,$timestamp,$host,$program,$severity,$message,$messageSize) = #_;
if ( $::DEBUG > 9 ) { print "UlsaSpectrumAnalyser::handle got message from $host $program : $message\n"; }
# Skip any warnings/errors
if ( $severity ne 'info' ) {
return;
}
my ($time)=$timestamp=~/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}).*/;
my ($epochtime) = getDateTimeInMilliSeconds($timestamp);
if ( $::DEBUG > 3 ) { print "UlsaSpectrumAnalyser::handle got message from $time $host $program : $message\n"; }
#Sample Log
#2017-09-08 14:55:36,028 INFO [com.X.y.itpf.EVENT_LOGGER] (ajp-executor-threads - 18) [administrator, Spectrum Analyzer, DETAILED, #ULSA_COMPONENT_FFT, MeContext=lienb4003, Samples=65510; FileParsingTime(ms)=50; FastFourierTime(ms)=370; PostProcessingTime(ms)=3; #ChartScalingTime(ms)=1; TotalTime(ms)=424]
if( $message =~ /.*ULSA_COMPONENT_FFT,\s+(\S+),\s+Samples=(\d+);\s+FileParsingTime\S+=(\d+);\s+FastFourierTime\S+=(\d+);\s+PostProcessingTime\S+=(\d+);\s+ChartScalingTime\S+=(\d+);\s+TotalTime\S+=(\d+)]/ ) {
my $serverid = $self->{'r_instrDataEvent'}->{'serverid'};
my %event = (
'time' => $time,
'epochtime' => $epochtime,
'serverid' => $serverid,
'source' => $1,
'sample' => $2,
'file_parsing_time' => $3,
'fast_fourier_time' => $4,
'post_processing_time' => $5,
'chart_scaling_time' => $6,
'total_time' => $7
);
push #{$self->{'r_instrDataEvent'}->{'ulsaStats'}}, \%event;
}
}
sub handleExceeded($$$)
{
my ($self, $host, $program) = #_;
}
sub done($$$)
{
my ($self,$dbh,$r_incr) = #_;
my $bcpFileUlsaAnalyserLogs = getBcpFilename("enm_ulsa_spectrum_analyser_logs ");
open (BCP, "> $bcpFileUlsaAnalyserLogs") or die "Failed to open $bcpFileUlsaAnalyserLogs";
foreach my $activity (#{$self->{'r_instrDataEvent'}->{'ulsaStats'}}) {
print BCP $self->{'siteId'} . "\t" .
$activity->{'serverid'} . "\t" .
$activity->{'time'} . "\t" .
$activity->{'epochtime'} . "\t" .
$activity->{'source'} . "\t" .
$activity->{'sample'} . "\t" .
$activity->{'file_parsing_time'} . "\t" .
$activity->{'fast_fourier_time'} . "\t" .
$activity->{'post_processing_time'} . "\t" .
$activity->{'chart_scaling_time'} . "\t" .
$activity->{'total_time'} . "\n";
}
close BCP;
dbDo( $dbh, sprintf("DELETE FROM enm_ulsa_spectrum_analyser_logs WHERE siteid = %d AND time BETWEEN '%s' AND '%s'",
$self->{'siteId'}, $self->{'r_instrDataEvent'}->{'ulsaStats'}->[0]->{'epochtime'},$self->{'r_instrDataEvent'}->{'ulsaStats'}->[$#{$self->{'r_instrDataEvent'}->{'ulsaStats'}}]->{'epochtime'}))
or die "Failed to delete from enm_ulsa_spectrum_analyser_logs" . $dbh->errstr;#'
dbDo( $dbh, "LOAD DATA INFILE '$bcpFileUlsaAnalyserLogs' INTO TABLE enm_ulsa_spectrum_analyser_logs" )
or die "Failed to load new data from '$bcpFileUlsaAnalyserLogs' file to 'enm_ulsa_spectrum_analyser_logs' table" . $dbh->errstr;
}
1;
I would say that the problem is your
my $bcpFileUlsaAnalyserLogs = getBcpFilename( "enm_ulsa_spectrum_analyser_logs " )
I doubt if the file name really has a space at the end!
I'm surprised that your dbDo or at least your
or die "Failed to load new data ... "
didn't catch this and report it. I don't like wrapping DBI code inside subroutines, especially when I can't see those subroutines.
Note also that you should be using placeholders
dbDo( $dbh, sprintf("DELETE FROM enm_ulsa_spectrum_analyser_logs WHERE siteid = %d AND time BETWEEN '%s' AND '%s'",
$self->{'siteId'}, $self->{'r_instrDataEvent'}->{'ulsaStats'}->[0]->{'epochtime'},$self->{'r_instrDataEvent'}->{'ulsaStats'}->[$#{$self->{'r_instrDataEvent'}->{'ulsaStats'}}]->{'epochtime'}))
or die "Failed to delete from enm_ulsa_spectrum_analyser_logs" . $dbh->errstr;#'
would be much better as
my $sth = $dbh->prepare(<<END_SQL);
DELETE FROM enm_ulsa_spectrum_analyser_logs
WHERE siteid = ? AND time BETWEEN ? AND ?
END_SQL
my $ulsa_stats = $self->{r_instrDataEvent}{ulsaStats};
$sth->execute(
$self->{siteId},
$ulsa_stats->[0]{epochtime},
$ulsa_stats->[-1]{epochtime}
) or die "Failed to delete from enm_ulsa_spectrum_analyser_logs: " . $dbh->errstr;

mediawiki query NOT IN syntax

I have a database query and I would like to add the condition:
$not_these_ids = array(22, 34, 55, 66)
'page_id' != $not_these_ids;
How do I write it in the mediawiki syntax?
`
$dbr = wfGetDB( DB_SLAVE ); ...
$conds = array( 'rev_id = page_latest',
'page_id' != $not_these_ids
); `
but it doesn't work.
You can just pass an array of strings, one for each conditions, to the third parameter of select().
Example from maintenance/Maintenance.php:
$dbw = wfGetDB( DB_MASTER );
$dbw->begin( __METHOD__ );
[...]
if ( count( $latestRevs ) > 0 ) {
$revConds[] = 'rev_id NOT IN (' . $dbw->makeList( $latestRevs ) . ')';
}
$res = $dbw->select( 'revision', 'rev_id', $revConds, __METHOD__ );
$oldRevs = array();
foreach ( $res as $row ) {
$oldRevs[] = $row->rev_id;
}
$this->output( "done.\n" );

unable to itherate through the array perl

I have this perl script:
my %perMpPerMercHash;
foreach my $sheet () { #proper ranges specified
foreach my $row ( ) { #proper ranges specified
#required variables declared.
push(#{$perMpPerMercHash{join("-", $mercId, $mpId)}}, $mSku);
}
}
#Finally 'perMpPerMercHash' will be a hash of array`
foreach my $perMpPerMerc ( keys %perMpPerMercHash ) {
&genFile($perMpPerMerc, $perMpPerMercHash{$perMpPerMerc});
}
sub genFile {
my ( $outFileName, #skuArr ) = #_;
my $output = new IO::File(">$outFileName");
my $writer = new XML::Writer( OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 2);
#mpId is generated.
&prepareMessage($writer, $mpId, #skuArr);
}
sub prepareMessage {
my ( $writer, $mpId, #skuArr ) = #_;
my $count = 1;
print Dumper \#skuArr; #Printing correctly, 8-10 values.
foreach my $sku ( #skuArr ) { #not iterating.
print "loop run" , $sku, "\n"; #printed only once.
}
}
Can somebody please help why this is happening. I am new to perl and could not understand this anomaly.
EDIT:
output of Dumper:
$VAR1 = [
'A',
'B',
'C',
];
When you do
&genFile($perMpPerMerc, $perMpPerMercHash{$perMpPerMerc});
You're passing a reference to an array.
So in
sub genFile {
my ( $outFileName, #skuArr ) = #_;
You have to do :
sub genFile {
my ( $outFileName, $skuArr ) = #_;
and then use #$skuArr.
Have a look at references
The modified genFile sub will be:
sub genFile {
my ( $outFileName, $skuArr ) = #_;
my $output = new IO::File(">$outFileName");
my $writer = new XML::Writer( OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 2);
#mpId is generated.
&prepareMessage($writer, $mpId, #$skuArr);
}
And the other sub don't need to be modified.
Or you can pass always skuArr by reference:
&genFile($perMpPerMerc, $perMpPerMercHash{$perMpPerMerc});
...
sub genFile {
my ( $outFileName, $skuArr ) = #_;
...
&prepareMessage($writer, $mpId, $skuArr);
}
sub prepareMessage {
my ( $writer, $mpId, $skuArr ) = #_;
my $count = 1;
print Dumper $skuArr;
foreach my $sku ( #$skuArr ) {
print "loop run" , $sku, "\n";
}
}

How do i correct jqgrid footer information?

I am using jqgrid in one of my application.
Right now i am facing a strange problem and do not know how to rectify it?
Actually in one of the reports (having multiple link) jqgrid is showing wrong footer information that is its showing Page 0 of 0 even if there are records in the table.
This is the code which runs:
if( isset($this->params['named']['ajax']) && $this->params['named']['ajax'] == '1' )
{
$this->autoRender = false;
// get how many rows we want to have into the grid - rowNum parameter in the grid
$limit = $this->params['url']['rows'];
// get index row - i.e. user click to sort. At first time sortname parameter -
// after that the index from colModel
$sidx = $this->params['url']['sidx'];
// sorting order - at first time sortorder
$sord = $this->params['url']['sord'];
$page = $this->params['url']['page'];
// if we not pass at first time index use the first column for the index or what you want
if( !$sidx ) $sidx = 1;
// calculate the number of rows for the query. We need this for paging the result
$findconditions = array();
if(!empty($this->params['named']['batch']))
array_push(&$findconditions, array('Batch.id' => $this->params['named']['batch'] ));
$row = $this->Placement->find('count',array(
'link' => array(
'Student' => array(
'Batch'
)
),
'conditions'=> $findconditions
));
$count = $row;
// calculate the total pages for the query
if( $count > 0 )
{
$total_pages = ceil($count / $limit);
}
else
{
$total_pages = 0;
}
// if for some reasons the requested page is greater than the total
// set the requested page to total page
if( $page > $total_pages ) $page = $total_pages;
// calculate the starting position of the rows
$start = $limit * $page - $limit;
// if for some reasons start position is negative set it to 0
// typical case is that the user type 0 for the requested page
if( $start < 0 ) $start = 0;
// the actual query for the grid data
$limit_range = $start . "," . $limit;
$sort_range = $this->modelClass . '.' . $sidx . " " . $sord;
$this->Placement->recursive = -1;
$where='';
if( $this->params['url']['_search'] == 'true' )
{
//pr($this->params);
$searchconditions = array();
if( isset($this->params['named']['batch']) && !empty($this->params['named']['batch']) )
{
$where.= " Batch.id =".$this->params['named']['batch'];
}
if( isset($this->params['url']['isstillworking']) && !empty($this->params['url']['isstillworking']) )
{
$where.= " AND Placement.isstillworking ='".$this->params['url']['isstillworking']."'";
}
if( isset($this->params['url']['studentname']) && !empty($this->params['url']['studentname']) )
{
$where.=" AND Student.fullname LIKE '" .$this->params['url']['studentname'] . "%'";
}
if( isset($this->params['url']['companyname']) && !empty($this->params['url']['companyname']) )
{
$where.=" AND Company.name LIKE '" .$this->params['url']['companyname'] . "%'";
}
if( isset($this->params['url']['salary']) && !empty($this->params['url']['salary']) )
{
$where.= " AND Placement.salary =".$this->params['url']['salary'];
}
if( isset($this->params['url']['contactnumber1']) && !empty($this->params['url']['contactnumber1']) )
{
$where.= " AND Student.contactnumber1 =".$this->params['url']['contactnumber1'];
}
if( isset($this->params['url']['batchname']) && !empty($this->params['url']['batchname']) )
{
$where.=" AND Batch.name LIKE '" .$this->params['url']['batchname'] . "%'";
}
$sql="SELECT Student.fullname,
Placement.isstillworking,
Company.id,
Company.name,
Placement.id,
Placement.salary,
Placement.created,
Student.id,
Student.contactnumber1,
Batch.id,
Batch.name
FROM placements Placement
INNER JOIN (
SELECT student_id, isstillworking, max( created ) AS other_col
FROM placements
GROUP BY student_id
) AS b ON Placement.student_id = b.student_id
AND Placement.created = b.other_col
INNER JOIN students Student ON ( Student.id = Placement.student_id )
INNER JOIN batches Batch ON ( Student.batch_id = Batch.id )
INNER JOIN companies Company ON ( Company.id = Placement.company_id )
WHERE ".$where.
" AND Student.type='student'
AND Student.trainingcompleted=1
ORDER BY ".$sort_range."
LIMIT ".$limit_range
;
$result=$this->Placement->query($sql);
}
else
{
$sql="SELECT Student.fullname,
Placement.isstillworking,
Company.id,
Company.name,
Placement.id,
Placement.salary,
Placement.created,
Student.id,
Student.contactnumber1,
Batch.id,
Batch.name
FROM placements Placement
INNER JOIN (
SELECT student_id, isstillworking, max( created ) AS other_col
FROM placements
GROUP BY student_id
) AS b ON Placement.student_id = b.student_id
AND Placement.created = b.other_col
INNER JOIN students Student ON ( Student.id = Placement.student_id )
INNER JOIN batches Batch ON ( Student.batch_id = Batch.id )
INNER JOIN companies Company ON ( Company.id = Placement.company_id )
WHERE Batch.id =
".$this->params['named']['batch'].
" AND Student.type='student'
AND Student.trainingcompleted=1
ORDER BY ".$sort_range."
LIMIT ".$limit_range
;
$result=$this->Placement->query($sql);
}
$i = 0;
$response->page = $page;
$response->total = $total_pages;
$response->records = $count;
//pr($result);
foreach($result as $result)
{
$response->rows[$i]['id'] = $result['Placement']['id'];
$student = "<a href='" . APP_URL . "placements/report18/studentid:" . $result['Student']['id']."'>" . $result['Student']['fullname'] . "</a>";
$company = "<a href='" . APP_URL . "companies/view/" . $result['Company']['id'] . "'>" . $result['Company']['name'] . "</a>";
$batch = "<a href='" . APP_URL . "batches/view/" . $result['Batch']['id'] . "'>" . $result['Batch']['name'] . "</a>";
$contactnumber1 =$result['Student']['contactnumber1'];
$response->rows[$i]['cell'] = array($student, $result['Placement']['isstillworking'], $result['Company']['name'], $result['Placement']['salary'], $contactnumber1, $batch);
$i++;
}
echo json_encode($response);
}
I am also attaching the screen shot for reference.
Please help me on this.
Regards,
Pankaj
Why on earth would you not use Cake's ORM layer and Pagination class? If you use Model->query() you have to escape input yourself!

Why do I get "Global symbol '$href_array' requires specific package name" in Perl?

I can't seem to get these arrays to work. I'm getting errors along the following lines:
Global symbol '$href_array' requires specific package name
What does this mean? Here's part of my code:
sub scrape {
my ( $self, $DBhost, $DBuser, $DBpass, $DBname ) = #_;
my ($dbh, $query, $result, $array);
my $DNS = "dbi:mysql:$DBname:$DBhost:3306";
$dbh = DBI->connect($DNS, $DBuser, $DBpass ) or die $DBI::errstr;
if( defined( $self->{_process_image} ) && ( -e 'href_w_' . $self->{_process_image} . ".txt" ) ) {
open ERROR_W, "error_w_" . $self->{_process_image} . ".txt";
open M_HREF_W, "m_href_w_" . $self->{_process_image} . ".txt";
open HREF_W, "href_w_" . $self->{_process_image} . ".txt";
my #m_error_array = ( split( '|||', <ERROR_W> ) );
my #m_href_array = ( split( '|||', <M_HREF_W> ) );
my #href_array = ( split( '|||', <HREF_W> ) );
close ( ERROR_W );
close ( M_HREF_W );
close ( HREF_W );
}else{
my #m_error_array;
my #m_href_array;
my #href_array = ( $self->{_url} );
}
my $z = 0;
while( $href_array ){
if( defined( $self->{_x_more} ) && $z == $self->{_x_more} ) {
break;
}
if( defined( $self->{_process_image} ) ) {
$self->write( 'm_href_w', #m_href_array );
$self->write( 'href_w', #href_array );
$self->write( 'error_w', #m_error_array );
}
$self->{_link_count} = scalar #m_href_array;
}
}
FIXED:
#!/usr/bin/perl
use strict;
use URI;
use File::Basename;
use DBI;
package Crawler;
sub new {
my $class = shift;
my $self = {
_url => shift,
_max_link => 0,
_local => 1
};
bless $self, $class;
return $self;
}
sub process_image {
my ($self, $process_image) = #_;
$self->{_process_image} = $process_image;
}
sub local {
my ($self, $local) = #_;
$self->{_local} = $local;
}
sub max_link {
my ($self, $max_link) = #_;
$self->{_max_link} = $max_link;
}
sub x_more {
my ($self, $x_more) = #_;
$self->{_x_more} = $x_more;
}
sub resolve_href {
my ($base, $href) = #_;
my $uri = URI->new($href);
return $uri->rel($base);
}
sub write {
my ( $self, $ref, $data ) = #_;
open FILE, '>>' . $ref . '_' . $self->{_process_image} . '.txt';
print FILE join( '|||', $data );
close( FILE );
}
sub scrape {
my #m_error_array;
my #m_href_array;
my #href_array;
my ( $self, $DBhost, $DBuser, $DBpass, $DBname ) = #_;
my ($dbh, $query, $result, $array);
my $DNS = "dbi:mysql:$DBname:$DBhost:3306";
$dbh = DBI->connect($DNS, $DBuser, $DBpass ) or die $DBI::errstr;
if( defined( $self->{_process_image} ) && ( -e 'href_w_' . $self->{_process_image} . ".txt" ) ) {
open ERROR_W, "error_w_" . $self->{_process_image} . ".txt";
open M_HREF_W, "m_href_w_" . $self->{_process_image} . ".txt";
open HREF_W, "href_w_" . $self->{_process_image} . ".txt";
my #m_error_array = ( split( '|||', <ERROR_W> ) );
my #m_href_array = ( split( '|||', <M_HREF_W> ) );
my #href_array = ( split( '|||', <HREF_W> ) );
close ( ERROR_W );
close ( M_HREF_W );
close ( HREF_W );
}else{
#href_array = ( $self->{_url} );
}
my $z = 0;
while( #href_array ){
if( defined( $self->{_x_more} ) && $z == $self->{_x_more} ) {
break;
}
if( defined( $self->{_process_image} ) ) {
$self->write( 'm_href_w', #m_href_array );
$self->write( 'href_w', #href_array );
$self->write( 'error_w', #m_error_array );
}
$self->{_link_count} = scalar #m_href_array;
}
}
1;
#$query = "SELECT * FROM `actwebdesigns.co.uk` ORDER BY ID DESC";
#$result = $dbh->prepare($query);
#$result->execute();
#while( $array = $result->fetchrow_hashref() ) {
# print $array->{'URL'} . "\n";
#}
Your code probably won't work the way you think it will. Take, for example, this block:
}else{
my #m_error_array;
my #m_href_array;
my #href_array = ( $self->{_url} );
}
What this does is declare three arrays. That's all fine and good, but because you've used my, they "disappear" when they go out of scope at the end of the else{} block. You probably want to declare them up at the top, before your if...else block and remove the duplicated declarations from both blocks in the if...else section.
Then, you're probably trying to iterate over the #href_array list. You'll want to change your while() to something like:
foreach my $href_array ( #href_array ) {
...
}
All that said, kudos for putting use strict in your code (it must be there, or else you wouldn't have received the warning you did). That's an important thing that programmers new to Perl greatly benefit from, because it warns them about exactly this problem!
It means you are using an undeclared variable $href_array. You need to rewrite this line:
while( $href_array ){
to something like:
foreach my $element (#href_array) {
...although a bit more context is needed in this code to be able to understand what it is doing -- for example, you open a database connection in the scrape method ($dbh = ...), but never use it.
Your code includes while($href_array) but you don't declare a variable called $href_array.
You may have meant #href_array (these are not the same variable), although you don't seem to modify this array inside the loop.
In all these answers no one bothers to tell you that the strict pragma enforces some more stringent coding practices in Perl. See its docs for the details. We explain it fully in Learning Perl too :)
When you get a warning you don't understand, you can get more information with the diagnostics. Here's an example:
use strict;
use diagnostics;
while( $href ) { 1 };
Here's the extended error message:
Execution of /Users/brian/Desktop/t aborted due to compilation errors (#1)
(F) You've said "use strict" or "use strict vars", which indicates
that all variables must either be lexically scoped (using "my" or "state"),
declared beforehand using "our", or explicitly qualified to say
which package the global variable is in (using "::").

Resources