DBI connect, failed: FATAL: sorry, too many clients already - database

I am running a crontab as described below :
* 1 * * * /var/fdp/reportingscript/an_outgoing_tps_report.pl
* 1 * * * /var/fdp/reportingscript/an_processed_rule_report.pl
* 1 * * * /var/fdp/reportingscript/sdp_incoming_traffic_tps_report.pl
* 1 * * * /var/fdp/reportingscript/en_outgoing_tps_report.pl
* 1 * * * /var/fdp/reportingscript/en_processed_rule_report.pl
* 1 * * * /var/fdp/reportingscript/rs_incoming_traffic_report.pl
* 1 * * * /var/fdp/reportingscript/an_summary_report.pl
* 1 * * * /var/fdp/reportingscript/en_summary_report.pl
* 1 * * * /var/fdp/reportingscript/user_report.pl
and getting an error : ( for all scripts the error is same)
DBI connect('dbname=scs;host=192.168.18.23;port=5432','postgres',...) failed: FATAL: sorry, too many clients already at /var/fdp/reportingscript/sdp_incoming_traffic_tps_report.pl line 38.
Moreover, if I am running the script manually one at a time, it doesn't show any error.
For your reference i am attaching the script also for which I have shown the above error:
#!/usr/bin/perl
use strict;
use FindBin;
use lib $FindBin::Bin;
use Time::Local;
use warnings;
use DBI;
use File::Basename;
use CONFIG;
use Getopt::Long;
use Data::Dumper;
my $channel;
my $circle;
my $daysbefore;
my $dbh;
my $processed;
my $discarded;
my $db_name = "scs";
my $db_vip = "192.168.18.23";
my $db_port = "5432";
my $db_user = "postgres";
my $db_password = "postgres";
#### code to redirect all console output in log file
my ( $seco_, $minu_, $hrr_, $moday_, $mont_, $years_ ) = localtime(time);
$years_ += 1900;
$mont_ += 1;
my $timestamp = sprintf( "%d%02d%02d", $years_, $mont_, $moday_ );
$timestamp .= "_" . $hrr_ . "_" . $minu_ . "_" . $seco_;
print "timestamp is $timestamp \n";
my $logfile = "/var/fdp/log/reportlog/sdp_incoming_report_$timestamp";
print "\n output files is " . $logfile . "\n";
open( STDOUT, ">", $logfile ) or die("$0:dup:$!");
open STDERR, ">&STDOUT" or die "$0: dup: $!";
my ( $sec_, $min_, $hr_, $mday_, $mon_, $year_ ) = localtime(time);
$dbh = DBI->connect( "DBI:Pg:dbname=$db_name;host=$db_vip;port=$db_port",
"$db_user", "$db_password", { 'RaiseError' => 1 } );
print "\n Dumper is " . $dbh . "\n";
my $sthcircle = $dbh->prepare("select id,name from circle");
$sthcircle->execute();
while ( my $refcircle = $sthcircle->fetchrow_hashref() ) {
print "\n dumper for circle is " . Dumper($refcircle);
my $namecircle = uc( $refcircle->{'name'} );
my $idcircle = $refcircle->{'id'};
$circle->{$namecircle} = $idcircle;
print "\n circle name : " . $namecircle . "id is " . $idcircle;
}
sub getDate {
my $daysago = shift;
$daysago = 0 unless ($daysago);
my #months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my ( $sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst ) = localtime( time - ( 86400 * $daysago ) );
# YYYYMMDD, e.g. 20060126
$year_ = $year + 1900;
$mday_ = $mday;
$mon_ = $mon + 1;
return sprintf( "%d-%02d-%02d", $year + 1900, $mon + 1, $mday );
}
GetOptions( "d=i" => \$daysbefore );
my $filedate = getDate($daysbefore);
print "\n filedate is $filedate \n";
my #basedir = CONFIG::getBASEDIR();
print "\n array has basedir" . Dumper(#basedir);
$mon_ = "0" . $mon_ if ( defined $mon_ && $mon_ <= 9 );
$mday_ = "0" . $mday_ if ( defined $mday_ && $mday_ <= 9 );
foreach (#basedir) {
my $both = $_;
print "\n dir is $both \n";
for ( keys %{$circle} ) {
my $path = $both;
my $circleid = $_;
print "\n circle is $circleid \n";
my $circleidvalue = $circle->{$_};
my $file_csv_path = "/opt/offline/reports/$circleid";
my %sdp_hash = ();
print "\n file is $file_csv_path csv file \n";
if ( -d "$file_csv_path" ) {
} else {
mkdir( "$file_csv_path", 0755 );
}
my $csv_new_file
= $file_csv_path
. "\/FDP_"
. $circleid
. "_SDPINCOMINGTPSREPORT_"
. $mday_ . "_"
. $mon_ . "_"
. $year_ . "\.csv";
print "\n file is $csv_new_file \n";
print "\n date:$year_-$mon_-$mday_ \n";
open( DATA, ">>", $csv_new_file );
$path = $path . $circleid . "/Reporting/EN/Sdp";
print "\n *****path is $path \n";
my #filess = glob("$path/*");
foreach my $file (#filess) {
print "\n Filedate ---------> $filedate file is $file \n";
if ( $file =~ /.*_sdp.log.$filedate-*/ ) {
print "\n found file for $circleid \n";
my $x;
my $log = $file;
my #a = split( "-", $file );
my $starttime = $a[3];
my $endtime = $starttime;
my $sdpid;
my $sdpid_value;
$starttime = "$filedate $starttime:00:00";
$endtime = "$filedate $endtime:59:59";
open( FH, "<", "$log" ) or die "cannot open < $log: $!";
while (<FH>) {
my $line = $_;
print "\n line is $line \n";
chomp($line);
$line =~ s/\s+$//;
my #a = split( ";", $line );
$sdpid = $a[4];
my $stat = $a[3];
$x->{$sdpid}->{$stat}++;
}
close(FH);
print "\n Dumper is x:" . Dumper($x) . "\n";
foreach my $sdpidvalue ( keys %{$x} ) {
print "\n sdpvalue us: $sdpidvalue \n";
if ( exists( $x->{$sdpidvalue}->{processed} ) ) {
$processed = $x->{$sdpidvalue}->{processed};
} else {
$processed = 0;
}
if ( exists( $x->{$sdpidvalue}->{discarded} ) ) {
$discarded = $x->{$sdpidvalue}->{discarded};
} else {
$discarded = 0;
}
my $sth_new1 = $dbh->prepare("select id from sdp_details where sdp_name='$sdpid' ");
print "\n sth new is " . Dumper($sth_new1);
$sth_new1->execute();
while ( my $row1 = $sth_new1->fetchrow_hashref ) {
$sdpid_value = $row1->{'id'};
print "\n in hash rowref from sdp_details table " . Dumper($sdpid_value);
}
my $sth_check
= $dbh->prepare(
"select processed,discarded from sdp_incoming_tps where circle_id='$circleidvalue' and sdp_id='$sdpid_value' and start_time='$starttime' and end_time='$endtime'"
);
print "\n Dumper for bhdatabase statement is " . Dumper($sth_check);
$sth_check->execute();
my $duplicate_row = 0;
my ( $success_, $failure_ );
while ( my $row_dup = $sth_check->fetchrow_hashref ) {
print "\n row_dup is " . Dumper($row_dup);
$duplicate_row = 1;
$success_ += $row_dup->{'processed'};
$failure_ += $row_dup->{'discarded'};
}
if ( $duplicate_row == 0 ) {
my $sth
= $dbh->prepare(
"insert into sdp_incoming_tps (id,circle_id,start_time,end_time,processed,discarded,sdp_id) select nextval('sdp_incoming_tps_id'),'$circleidvalue','$starttime','$endtime','$processed','$discarded','$sdpid_value' "
);
$sth->execute();
} else {
$success_ += $processed;
$failure_ += $discarded;
my $sth
= $dbh->prepare(
"update sdp_incoming_tps set processed=$success_,discarded=$failure_ where circle_id='$circleidvalue' and sdp_id='$sdpid_value' and start_time='$starttime' and end_time='$endtime'"
);
$sth->execute();
}
# my $file_csv_path = "/opt/offline/reports/$circleid";
# my %sdp_hash = ();
# if ( -d "$file_csv_path" ) {
# } else {
# mkdir( "$file_csv_path", 0755 );
# }
# my $csv_new_file = $file_csv_path . "\/FDP_" . $circleid . "_SDPINCOMINGTPSREPORT_". $mday_ . "_" . $mon_ . "_" . $year_ . "\.csv";
print "\n file is $csv_new_file \n";
print "\n date:$year_-$mon_-$mday_ \n";
close(DATA);
open( DATA, ">>", $csv_new_file ) or die("cant open file : $! \n");
print "\n csv new file is $csv_new_file \n";
my $sth_new2 = $dbh->prepare("select * from sdp_details");
$sth_new2->execute();
while ( my $row1 = $sth_new2->fetchrow_hashref ) {
my $sdpid = $row1->{'id'};
$sdp_hash{$sdpid} = $row1->{'sdp_name'};
}
#print "\n resultant sdp hash".Dumper(%sdp_hash);
#$mon_="0".$mon_;
print "\n timestamp being matched is $year_-$mon_-$mday_ \n";
print "\n circle id value is $circleidvalue \n";
my $sth_new
= $dbh->prepare(
"select * from sdp_incoming_tps where date_trunc('day',start_time)='$year_-$mon_-$mday_' and circle_id='$circleidvalue'"
);
$sth_new->execute();
print "\n final db line is " . Dumper($sth_new);
my $str = $sth_new->{NAME};
my #str_arr = #$str;
shift(#str_arr);
shift(#str_arr);
my #upper = map { ucfirst($_) } #str_arr;
$upper[4] = "Sdp-Name";
my $st = join( ",", #upper );
$st = $st . "\n";
$st =~ s/\_/\-/g;
#print $fh "sep=,"; print $fh "\n";
print DATA $st;
while ( my $row = $sth_new->fetchrow_hashref ) {
print "\n found matching row \n";
my $row_line
= $row->{'start_time'} . ","
. $row->{'end_time'} . ","
. $row->{'processed'} . ","
. $row->{'discarded'} . ","
. $sdp_hash{ $row->{'sdp_id'} } . "\n";
print "\n row line matched is " . $row_line . "\n";
print DATA $row_line;
}
close(DATA);
}
} else {
next;
}
}
}
}
$dbh->disconnect;
Please help, how can I avoid this error.
Thanks in adv.

The immediate problem, as indicated by the error message, is that running all of those scripts at once requires more database connections than the server will allow. If they run fine individually, then running them individually will fix that.
The underlying problem is that your crontab is wrong. * 1 * * * will run all the scripts every minute from 0100 to 0159 each day. If they take more than one minute to complete, then a new set will start before the previous set completes, requiring an additional set of database connections, which will run through the pool of available connections rather quickly.
I assume that you only need to run your daily scripts once per day, not sixty times, so change that to 5 1 * * * to run them only once, at 0105.
If there's still an issue, run each one on a different minute (which is probably a good idea anyhow):
5 1 * * * /var/fdp/reportingscript/an_outgoing_tps_report.pl
10 1 * * * /var/fdp/reportingscript/an_processed_rule_report.pl
15 1 * * * /var/fdp/reportingscript/sdp_incoming_traffic_tps_report.pl
20 1 * * * /var/fdp/reportingscript/en_outgoing_tps_report.pl
25 1 * * * /var/fdp/reportingscript/en_processed_rule_report.pl
30 1 * * * /var/fdp/reportingscript/rs_incoming_traffic_report.pl
35 1 * * * /var/fdp/reportingscript/an_summary_report.pl
40 1 * * * /var/fdp/reportingscript/en_summary_report.pl
45 1 * * * /var/fdp/reportingscript/user_report.pl

Related

MySql Database profiling and checking its result using perl

I want to see how much time my query taking to execute using perl. Here is my work till now
#!/usr/bin/perl -w
use DBI;
use strict;
use warnings;
use diagnostics;
use POSIX qw(strftime);
my $driver = "mysql";
my $database = "employee";
my $dsn = "DBI:$driver:database=$database";
my $userid = "root";
my $password = "****";
my $sql_statement;
my $sth;
my $data;
my $row;
my $dbh = DBI->connect($dsn, $userid, $password ) or die $DBI::errstr;
my $foo = "SET profiling=on;"."\n"."SELECT * FROM employee;"."\n"."SHOW PROFILES;";
open my $fh, "<", \$foo;
binmode $fh, ":encoding(utf8)";
while ($sql_statement = <$fh>)
{
$data = $dbh->selectall_arrayref($sql_statement) or die "issue is : $dbh->errstr";
}
my $retirement_date;
if (scalar #$data)
{
$retirement_date = strftime( "%F %H:%M:%S", localtime(time() - #$data[0]));
}
print $retirement_date;
but I am not getting any result.
expected output should be like this
5 15.86296022 SELECT * from employee;
where 5 is query id, 15.86296022 is execution time and last query.
Please suggest how to achieve this.
You're not getting any result because you're misunderstanding the data structure you're getting back from selectall_arrayref(). It's an arrayref of arrayrefs - so you need to look two levels deep in it in order to get the actual data.
I replaced the second part of your code with this:
while ($sql_statement = <$fh>) {
warn "$sql_statement\n";
$data = $dbh->selectall_arrayref($sql_statement) or die "issue is : $dbh->errstr";
print join(' | ', #$_), "\n" for #$data;
}
And when I run it, I get this:
$ perl dbprofile
SET profiling=on;
DBD::mysql::db selectall_arrayref failed: fetch() without execute() at dbprofile line 25, <$fh> line 1.
SELECT * FROM employee;
1 | Fred | 2020-09-09
2 | Bill | 2035-06-23
3 | Jane | 2058-03-11
SHOW PROFILES;
1 | 0.00027717 | SELECT * FROM employee
I'm not sure what's going on with the "fetch() without execute()" error and I don't have time to investigate it now. But you can see all of your data - including the profiling information.
Please see if following demo code provides desired output
#!/usr/bin/perl
#
# vim: ai:ts=4:sw=4
#
use strict;
use warnings;
use feature 'say';
use DBI;
use Config::General;
my($dsn,$dbh,$sth,$query,$rv);
my $conf = Config::General->new( "$ENV{HOME}/.my.cnf" );
my %config = $conf->getall;
# Keep password inside code
$config{password} = 'your_password_here';
$dsn = "DBI:mysql:database=$config{database};host=$config{host};port=$config{port}";
$dbh = DBI->connect($dsn, $config{user}, $config{password},
{
RaiseError => 1,
mysql_enable_utf8 => 1
});
$query = "SET PROFILING=on";
$sth = $dbh->prepare($query) or die $dbh->errstr;
$rv = $sth->execute() or die $dbh->errstr;
#say $DBI::errstr if $rv < 0;
my $db_table = 'your_db_table_here';
$query = "SELECT * FROM $db_table";
$sth = $dbh->prepare($query) or die $dbh->errstr;
$rv = $sth->execute() or die $dbh->errstr;
#say $DBI::errstr if $rv < 0;
$query = "SHOW PROFILES";
$sth = $dbh->prepare($query) or die $dbh->errstr;
$rv = $sth->execute() or die $dbh->errstr;
#say $DBI::errstr if $rv < 0;
while( my $row = $sth->fetchrow_arrayref ) {
say join("\t",#{$row});
}
$query = "SET PROFILING=off";
$sth = $dbh->prepare($query) or die $dbh->errstr;
$rv = $sth->execute() or die $dbh->errstr;
#say $DBI::errstr if $rv < 0;
$sth->finish();
$dbh->disconnect();
Sample of $ENV{HOME}/.my.cnf file
[mysql]
host=yout_db_host_name
user=your_db_user_name
database=your_db_name
default-character-set=utf8
port=3306
Sample of output
1 0.0014745 SELECT * FROM new_movies

Can we use regex with cleartool commands?

I am using below command to retrieve baselines.
cleartool lsbl -fmt "%n\n" -comp comp_name#\vob_name -stream stream_name#\vob_name
I am searching for a way to display baselines which is numerically equal to/ less than certain given baseline. Is there any way to achieve it?
Case 1 : If output is
abc_6.2168
abc_7.4587
abc_8.2950
abc_9.3032
If I want to display baseline which is numerically equal to / less (and closest) to abc_8. Hence, the expected result in Case 1 should be : abc_8.2950.
Case 2 : If output is
abc_6.2168
abc_7.4587
abc_9.3032
Expected result should be : abc_7.4587
NOTE : Trying this on Groovy (Jenkins pipeline)
use strict;
use warnings;
use DBI;
my $bsl_find = $ARGV[0]; #baseline build package name
my $bsl;
my $c=0;
my $mat;
my $previous_str = q{};
my $final_baseline;
my $prev_num_count=1;
my $prev_num_len=1;
my $split_strng;
my $baseline_var = q{};
my $baseline_file;
my $all_baseline_file = $ARGV[1]; #file which contains the list of all retreived baselines as per ARGV[0]
my $app = $ARGV[2]; #the name of the application for which baseline is to be selected
my $filename = 'D:\\baseline_'.$app.'\\'.'new_'.$all_baseline_file.'.txt';
$baseline_file = 'D:\\baseline_'.$app.'\\'.'final_'.$all_baseline_file.'.txt';
$all_baseline_file = 'D:\\baseline_'.$app.'\\'.$all_baseline_file.'.txt';
open(my $fh, '<:encoding(UTF-8)',$filename)
or die "Could not open file '$filename' $!";
while (my $strng = <$fh>) {
chomp $strng;
#print "The line is : $strng \n";
$strng=~ s/^\s+|\s+$//;
#print " \n strng after trim is $strng.";
my $num_count = (split '_', $strng)[-1];
my $num_count_bsl_param = (split '_', $bsl_find)[-1];
my $num_len = length ($num_count);
my $num_len_bsl_param = length ($num_count_bsl_param);
my $a = substr($bsl_find, -$num_len_bsl_param);
my $b = substr($strng, -$num_len);
$split_strng = '_'.$a;
my ($substrng) = split /$split_strng/, $bsl_find;
if ($substrng =~ m/([^\_]+)$/)
{
$substrng=$1;
}
if ( ($a == $b) && (index($strng, $substrng) != -1) )
{
print "\n Match found";
$mat = $strng;
print "\n baseline found is : $mat";
$final_baseline = $mat;
print "\n final bsl is $bsl_find";
$baseline_var = $strng;
#exit 0;
goto label;
}
elsif ( ($a < $b) && (index($strng, $substrng) != -1) )
{
if ( (grep{/$bsl_find/} $filename) && ($previous_str eq "") ){
print "\n final baseline decided : $bsl_find";
$baseline_var = $bsl_find;
goto label;
}
elsif ( ($previous_str ne "") )
{
print "\n final baseline is ...: $previous_str";
$baseline_var = $previous_str;
goto label;
}
}
elsif ( ($a < $b) && ($previous_str ne "") && (index($strng, $substrng) != -1) )
{
if ( ($a > $c) && (index($previous_str, $substrng) != -1) )
{
print "\n baseline found is : $previous_str";
$final_baseline = $previous_str;
print " \n final is $final_baseline";
$baseline_var = $previous_str;
goto label;
}
}
elsif ( ($a < $b) && (index($bsl_find, $substrng) != -1) && ($previous_str ne "") && (index($previous_str, $substrng) == -1) )
{
print "\n Baseline not found of type $bsl_find.... final baseline is : $previous_str";
$baseline_var = $previous_str;
goto label;
}
close(fh);
}
if ($baseline_var eq "")
{
open my $fh ,"<",$filename;
my $last_line;
$last_line = $_,while (<$fh>);
print $last_line;
print " \n Baseline is $last_line";
$baseline_var = $last_line;
goto label;
close(fh);
}
label:
print " \n\n Writing $baseline_var to $baseline_file...";
#$baseline_var = $baseline_var.'.';
$baseline_var=~ s/^\s+|\s+$//;
print " \n \n baseline_var is $baseline_var. ";
unlink $baseline_file;
open(my $fh, '<:encoding(UTF-8)',$all_baseline_file)
or die "Could not open file '$all_baseline_file' $!";
while (my $word = <$fh>) {
chomp $word;
#print "\n word is $word.";
if ( $word =~ /\./ )
{
if( $word =~ m/$baseline_var\./ )
{
print "\n found $baseline_var. in $word";
open(FH1, '>', $baseline_file) or die $!;
print FH1 "$word";
}
}
else
{
if( $word eq $baseline_var )
{
print "\n found $baseline_var. in $word";
open(FH1, '>', $baseline_file) or die $!;
print FH1 "$word";
}
}
close(fh);
}
close(FH1);
}
Is there any way to achieve it?
Not with ClearCase/cleartool alone, which means you need to parse its output, and that depends on your OS/shell.
Something like, in Windows CMD shell, with Git For Windows shell in its path:
cleartool lsbl ... | sort -V |awk 'BEGIN{a=$0;FS="._"}$2 ^< 9{print $0;}'|tail -1
(the ^< is needed to escape the <, and prevent the CMD to interpret that as a redirection)

How to provide default output

I'm trying to get the following Perl program to print a default value, if no result is returned from a query. I am not sure how to do that.
When I run the script, I get the error: Can't call method "country_name" on an undefined value. Are there Any ideas on how I can change the program to print a default value, if no results are returned?
#!/usr/bin/perl update IP addresses with country
use strict;
use warnings;
use Net::IPInfoDB;
my $psql = "/usr/local/pgsql/current/bin/psql";
my $db = 'cpi';
my $args = "-U postgres -qc";
my $date = `/bin/date +\%y\%m\%d%H`;
my $reportfile = "/tmp/multiiplogins-$date";
my $sendmail = "/usr/sbin/sendmail -t -fcpi\#user.com";
my $mailsubject = "Login Report";
my $mailto = 'email#user.com';
my $query = "SELECT userid, login, email, logins, ips FROM (SELECT userid,login,email, count(userid) AS logins, count(ipaddr) AS ips FROM (SELECT l.userid, u.login, u.email, l.ipaddr FROM synloginaccess l, synusers u$
my $query2 = "SELECT l.userid, login, email, ipaddr FROM synloginaccess l, synusers u where l.accesstime > (now() - interval '24 hours') and l.type=2 and l.userid=u.userid ORDER BY l.userid;";
open (REPORT, ">$reportfile");
my $command = qq/$psql $db $args "$query"/;
my $command2 = qq/$psql $db $args "$query2"/;
my $result = `$command`;
my $result2 = `$command2`;
my $g = Net::IPInfoDB->new;
$g->key("api_key");
#we split $login into an array, line-by-line
my #lines = split("\n", $result2);
for my $line (#lines) {
#now we iterate through every line one-by-one
if ($line =~ /(?<ip>\d+\.\d+\.\d+\.\d+)/) {
my $city = $g->get_city("$1");
my $addr = $g->get_country("$1");
print "$line " . "| " . "\t" . $city->city_name . ", " . $addr->country_name ."\n";
print REPORT "$line " . "| " . "\t" . $city->city_name . ", ". $addr->country_name ."\n";
}
else {
print "$line \n ";
}
}
close REPORT;
mailReport();
sub mailReport{
#mail it
open(MAIL, "|$sendmail");
print MAIL "To: $mailto\n";
print MAIL "Subject: $mailsubject\n";
print MAIL "\n";
open (INFILE, "$reportfile");
my #contents = <INFILE>;
my $line;`
$addr ? $addr->country_name : "default"

Perl - Splitting a string

I'm doing an Array that contents each word of a phrase. When I try to split it and print the length then the console gives me an enormous number such as 111039391231319239188238139123919232913123... (more lines)
why?
Here's my code:
$mynames = $texto3;
print $mynames. "\n";
#nameList = split(' ', $texto3);
#print #nameList.length();
for ($to = 0; $to<#nameList.length; $to++){
if($to<#nameList.length) {
#nameList[$to] = #nameList[$to] . "_" . #nameList[$to++];
}
print $to;
#print #nameList[$to] . "\n";
}
$string_level2 = join(' ', #nameList);
#print $string_level2;
To get the length of an array use scalar #nameList instead of #nameList.length.
A typical for-loop uses the less-than operator when counting up, e.g.:
for ( $to = 0; $to < scalar(#nameList); $to++ ) ...
You should never use a post-increment unless you understand the side effects. I believe the following line:
#nameList[$to] = #nameList[$to] . "_" . #nameList[$to++];
... should be written as ...
$nameList[$to] = $nameList[$to] . "_" . $nameList[$to + 1];
Finally the comparison you use should account for the boundary condition (because you refer to $to + 1 inside the loop):
if( $to < (scalar(#nameList) - 1) ) {
$nameList[ $to ] = $nameList[ $to ] . "_" . $nameList[ $to + 1 ];
}

Need help collapsing a list and obtaining totals in perl

Hi I have a large list of data:
http://paste-it.net/public/y17027d/
It is 67859 rows by 10 columns. The 6th column contains values that represent Z-Scores from 1 to 6 in .01 increments. What I would like to do is to total all of the other column values that have the same Z-score value, but my current code is not working.
What I have now prints out values but the totals for each Z-Score are incorrect.
Here is my code:
#! /usr/bin/perl
use strict;
use warnings;
use POSIX;
use Data::Dumper;
my $input = $ARGV[0];
open (DATAFILE, $input) or die $!;
open(OUT,">>"."final.output.txt");
my($line,$fMeasure,$filename,$recall,$precision,$z_score,$computer_calls,$johns_calls,$false_negatives,$false_positives,$true_positives,$count);
$fMeasure=$filename=$recall=$precision=$z_score=$computer_calls=$johns_calls=$false_negatives=$false_positives=$true_positives=$count = 0;
my %stats=();
my %zscore=();
while($line = <DATAFILE>){
# Chop off new line character, skip the comments and empty lines.
chomp($line);
my #temp = split(/\t/, $line);
$true_positives = $temp[0];
$false_positives = $temp[1];
$false_negatives = $temp[2];
$johns_calls = $temp[3];
$computer_calls = $temp[4];
$z_score = $temp[5];
$fMeasure = $temp[6];
$precision = $temp[7];
$recall = $temp[8];
$filename = $temp[9];
$stats{$z_score}{$filename}[0] = $true_positives;
$stats{$z_score}{$filename}[1] = $false_positives;
$stats{$z_score}{$filename}[2] = $johns_calls;
$stats{$z_score}{$filename}[3] = $computer_calls;
$stats{$z_score}{$filename}[4] = $fMeasure;
$stats{$z_score}{$filename}[5] = $precision;
$stats{$z_score}{$filename}[6] = $recall;
$stats{$z_score}{$filename}[6] = $filename;
$zscore{$z_score}++;
}
my $false_positives_new = 0;
my $true_positives_new = 0;
my $johns_calls_new = 0;
my $computer_calls_new = 0;
my $file_name = 0;
foreach $z_score ( sort keys %stats ) {
foreach $filename( keys %{$stats{$z_score}} ){
my $tp = $stats{$z_score}{$filename}[0];
my $fp = $stats{$z_score}{$filename}[1];
my $jc = $stats{$z_score}{$filename}[2];
my $cc = $stats{$z_score}{$filename}[3];
my $fn = $stats{$z_score}{$filename}[6];
#print "$z_score\t$jc\n";
$false_positives_new = $false_positives + $fp;
$true_positives_new = $true_positives + $tp;
$johns_calls_new = $johns_calls + $jc;
$computer_calls_new = $computer_calls + $cc;
#print OUT "$fn\n";
}
print OUT"$true_positives_new\t$false_positives_new\t$johns_calls_new\t$computer_calls_new\t$z_score \n";
$false_positives_new = 0;
$true_positives_new = 0;
$johns_calls_new = 0;
$computer_calls_new = 0;
$file_name = 0;
}
close(OUT);
close (DATAFILE);
I know that I must be doing something wrong but I am not able to figure out what. Any help would be greatly appreciated. Thank you
OK. I was able to get the data from pastebin and I think the following code does what you want.
#! /usr/bin/perl
use strict; use warnings;
use Data::Dumper;
my ($input) = #ARGV;
open my $DATAFILE, '<', $input
or die "Cannot open '$input': $!";
my #field_names = qw(
fMeasure
recall
precision
z_score
computer_calls
johns_calls
false_negatives
false_positives
true_positives
count
);
my #track_fields = qw(
false_positives
false_negatives
johns_calls
computer_calls
);
my (%stats, %by_zscore);
while ( my $line = <$DATAFILE> ) {
last unless $line =~ /\S/;
chomp $line;
my #temp = split /\t/, $line;
my $filename = pop #temp;
my %fields;
#fields{ #field_names } = #temp;
my $z_score = $fields{z_score};
$stats{ $z_score }{$filename} = \#temp;
for my $f ( #track_fields ) {
$by_zscore{$z_score}{ $f } += $fields{ $f };
}
}
print Dumper \%by_zscore;
I think you want to say
$false_positives_new = $false_positives_new + $fp;
# etc.
instead of
$false_positives_new = $false_positives + $fp;

Resources