MySql Database profiling and checking its result using perl - database

I want to see how much time my query taking to execute using perl. Here is my work till now
#!/usr/bin/perl -w
use DBI;
use strict;
use warnings;
use diagnostics;
use POSIX qw(strftime);
my $driver = "mysql";
my $database = "employee";
my $dsn = "DBI:$driver:database=$database";
my $userid = "root";
my $password = "****";
my $sql_statement;
my $sth;
my $data;
my $row;
my $dbh = DBI->connect($dsn, $userid, $password ) or die $DBI::errstr;
my $foo = "SET profiling=on;"."\n"."SELECT * FROM employee;"."\n"."SHOW PROFILES;";
open my $fh, "<", \$foo;
binmode $fh, ":encoding(utf8)";
while ($sql_statement = <$fh>)
{
$data = $dbh->selectall_arrayref($sql_statement) or die "issue is : $dbh->errstr";
}
my $retirement_date;
if (scalar #$data)
{
$retirement_date = strftime( "%F %H:%M:%S", localtime(time() - #$data[0]));
}
print $retirement_date;
but I am not getting any result.
expected output should be like this
5 15.86296022 SELECT * from employee;
where 5 is query id, 15.86296022 is execution time and last query.
Please suggest how to achieve this.

You're not getting any result because you're misunderstanding the data structure you're getting back from selectall_arrayref(). It's an arrayref of arrayrefs - so you need to look two levels deep in it in order to get the actual data.
I replaced the second part of your code with this:
while ($sql_statement = <$fh>) {
warn "$sql_statement\n";
$data = $dbh->selectall_arrayref($sql_statement) or die "issue is : $dbh->errstr";
print join(' | ', #$_), "\n" for #$data;
}
And when I run it, I get this:
$ perl dbprofile
SET profiling=on;
DBD::mysql::db selectall_arrayref failed: fetch() without execute() at dbprofile line 25, <$fh> line 1.
SELECT * FROM employee;
1 | Fred | 2020-09-09
2 | Bill | 2035-06-23
3 | Jane | 2058-03-11
SHOW PROFILES;
1 | 0.00027717 | SELECT * FROM employee
I'm not sure what's going on with the "fetch() without execute()" error and I don't have time to investigate it now. But you can see all of your data - including the profiling information.

Please see if following demo code provides desired output
#!/usr/bin/perl
#
# vim: ai:ts=4:sw=4
#
use strict;
use warnings;
use feature 'say';
use DBI;
use Config::General;
my($dsn,$dbh,$sth,$query,$rv);
my $conf = Config::General->new( "$ENV{HOME}/.my.cnf" );
my %config = $conf->getall;
# Keep password inside code
$config{password} = 'your_password_here';
$dsn = "DBI:mysql:database=$config{database};host=$config{host};port=$config{port}";
$dbh = DBI->connect($dsn, $config{user}, $config{password},
{
RaiseError => 1,
mysql_enable_utf8 => 1
});
$query = "SET PROFILING=on";
$sth = $dbh->prepare($query) or die $dbh->errstr;
$rv = $sth->execute() or die $dbh->errstr;
#say $DBI::errstr if $rv < 0;
my $db_table = 'your_db_table_here';
$query = "SELECT * FROM $db_table";
$sth = $dbh->prepare($query) or die $dbh->errstr;
$rv = $sth->execute() or die $dbh->errstr;
#say $DBI::errstr if $rv < 0;
$query = "SHOW PROFILES";
$sth = $dbh->prepare($query) or die $dbh->errstr;
$rv = $sth->execute() or die $dbh->errstr;
#say $DBI::errstr if $rv < 0;
while( my $row = $sth->fetchrow_arrayref ) {
say join("\t",#{$row});
}
$query = "SET PROFILING=off";
$sth = $dbh->prepare($query) or die $dbh->errstr;
$rv = $sth->execute() or die $dbh->errstr;
#say $DBI::errstr if $rv < 0;
$sth->finish();
$dbh->disconnect();
Sample of $ENV{HOME}/.my.cnf file
[mysql]
host=yout_db_host_name
user=your_db_user_name
database=your_db_name
default-character-set=utf8
port=3306
Sample of output
1 0.0014745 SELECT * FROM new_movies

Related

Database connection script crashes after 4 successful connections

I need to query a group of SAP sybase databases for some information and print that as a comma spearated list. So I figure I write a perl script that connects to any of those databases via DBI module. Here is what I came up with.
my $user = "someuser";
my $passwd = "somepassword";
my #sids=(filled with DB identifiers);
my $output="";
my $size;
my $version;
my $id;
my $dsn;
my $dbh;
my $sid;
my #row;
my $sth1;
my $sth2;
foreach $sid (#sids) {
print $sid."\n";
$dsn = "dbi:Sybase:server=$sid;charset=iso_1;tdsLevel=CS_TDS_50";
print $dsn."\n";
$dbh = DBI->connect($dsn, $user, $passwd,{ PrintError => 0,RaiseError => 0, AutoCommit => 1, syb_enable_utf8 => 1});
print "DBI OK\n" if defined ($dbh);
$sth1 = $dbh->prepare('select SUM(size) from master..sysusages WHERE dbid = 4 AND segmap = 3');
$sth2 = $dbh->prepare('select ##version');
$sth1->execute;
while (#row = $sth1->fetchrow) {
$size = $row[0];
}
$size = $size * 16 / 1024;
$sth1->finish;
$sth2->execute;
while (#row = $sth2->fetchrow) {
$version = $row[0];
}
$sth2->finish;
$output = $sid.",".$size.",".$version;
$dbh->disconnect;
print $output."\n";
}
When I execute this, it crashes after 4th iteration, because the connection handle is not set. So the connection of the fifth DB does not work anymore.
Can't call method "prepare" on an undefined value at ./check_sybasedbs.pl line 36.
Line 36 is the preparation of statement 1.
I tried putting sleep commands at various positions. I also tried to explicitly clean up the variables that are reused via undef. Now I am out of ideas and would really appreciate your input.
Your code could be written as sample below (please see if ... else ... block for $dbh)
use strict;
use warnings;
use feature 'say';
use DBI;
my($user, $passwd) = qw/someuser somepassword/;
my #sids = qw/server1 server2 ... server#/;
foreach my $sid (#sids) {
my $dsn = "dbi:Sybase:server=$sid;charset=iso_1;tdsLevel=CS_TDS_50";
say "DSN: $dsn";
my $dbh = DBI->connect($dsn, $user, $passwd, { PrintError => 1,
RaiseError => 1,
AutoCommit => 1,
syb_enable_utf8 => 1
}
);
if( not defined ($dbh) ) {
say "WARNING: Could not connect to $dsn";
} else {
say "INFO: DB connection established";
my($size,$version);
my $query = 'SELECT
SUM(size)
FROM
master..sysusages
WHERE
dbid = 4
AND
segmap = 3
';
my $sth = $dbh->prepare($query);
$sth->execute;
while (#row = $sth->fetchrow) {
$size = $row[0];
}
$sth->finish;
$query = 'select ##version';
$sth = $dbh->prepare($query);
$sth->execute;
while (#row = $sth->fetchrow) {
$version = $row[0];
}
$sth->finish;
$dbh->disconnect;
$size = $size * 16 / 1024;
say "SID: $sid, SIZE: $size, VERSION: $version";
}
}
NOTE: use strict; use warnings; helps to avoid many pitfalls, use diagnostics; helps to identify a problem in difficult cases
NOTE: $sth->fetchrow_hashref allows address hash element by name, no need to count index of array as in case $sth->fetch_rowarray
in my naiv thinking I hid some lines of code that I was convinced could not be the reason for this misbehaviour. As it turns out, it was. So the reason for my problem was a simple logical error, that caused the password, that was used after a connect to a certain DB to be wrong.

How can I find the common items from two arrays fast?

I am trying to findy the common lines among two tab separated files based one field.
One line of the first file:
1 52854 s64199.1 A . . . PR GT 0/0
One line of the second file:
chr1 52854 . C T 215.302 . AB=0.692308;ABP=7.18621;AC=1;AF=0.5;AN=2;AO=9;CIGAR=1X;DP=13;DPB=13;DPRA=0;EPP=3.25157;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=17.5429;PAIRED=0;PAIREDR=0.25;PAO=0;PQA=0;PQR=0;PRO=0;QA=318;QR=138;RO=4;RPP=3.25157;RPPR=5.18177;RUN=1;SAF=0;SAP=22.5536;SAR=9;SRF=1;SRP=5.18177;SRR=3;TYPE=snp;technology.illumina=1;BVAR GT:DP:RO:QR:AO:QA:GL 0/1:13:4:138:9:318:-5,0,-5
Based on the second field (52854) in this example I have a much.
Here is my code which finds the common ones, but my files are quite large and takes a lot of time.
Is there any way to speed up the process?
Thank you very much in advance.
#!/app/languages/perl/5.14.2/bin/perl
use strict;
use warnings;
my $map_file = $ARGV[0];
my $vcf_file = $ARGV[1];
open my $map_info, $map_file or die "Could not open $map_file: $!";
my #map_array = ();
my #vcf_array = ();
while( my $mline = <$map_info>) {
chomp $mline;
my #data1 = split('\t', $mline);
my $pos1 = $data1[1];
push (#map_array, $pos1);
}
open my $vcf_info, $vcf_file or die "Could not open $vcf_file: $!";
while( my $line = <$vcf_info>) {
if ($line !~ m/^#/) {
push (#vcf_array, $line);
}
}
foreach my $a (#map_array) {
chomp $a;
foreach my $b (#vcf_array) {
chomp $b;
my #data = split('\t', $b);
my $pos2 = $data[1];
my $ref2 = $data[3];
my $allele = $data[4];
my $genotype = $data[9];
if ($a == $pos2) {
print $pos2 . "\t" . $ref2. "\t".$allele."\t".$genotype. "\n";
#print "$b\n";
}
}
}
Here's a version that should run much faster than your own
It reads the map file and stores each pos field in hash %wanted. Then it reads through the second file and checks whether the record is in the list of wanted values. If so then it splits the record and prints the fields you require
Note that I haven't been able to test this beyond making sure that it compiles
use strict;
use warnings;
use 5.010;
use autodie;
my ( $map_file, $vcf_file ) = #ARGV;
my %wanted;
{
open my $map_fh, '<', $map_file;
while ( <$map_fh> ) {
chomp;
my $pos = ( split /\t/, $_, 3 )[1];
++$wanted{$pos};
}
}
{
open my $vcf_fh, '<', $vcf_file;
while ( <$vcf_fh> ) {
next if /^#/;
chomp;
my $pos = ( split /\t/, $_, 3 )[1];
next unless $wanted{$pos};
my ( $ref, $allele, $genotype ) = ( split /\t/ )[3, 4, 9];
print join("\t", $pos, $ref, $allele, $genotype), "\n";
}
}
Below please find minimal modification of your script for hash based searches
use strict;
use warnings;
my $map_file = $ARGV[0];
my $vcf_file = $ARGV[1];
my %vcf_hash;
open( my $vcf_info, $vcf_file) or die "Could not open $vcf_file: $!";
while( my $line = <$vcf_info>) {
next if $line =~ m/^#/; # Skip comment lines
chomp $line;
my (#data) = split(/\t/, $line);
die unless #data >= 10; # Check number of fields in the input line
my ($pos) = $data[1];
# $. - line number in the file
$vcf_hash{$pos}{$.} = \#data;
}
open( my $map_info, $map_file) or die "Could not open $map_file: $!";
while( my $mline = <$map_info>) {
chomp $mline;
my (#data) = split(/\t/, $mline);
die unless #data >= 2; # Check number of fields in the input line
my ($pos) = $data[1];
if( exists $vcf_hash{$pos}) {
my $hash_ref = $vcf_hash{$pos};
for my $n (sort{$a<=>$b} keys %$hash_ref) {
my $array_ref = $hash_ref->{$n};
my $pos2 = $array_ref->[1];
my $ref2 = $array_ref->[3];
my $allele = $array_ref->[4];
my $genotype = $array_ref->[9];
print $pos2 . "\t" . $ref2. "\t".$allele."\t".$genotype. "\n";
}
}
}
The script may be improved further to reduce memory use if you use huge data files.
There is not a need to keep your map_file in memory but just keys. It is good to make them keys in a hash which you use for existence checking. You don't have to keep your vcf_file in memory as well, but you can just make a decision to output or not.
#!/app/languages/perl/5.14.2/bin/perl
use strict;
use warnings;
use autodie;
use constant KEY => 1;
use constant FIELDS => ( 1, 3, 4, 9 );
my ( $map_file, $vcf_file ) = #ARGV;
my %map;
{
my $fh;
open $fh, '<', $map_file;
while (<$fh>) {
$map{ ( split /\t/, $_, KEY + 2 )[KEY] } = undef;
}
}
{
my $fh;
open $fh, '<', $vcf_file;
while (<$fh>) {
next if /^#/;
chomp;
my #data = split /\t/;
print join "\t", #data[FIELDS] if exists $map{ $data[KEY] };
}
}

How to provide default output

I'm trying to get the following Perl program to print a default value, if no result is returned from a query. I am not sure how to do that.
When I run the script, I get the error: Can't call method "country_name" on an undefined value. Are there Any ideas on how I can change the program to print a default value, if no results are returned?
#!/usr/bin/perl update IP addresses with country
use strict;
use warnings;
use Net::IPInfoDB;
my $psql = "/usr/local/pgsql/current/bin/psql";
my $db = 'cpi';
my $args = "-U postgres -qc";
my $date = `/bin/date +\%y\%m\%d%H`;
my $reportfile = "/tmp/multiiplogins-$date";
my $sendmail = "/usr/sbin/sendmail -t -fcpi\#user.com";
my $mailsubject = "Login Report";
my $mailto = 'email#user.com';
my $query = "SELECT userid, login, email, logins, ips FROM (SELECT userid,login,email, count(userid) AS logins, count(ipaddr) AS ips FROM (SELECT l.userid, u.login, u.email, l.ipaddr FROM synloginaccess l, synusers u$
my $query2 = "SELECT l.userid, login, email, ipaddr FROM synloginaccess l, synusers u where l.accesstime > (now() - interval '24 hours') and l.type=2 and l.userid=u.userid ORDER BY l.userid;";
open (REPORT, ">$reportfile");
my $command = qq/$psql $db $args "$query"/;
my $command2 = qq/$psql $db $args "$query2"/;
my $result = `$command`;
my $result2 = `$command2`;
my $g = Net::IPInfoDB->new;
$g->key("api_key");
#we split $login into an array, line-by-line
my #lines = split("\n", $result2);
for my $line (#lines) {
#now we iterate through every line one-by-one
if ($line =~ /(?<ip>\d+\.\d+\.\d+\.\d+)/) {
my $city = $g->get_city("$1");
my $addr = $g->get_country("$1");
print "$line " . "| " . "\t" . $city->city_name . ", " . $addr->country_name ."\n";
print REPORT "$line " . "| " . "\t" . $city->city_name . ", ". $addr->country_name ."\n";
}
else {
print "$line \n ";
}
}
close REPORT;
mailReport();
sub mailReport{
#mail it
open(MAIL, "|$sendmail");
print MAIL "To: $mailto\n";
print MAIL "Subject: $mailsubject\n";
print MAIL "\n";
open (INFILE, "$reportfile");
my #contents = <INFILE>;
my $line;`
$addr ? $addr->country_name : "default"

Running a PL/SQL procedure in a Perl script

I have a Perl script which takes a file as input and has PL/SQL (for statements and DBMS_OUTPUT.PUT_LINE) in it. I need to run make a database connection and run that file in Perl script.The pl/sql has Begin declare end section,with for statements on it which is writing data of 3 columns separated using commas(DBMS_OUTPUT.PUT_LINE) I have shown what I have tried below. Is it correct?
my $dbh = DBI->connect( "dbi:Oracle:$db", $username, $passwd ) ||
die( $DBI::errstr . "\n" );
$dbh->{AutoCommit} = 0;
$dbh->{PrintError} = 1;
open FILE, $sql_file or die "Could not open file";
$query = '';
while($line = <FILE>) {
chomp($line);
$query .= $line;
}
my $sth = $dbh->prepare($query);
$sth->execute();
while ( my #row = $sth->fetchrow_array() ) {
foreach (#row) {
print "$_";
}
print "\n";
}
$sth->fetch(), $sth->fetchrow_*(), and friends all fetch records from a result set. In Oracle PL/SQL blocks don't normally return result sets. So calling $sth->fetchrow_array() after running a PL/SQL block won't return any results.
If your using DBMS_OUTPUT.PUT_LINE to output results you will need to use the dbms_output_* functions provided by DBD::Oracle.
my $dbms_output_byte_limit = 1000000;
$dbh->func( $dbms_output_byte_limit, 'dbms_output_enable' );
my $sth = $dbh->prepare($query);
$sth->execute();
my #text = $dbh->func( 'dbms_output_get' );

Need help collapsing a list and obtaining totals in perl

Hi I have a large list of data:
http://paste-it.net/public/y17027d/
It is 67859 rows by 10 columns. The 6th column contains values that represent Z-Scores from 1 to 6 in .01 increments. What I would like to do is to total all of the other column values that have the same Z-score value, but my current code is not working.
What I have now prints out values but the totals for each Z-Score are incorrect.
Here is my code:
#! /usr/bin/perl
use strict;
use warnings;
use POSIX;
use Data::Dumper;
my $input = $ARGV[0];
open (DATAFILE, $input) or die $!;
open(OUT,">>"."final.output.txt");
my($line,$fMeasure,$filename,$recall,$precision,$z_score,$computer_calls,$johns_calls,$false_negatives,$false_positives,$true_positives,$count);
$fMeasure=$filename=$recall=$precision=$z_score=$computer_calls=$johns_calls=$false_negatives=$false_positives=$true_positives=$count = 0;
my %stats=();
my %zscore=();
while($line = <DATAFILE>){
# Chop off new line character, skip the comments and empty lines.
chomp($line);
my #temp = split(/\t/, $line);
$true_positives = $temp[0];
$false_positives = $temp[1];
$false_negatives = $temp[2];
$johns_calls = $temp[3];
$computer_calls = $temp[4];
$z_score = $temp[5];
$fMeasure = $temp[6];
$precision = $temp[7];
$recall = $temp[8];
$filename = $temp[9];
$stats{$z_score}{$filename}[0] = $true_positives;
$stats{$z_score}{$filename}[1] = $false_positives;
$stats{$z_score}{$filename}[2] = $johns_calls;
$stats{$z_score}{$filename}[3] = $computer_calls;
$stats{$z_score}{$filename}[4] = $fMeasure;
$stats{$z_score}{$filename}[5] = $precision;
$stats{$z_score}{$filename}[6] = $recall;
$stats{$z_score}{$filename}[6] = $filename;
$zscore{$z_score}++;
}
my $false_positives_new = 0;
my $true_positives_new = 0;
my $johns_calls_new = 0;
my $computer_calls_new = 0;
my $file_name = 0;
foreach $z_score ( sort keys %stats ) {
foreach $filename( keys %{$stats{$z_score}} ){
my $tp = $stats{$z_score}{$filename}[0];
my $fp = $stats{$z_score}{$filename}[1];
my $jc = $stats{$z_score}{$filename}[2];
my $cc = $stats{$z_score}{$filename}[3];
my $fn = $stats{$z_score}{$filename}[6];
#print "$z_score\t$jc\n";
$false_positives_new = $false_positives + $fp;
$true_positives_new = $true_positives + $tp;
$johns_calls_new = $johns_calls + $jc;
$computer_calls_new = $computer_calls + $cc;
#print OUT "$fn\n";
}
print OUT"$true_positives_new\t$false_positives_new\t$johns_calls_new\t$computer_calls_new\t$z_score \n";
$false_positives_new = 0;
$true_positives_new = 0;
$johns_calls_new = 0;
$computer_calls_new = 0;
$file_name = 0;
}
close(OUT);
close (DATAFILE);
I know that I must be doing something wrong but I am not able to figure out what. Any help would be greatly appreciated. Thank you
OK. I was able to get the data from pastebin and I think the following code does what you want.
#! /usr/bin/perl
use strict; use warnings;
use Data::Dumper;
my ($input) = #ARGV;
open my $DATAFILE, '<', $input
or die "Cannot open '$input': $!";
my #field_names = qw(
fMeasure
recall
precision
z_score
computer_calls
johns_calls
false_negatives
false_positives
true_positives
count
);
my #track_fields = qw(
false_positives
false_negatives
johns_calls
computer_calls
);
my (%stats, %by_zscore);
while ( my $line = <$DATAFILE> ) {
last unless $line =~ /\S/;
chomp $line;
my #temp = split /\t/, $line;
my $filename = pop #temp;
my %fields;
#fields{ #field_names } = #temp;
my $z_score = $fields{z_score};
$stats{ $z_score }{$filename} = \#temp;
for my $f ( #track_fields ) {
$by_zscore{$z_score}{ $f } += $fields{ $f };
}
}
print Dumper \%by_zscore;
I think you want to say
$false_positives_new = $false_positives_new + $fp;
# etc.
instead of
$false_positives_new = $false_positives + $fp;

Resources