Perl: best way to pass array of hash to a sub - arrays

My class looks like the below:
package CSVKeepCols;
use strict;
use warnings;
use Text::CSV;
use Data::Dumper;
my $text;
my $del;
my #cols;
my $output = '';
sub load {
my $class = shift;
my $self = {};
bless $self;
return $self;
}
sub input {
my $class = shift;
$text = shift;
return $class;
}
sub setOpts {
my ($class, $opts) = #_;
$del = $opts->{'delimeter'};
#cols = $opts->{'columns'};
}
sub process {
my #lines = split /\n|\r|\n\r|\r\n/, $text;
my $csv = Text::CSV->new({ sep_char => $del });
foreach (#lines) {
die('Invalid CSV data') if !$csv->parse($_);
$output .= __filterFields($csv->fields()) . "\n";
}
}
sub output {
return $output;
}
sub __filterFields {
my #fields = #_;
my $line = '';
foreach (#cols) {
$line .= ',' if $line;
$line .= $fields[$_];
}
return $line;
}
1;
I am using this class from my code like this:
$parser = load CSVKeepCols();
$parser->input($out);
$parser->setOpts({'delimeter' => ',', 'columns' => [1,2]});
$parser->process();
$out = $parser->output();
I am expecting the setOpts subroutine to take the hash {'delimeter' => ',', 'columns' => [1,2]} and from it set the value of $del to ',' and @cols to (1,2), so that I can loop through the @cols array.
However, when I try to loop through @cols in the __filterFields subroutine I get the error:
Use of reference "ARRAY(0x22e32e0)" as array index at CSVKeepCols.pm line 52.
How do I fix this?

In setOpts, you set @cols = $opts->{columns};
$opts->{columns} contains a reference to an array ( [1,2] ), so @cols ends up holding a single element: that array reference.
So in __filterFields:
for ( @cols ){
# $_ is an arrayref [1,2]
# you are using it as an index to retrieve a value from @fields
$line .= $fields[$_];
# Thus the error: "use of reference ARRAY"..." as array index"
# You should be using an integer here.
}
To fix it:
# Store the parser options in the package-level $del and @cols.
#   $opts - hashref with the keys 'delimeter' (separator character; the key
#           keeps the spelling the caller uses) and 'columns' (arrayref of
#           the column indices to keep).
sub setOpts {
    my ($class, $opts) = @_;
    $del = $opts->{'delimeter'};
    # 'columns' holds an array *reference*. Dereference it so that @cols
    # contains the indices themselves; assigning the reference directly would
    # leave @cols with one element (the arrayref), which is what triggers
    # "Use of reference ... as array index" in __filterFields.
    @cols = @{ $opts->{'columns'} };
}
Edit: removed unnecessary check

Related

filter file by unique and biggest value; combine two arrays into hash

I need to extract by unique genus (first part of the name of species) in one column but with by biggest number in another column in a CSV file when having multiples of the same name.
So if have multiple genus (same first name) then take the biggest number in the last column to select which will represent that genus.
I have extracted the information into arrays, but I am having trouble with combining the two in order to select. I was using
https://perlmaven.com/unique-values-in-an-array-in-perl
to help but I need to include biggest number in last column when have the same genus situation.
use strict;
use warnings;
open taxa_fh, '<', "$ARGV[0]" or die qq{Failed to open "$ARGV[0]" for input: $!\n};
open match_fh, ">$ARGV[0]_genusLongestLEN.csv" or die qq{Failed to open for output: $!\n};my #unique;
my %seen;
my %hash;
while ( my $line = <taxa_fh> ) {
chomp( $line );
my #parts = split( /,/, $line );
my #name = split( / /, $parts[3]);
my #A = $name[0];
my #B = $parts[5];
#seen{#A} = ();
my #merged = (#A, grep{!exists $seen{$_}} #B);
my #merged = (#A, #B);
#hash{#A} = #B;
print "$line\n";
}
close taxa_fh;
close match_fh;
Input example:
AB179735.1.1711,AB179735.1.1711,278983,Eucyrtidium hexagonatum,0,1600
AB179736.1.1725,AB179736.1.1725,278986,Pterocorys zancleus,0,1763
AB181888.1.1758,AB181888.1.1758,281609,Protoperidinium crassipes,0,1700
AB181890.1.1709,AB181890.1.1709,281610,Protoperidinium denticulatum,0,1800
AB181892.1.1738,AB181892.1.1738,281611,Protoperidinium divergens,0,1800
AB181894.1.1744,AB181894.1.1744,281612,Protoperidinium leonis,0,1500
AB181899.1.1746,AB181899.1.1746,281613,Protoperidinium pallidum,0,1600
AB181902.1.1741,AB181902.1.1741,261845,Protoperidinium pellucidum,0,1750
AB181904.1.1734,AB181904.1.1734,281614,Protoperidinium punctulatum,0,1599
AB181907.1.1687,AB181907.1.1687,281615,Protoperidinium thorianum,0,1600
AB120001.1.1725,AB120001.1.1725,244960,Gyrodinium spirale,0,1500
AB120002.1.1725,AB120002.1.1725,244961,Gyrodinium fusiforme,0,1800
AB120003.1.1724,AB120003.1.1724,244962,Gyrodinium rubrum,0,1700
AB120004.1.1723,AB120004.1.1723,244963,Gyrodinium helveticum,0,1500
AB120309.1.1800,AB120309.1.1800,4442,Camellia sinensis,0,1700
Wanted output:
AB179735.1.1711,AB179735.1.1711,278983,Eucyrtidium hexagonatum,0,1600
AB179736.1.1725,AB179736.1.1725,278986,Pterocorys zancleus,0,1763
AB181890.1.1709,AB181890.1.1709,281610,Protoperidinium denticulatum,0,1800
AB120002.1.1725,AB120002.1.1725,244961,Gyrodinium fusiforme,0,1800
AB120309.1.1800,AB120309.1.1800,4442,Camellia sinensis,0,1700
# Keep, for every genus, the CSV row whose sixth column is the largest, then
# print the kept rows. Rows come from the ARGV handle.
use Text::CSV_XS qw( );
# Per the Text::CSV_XS documentation: auto_diag > 1 makes parse errors fatal,
# binary allows non-ASCII bytes in fields, and quote_space => 0 stops fields
# that merely contain a space (e.g. the species name) from being quoted on output.
my $csv = Text::CSV_XS->new({
auto_diag => 2,
binary => 1,
quote_space => 0,
});
# genus => arrayref of the best row seen so far for that genus.
my %by_genus;
while ( my $row = $csv->getline(\*ARGV) ) {
# The genus is the first whitespace-separated word of the fourth column.
my ($genus) = split(' ', $row->[3]);
# Store the row when the genus is new, or when this row's sixth column is
# numerically greater than the stored row's.
$by_genus{$genus} = $row
if !$by_genus{$genus}
|| $row->[5] > $by_genus{$genus}[5];
}
# select() with no arguments returns the currently selected output handle;
# values() yields the rows in no particular order.
$csv->say(select(), $_) for values(%by_genus);
Properly naming the variables makes the code more readable:
#! /usr/bin/perl
use warnings;
use strict;

# For each genus (first word of the fourth comma-separated column), remember
# the input line whose sixth column is numerically largest, then print the
# remembered lines. Input is read via <>; output order follows hash order.
my %selected;

while (<>) {
    my @columns = split /,/;
    my ($species, $value) = @columns[3, 5];
    my ($genus) = split ' ', $species;

    # Record for this genus, created on first sight.
    my $best = $selected{$genus} ||= {};

    # Only a value strictly above the current maximum (0 when there is none
    # yet) replaces the stored line.
    next unless $value > ($best->{max} || 0);
    @{$best}{qw(max line)} = ($value, $_);
}

print $selected{$_}{line} for keys %selected;
The order of the output lines is random.
You can use this Perl one-liner as well:
perl -F, -lane ' ($g=$F[3])=~s/(^\S+).*/$1/; if( $mx{$g}<$F[-1])
{ $kv{$g}=$_;$mx{$g}=$F[-1] } END { print $kv{$_} for(keys %kv) } ' file
with the given inputs in cara.txt file, the output is
$ perl -F, -lane ' ($g=$F[3])=~s/(^\S+).*/$1/; if( $mx{$g}<$F[-1])
{ $kv{$g}=$_;$mx{$g}=$F[-1] } END { print $kv{$_} for(keys %kv) } ' cara.txt
AB179736.1.1725,AB179736.1.1725,278986,Pterocorys zancleus,0,1763
AB179735.1.1711,AB179735.1.1711,278983,Eucyrtidium hexagonatum,0,1600
AB120309.1.1800,AB120309.1.1800,4442,Camellia sinensis,0,1700
AB120002.1.1725,AB120002.1.1725,244961,Gyrodinium fusiforme,0,1800
AB181890.1.1709,AB181890.1.1709,281610,Protoperidinium denticulatum,0,1800
$
Not fancy but gets the job done
#!/usr/bin/perl
use strict;
use warnings;

# For every genus (first word of the fourth comma-separated column) keep the
# input line with the largest value in the sixth column, then print the kept
# lines. Output order follows hash order and is therefore not stable.
my $input_file = '/var/tmp/test.in';
open my $in, '<', $input_file or die "Cannot open $input_file: $!";

# genus => { num => largest value seen so far, line => the line it came from }
my %genuses;
while ( my $line = <$in> ) {
    chomp $line;
    my @splitline = split /,/, $line;
    my ( $species, $num ) = @splitline[ 3, 5 ];
    next if !defined $species || !defined $num;    # blank or short line

    my ($name) = split ' ', $species;
    next if !defined $name;                        # empty species column

    if ( !exists $genuses{$name} || $genuses{$name}{num} < $num ) {
        $genuses{$name} = { num => $num, line => $line };
    }
}
close $in;

# Iterate over the keys only. Looping over the bare hash would also visit the
# values (hash references), look those up as keys and print empty lines.
foreach my $genus ( keys %genuses ) {
    print "$genuses{$genus}{line}\n";
}
Output:
[root#localhost tmp]# ./test.pl
AB179736.1.1725,AB179736.1.1725,278986,Pterocorys zancleus,0,1763
AB179735.1.1711,AB179735.1.1711,278983,Eucyrtidium hexagonatum,0,1600
AB120309.1.1800,AB120309.1.1800,4442,Camellia sinensis,0,1700
AB120002.1.1725,AB120002.1.1725,244961,Gyrodinium fusiforme,0,1800
AB181890.1.1709,AB181890.1.1709,281610,Protoperidinium denticulatum,0,1800
I don't see an obvious criterion that your expected output is sorted by, so this prints the lines in hash order.

Possible to assign count to array? like @content.$i?

my csv input file
Chapter,Content
A,1
B,3
C,1
C,2
C,3
D,5
My current perl script
open(INFILE,$input)||die "can't open the file";
#line = (split/,/,$_);
#line_last = (split/\n/,$line[1]);
if ($_ =~ /A/){
push #con1, $line[1];
}
elsif ($_ =~ /B/){
push #con2, $line[1];
}
elsif ($_ =~ /C/){
push #con3, $line[1];
}
elsif ($_ =~ /D/){
push #con4, $line[1];
}
close INFILE;
chomp #con1, #con2, #con3, #con4;
print "content =", (join ", ", #con1),"\n";
print "content =", (join ", ", #con2),"\n";
print "content =", (join ", ", #con3),"\n";
print "content =", (join ", ", #con4),"\n";
My current code works, but I want to make it shorter and more efficient.
How can I generate the @con arrays automatically? Something like using a for loop with $i to select which @con array to push to, so that the overall code gets shorter.
See "Why it's stupid to `use a variable as a variable name'" for why numbered variables are a bad idea.
You should use an AoA (array of arrays) instead.
# Array-of-arrays variant: collect the second CSV column into one array per
# known chapter name, in the fixed order given by @foos. Rows whose first
# column is not listed in @foos (such as a header row) are ignored.
# $qfn is the path of the CSV file and must be set by the surrounding code.
use Text::CSV_XS qw( );
my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 2 });
open(my $fh, "<", $qfn)
    or die("Can't open \"$qfn\": $!\n");
my @foos = qw( A B C D );
my $num_eles = 0;
# chapter name => index into @cons_by_foo (0-based)
my %index_by_foo = map { $_ => $num_eles++ } @foos;
my @cons_by_foo = map { [] } 1..$num_eles;
while (my $row = $csv->getline($fh)) {
    # Test with defined(), not truth: the first chapter has index 0, which is
    # false and would otherwise be skipped.
    if ( defined( my $i = $index_by_foo{ $row->[0] } ) ) {
        push @{ $cons_by_foo[$i] }, $row->[1];
    }
}
# Start at 0 so the first chapter is printed too.
for my $i (0..$num_eles-1) {
    print("content =", join(", ", @{ $cons_by_foo[$i] }), "\n");
}
That said, a HoA seems a far better fit.
# Hash-of-arrays variant: group the second CSV column by the value of the
# first column, then print one line per group in sorted key order.
# $qfn is the path of the CSV file and must be set by the surrounding code.
# NOTE: every row is treated as data, so a header row becomes a group of its
# own; discard it with one extra $csv->getline($fh) before the loop if needed.
use Text::CSV_XS qw( );
my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 2 });
open(my $fh, "<", $qfn)
    or die("Can't open \"$qfn\": $!\n");
# first-column value => arrayref of the second-column values seen with it
my %cons_by_foo;
while (my $row = $csv->getline($fh)) {
    # push autovivifies the array for a key on first use.
    push @{ $cons_by_foo{ $row->[0] } }, $row->[1];
}
for my $foo (sort keys %cons_by_foo) {
    print("content $foo =", join(", ", @{ $cons_by_foo{$foo} }), "\n");
}
This is what a hash is great for IMO (hashtable or dictionary in other languages).
note: I haven't tested this code, it's from memory.
my %con;
while (<INFILE>)
{
my #line_last = (split/\n/,$line[1]);
$con{$line[0]} = $con{$line[0]} + ", " + $line[1];
}
foreach my $str (keys sort %con)
{
print "content $con{$str}\n";
}
edit: much better and tested code here.
# Group the second comma-separated column of input.txt by the first column
# and print one "content <chapter> = v1, v2, ..." line per chapter.
my $input_file = "input.txt";
# Lexical handle and 3-arg open; report which file failed and why.
open(my $in, '<', $input_file) or die "can't open the file $input_file: $!";
# chapter => arrayref of its content values, in file order
my %con;
while (my $row = <$in>)
{
    next if $row =~ /^Chapter/;    # skip the header line
    chomp $row;
    my ($chapter, $content) = split /,/, $row;
    push @{ $con{$chapter} }, $content;
}
close $in;
foreach my $str (sort keys %con)
{
    my $tmp = join ', ', @{ $con{$str} };
    print "content $str = $tmp\n";
}
output is:
content A = 1
content B = 3
content C = 1, 2, 3
content D = 5
I know that isn't your output, but it seems more useful than what you had.
my %cc; #chapter contents
open my $FH, $filename or die "Cannot open $filename";
push #{ $cc{ $$_[0] } }, $$_[1] for map {chomp;[split/,/]} <$FH>;
close $FH;
print "Chapter = $_ Content = " . join(", ", #{ $cc{$_} }) . "\n" for sort keys %cc;
Output:
Chapter = A Content = 1
Chapter = B Content = 3
Chapter = C Content = 1, 2, 3
Chapter = D Content = 5

creating hash of hashes in perl

I have an array with contain values like
my #tmp = ('db::createParamDef xy', 'data $data1', 'model $model1', 'db::createParamDef wl', 'data $data2', 'model $model2')
I want to create a hash of hashes with values of xy and wl
my %hash;
my #val;
for my $file(#files){
for my $mod(#tmp){
if($mod=~ /db::createParamDef\s(\w+)/){
$hash{$file}="$1";
}
else{
my $value = split(/^\w+\s+/, $mod);
push (#val,$values);
}
$hash{$fname}{$1}="#val";
#val=();
}
}
This gives me only the filename and the value of $1, but I'm expecting the output to look like this:
%hash=(
'filename1'=>
{
'xy'=>'$data1,$model1',
}
'filename2'=>
{
'wl'=>'$data2,$model2',
}
)
Where am I going wrong?
This was actually a pretty tricky problem. Try something like this:
#!/bin/perl
use strict;
use warnings;
use Data::Dumper;

# Build %hash = ( file name => { parameter name => "value1,value2,..." } ).
# Every 'db::createParamDef NAME' entry in @tmp starts a new section that is
# assigned to the next name in @files; the second word of each following
# entry is appended to that section's comma-separated value list.
my @tmp = ('db::createParamDef xy', 'data $data1', 'model $model1', 'db::createParamDef wl', 'data $data2', 'model $model2');
my @files = ('filename1', 'filename2');

my %hash;
my $next_file = 0;      # index of the file name the next section will use
my ($file, $param);     # section currently being filled
my @val;                # values collected for the current section

for my $mod (@tmp) {
    if ( $mod =~ /db::createParamDef\s+(\w+)/ ) {
        die "More db::createParamDef entries than file names\n"
            if $next_file > $#files;
        ($file, $param) = ($files[$next_file++], $1);
        @val = ();
        $hash{$file}{$param} = '';
        next;
    }

    # Entries before the first createParamDef belong to no section.
    next if !defined $param;

    my $value = (split /\s+/, $mod)[1];
    push @val, $value;
    $hash{$file}{$param} = join ',', @val;
}

print Dumper \%hash;
Let me know if you have any questions about how it works!
my #tmp = (
'db::createParamDef xy', 'data $data1', 'model $model1',
'db::createParamDef wl', 'data $data2', 'model $model2'
);
my $count = 0;
my %hash = map {
my %r;
if (my($m) = $tmp[$_] =~ /db::createParamDef\s(\w+)/) {
my $i = $_;
my #vals = map { $tmp[$i+$_] =~ /(\S+)$/ } 1..2;
$r{"filename". ++$count}{$m} = join ",", #vals;
}
%r;
} 0 .. $#tmp;
use Data::Dumper; print Dumper \%hash;
output
$VAR1 = {
'filename1' => {
'xy' => '$data1,$model1'
},
'filename2' => {
'wl' => '$data2,$model2'
}
};

Unable to iterate through the array in Perl

I have this perl script:
my %perMpPerMercHash;
foreach my $sheet () { #proper ranges specified
foreach my $row ( ) { #proper ranges specified
#required variables declared.
push(#{$perMpPerMercHash{join("-", $mercId, $mpId)}}, $mSku);
}
}
#Finally 'perMpPerMercHash' will be a hash of array`
foreach my $perMpPerMerc ( keys %perMpPerMercHash ) {
&genFile($perMpPerMerc, $perMpPerMercHash{$perMpPerMerc});
}
sub genFile {
my ( $outFileName, #skuArr ) = #_;
my $output = new IO::File(">$outFileName");
my $writer = new XML::Writer( OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 2);
#mpId is generated.
&prepareMessage($writer, $mpId, #skuArr);
}
sub prepareMessage {
my ( $writer, $mpId, #skuArr ) = #_;
my $count = 1;
print Dumper \#skuArr; #Printing correctly, 8-10 values.
foreach my $sku ( #skuArr ) { #not iterating.
print "loop run" , $sku, "\n"; #printed only once.
}
}
Can somebody please help why this is happening. I am new to perl and could not understand this anomaly.
EDIT:
output of Dumper:
$VAR1 = [
'A',
'B',
'C',
];
When you do
&genFile($perMpPerMerc, $perMpPerMercHash{$perMpPerMerc});
You're passing a reference to an array.
So in
sub genFile {
my ( $outFileName, @skuArr ) = @_;
@skuArr receives a single element: the array reference itself. You have to do:
sub genFile {
my ( $outFileName, $skuArr ) = @_;
and then dereference it with @$skuArr.
Have a look at references
The modified genFile sub will be:
# Open $outFileName for writing, wrap it in an XML::Writer and hand the SKUs
# to prepareMessage as a flat list.
#   $outFileName - path of the file to create
#   $skuArr      - reference to the array of SKUs
sub genFile {
    my ( $outFileName, $skuArr ) = @_;
    my $output = IO::File->new( $outFileName, '>' )
        or die "Cannot open $outFileName for writing: $!";
    my $writer = XML::Writer->new( OUTPUT => $output, DATA_MODE => 1, DATA_INDENT => 2 );
    #mpId is generated.
    # Dereference here so that prepareMessage receives the SKUs themselves.
    prepareMessage( $writer, $mpId, @{$skuArr} );
}
The other sub doesn't need to be modified.
Alternatively, you can always pass skuArr by reference:
&genFile($perMpPerMerc, $perMpPerMercHash{$perMpPerMerc});
...
sub genFile {
my ( $outFileName, $skuArr ) = #_;
...
&prepareMessage($writer, $mpId, $skuArr);
}
# Dump the SKU list and print one "loop run<sku>" line per SKU.
#   $writer - XML writer object (not used by the code shown here)
#   $mpId   - marketplace id (not used by the code shown here)
#   $skuArr - reference to the array of SKUs
sub prepareMessage {
    my ( $writer, $mpId, $skuArr ) = @_;
    print Dumper($skuArr);
    # Dereference the array ref so the loop visits every SKU instead of
    # running once with the reference itself.
    foreach my $sku ( @{$skuArr} ) {
        print "loop run", $sku, "\n";
    }
    return;
}

Warning message in older perl version

I have the following code in my script:
while (my ($key, $value) = each #values) {
if ( $key < $arraySize-1) {
if ( $values[$key+1] eq "user") {
$endcon=1;
}
}
if ( ( $startcon == 1 ) && ( $endcon != 1 ) ) {
$UptimeString .= $value;
}
if ( $value eq "up") {
$startcon=1;
}
if ( $value eq "average:") {
$LoadMinOne=$values[$key+1];
}
}
When compiling it with Perl 5.14 I get no warnings, but with Perl 5.10.1 I get this message: Type of arg 1 to each must be hash (not private array) at ./uptimep.pl line 21, near "@values) "
Line 21 is: while (my ($key, $value) = each @values) {
What does this mean?
As the error message says, each must be given a hash as its argument, but you are giving it an array.
You could replace this line:
while (my ($key, $value) = each @values) {
by:
for my $key(0 .. $#values) {
my $value = $values[$key];
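According to the documentation, each accepts an array as its argument only from Perl 5.12.0 onwards.
As it says, each expects a hash as an argument, not an array.
You can populate a hash first ( my %hash = @values; ) and use it as an argument ( while (my ($key, $value) = each %hash) ). Note, however, that this assignment pairs up consecutive elements as key/value rather than mapping each index to its element, and that a hash is iterated in no particular order, so looping over the indices 0 .. $#values is the closer replacement for the original code.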

Resources