creating hash of hashes in perl - arrays

I have an array that contains values like
# Input list: each 'db::createParamDef NAME' entry is followed by its values.
my @tmp = ('db::createParamDef xy', 'data $data1', 'model $model1', 'db::createParamDef wl', 'data $data2', 'model $model2');
I want to create a hash of hashes with values of xy and wl
# Asker's attempt, reproduced as posted; it does not build the expected
# structure. Problems visible in the code:
#   * $hash{$file} is first set to a plain string, then $hash{$fname}{...}
#     is written using $fname, which is never assigned ($file is the loop
#     variable).
#   * split() is called in scalar context, so $value receives a field count,
#     and the value pushed is $values, a different (unset) variable.
#   * @val is emptied at the end of every iteration, and $1 is reused after
#     the else branch, where no new match has set it.
my %hash;
my @val;
for my $file (@files) {
    for my $mod (@tmp) {
        if ($mod =~ /db::createParamDef\s(\w+)/) {
            $hash{$file} = "$1";
        }
        else {
            my $value = split(/^\w+\s+/, $mod);
            push(@val, $values);
        }
        $hash{$fname}{$1} = "@val";
        @val = ();
    }
}
this returns me only the filename and the value of $1, but i'm expecting output to be like this:
%hash=(
'filename1'=>
{
'xy'=>'$data1,$model1',
}
'filename2'=>
{
'wl'=>'$data2,$model2',
}
)
where am I doing wrong?!

This was actually a pretty tricky problem. Try something like this:
#!/bin/perl
use strict;
use warnings;
use Data::Dumper;

# Build a hash of hashes from a flat list. Every 'db::createParamDef NAME'
# entry in @tmp opens a new group stored under the next unused name in
# @files; the second word of each following entry is collected and stored as
# a comma-separated string under NAME.
my @tmp = (
    'db::createParamDef xy', 'data $data1', 'model $model1',
    'db::createParamDef wl', 'data $data2', 'model $model2',
);
my @files = ('filename1', 'filename2');

my %hash;
my @val;          # values collected for the current definition
my $index = 0;    # position in @files of the next unused file name
my $file;         # file name that owns the current definition
my $current;      # NAME captured from the current db::createParamDef entry

for my $mod (@tmp) {
    if ($mod =~ /db::createParamDef\s+(\w+)/) {
        $current = $1;
        die "No file name left for definition '$current'\n"
            if $index > $#files;
        $file = $files[$index++];
        $hash{$file} = {};
        @val = ();
    }
    elsif (defined $current) {
        # Keep only the second whitespace-separated word, e.g. '$data1'.
        my $value = (split ' ', $mod)[1];
        push @val, $value if defined $value;
    }
    else {
        # Entry seen before any db::createParamDef: nothing to attach it to.
        next;
    }
    $hash{$file}{$current} = join(',', @val);
}

print Dumper \%hash;
Let me know if you have any questions about how it works!

use strict;
use warnings;
use Data::Dumper;

# Build the same hash of hashes with map: for every index of @tmp that holds
# a 'db::createParamDef NAME' entry, emit one "filenameN" => { NAME => ... }
# pair, where N counts the definitions seen so far.
my @tmp = (
    'db::createParamDef xy', 'data $data1', 'model $model1',
    'db::createParamDef wl', 'data $data2', 'model $model2'
);
my $count = 0;
my %hash = map {
    my %r;
    if (my ($m) = $tmp[$_] =~ /db::createParamDef\s(\w+)/) {
        my $i = $_;
        # The two entries after a definition hold its values; take the last
        # run of non-blank characters of each. Offsets that would run past
        # the end of @tmp are skipped.
        my @vals = map  { $tmp[ $i + $_ ] =~ /(\S+)$/ }
                   grep { $i + $_ <= $#tmp } 1 .. 2;
        $r{ "filename" . ++$count }{$m} = join ",", @vals;
    }
    %r;
} 0 .. $#tmp;

print Dumper \%hash;
output
$VAR1 = {
'filename1' => {
'xy' => '$data1,$model1'
},
'filename2' => {
'wl' => '$data2,$model2'
}
};

Related

Unable to find if one item exists in array of items and return the necessary message in Perl

I have array of IDs. I have one ID which I want to find if that ID exists in the array of IDs in Perl
I tried the following code:
my $ids = [7,8,9];
my $id = 9;
# As posted: prints 'yes' or 'no' for every element of the list, which is
# why the output is "nonoyes" rather than a single answer.
foreach my $new_id (@$ids) {
    if ($new_id == $id) {
        print 'yes';
    } else {
        print 'no';
    }
}
I get the output as:
nonoyes
Instead I want to get the output as only:
yes
Since ID exists in array of IDs
Can anyone please help ?
Thanks in advance
my $ids = [7,8,9];
my $id = 9;
# $ids is an array reference, so it has to be dereferenced (@$ids) for grep.
# In boolean context grep is true when at least one element matched.
if (grep { $_ == $id } @$ids) {
    print $id . " is in the array of ids";
} else {
    print $id . " is NOT in the array";
}
You just need to remove the else part and break the loop on finding the match:
# Scan the list referenced by $ids, print 'yes' and stop at the first match;
# print "no" only when the whole list was scanned without a match.
my $flag = 0;
foreach my $new_id (@$ids) {
    if ($new_id == $id) {
        print 'yes';
        $flag = 1;
        last;
    }
}
if ($flag == 0) {
    print "no";
}
Another option using hash:
# Turn the list into a lookup hash (id => 1) so membership is a key test.
my %hash = map { $_ => 1 } @$ids;
if (exists($hash{$id})) {
    print "yes";
} else {
    print "no";
}
use List::Util qw(any); # core module
my $id = 9;
my $ids = [7,8,9];
# any() is true as soon as one element satisfies the block.
my $found_it = any { $_ == $id } @$ids;
print "yes" if $found_it;
The following piece of code should cover your requirements
use strict;
use warnings;
my $ids = [7,8,9];
my $id = 9;
# grep in scalar context returns the number of matching elements, which is
# zero (false) when $id is absent. This avoids using map purely for its
# side effect.
my $flag = grep { $_ == $id } @$ids;
print $flag ? 'yes' : 'no';
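NOTE: if you want a plain array rather than an array reference, declare it as my @ids = (7,8,9); (parentheses, not square brackets) and iterate over @ids directly. Writing my @ids = [7,8,9]; would create a one-element array holding an array reference.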

filter file by unique and biggest value; combine two arrays into hash

I need to extract by unique genus (first part of the name of species) in one column but with by biggest number in another column in a CSV file when having multiples of the same name.
So if have multiple genus (same first name) then take the biggest number in the last column to select which will represent that genus.
I have extracted the information into arrays, but I am having trouble with combining the two in order to select. I was using
https://perlmaven.com/unique-values-in-an-array-in-perl
to help but I need to include biggest number in last column when have the same genus situation.
use strict;
use warnings;
# Asker's attempt, reproduced as posted. As written it prints every input
# line unchanged, never writes to match_fh, and %hash ends up holding the
# last value seen for each genus rather than the largest one.
open taxa_fh, '<', "$ARGV[0]" or die qq{Failed to open "$ARGV[0]" for input: $!\n};
open match_fh, ">$ARGV[0]_genusLongestLEN.csv" or die qq{Failed to open for output: $!\n};
my @unique;
my %seen;
my %hash;
while ( my $line = <taxa_fh> ) {
    chomp( $line );
    my @parts = split( /,/, $line );
    my @name = split( / /, $parts[3]);
    my @A = $name[0];      # genus: first word of the species column
    my @B = $parts[5];     # number from the last column
    @seen{@A} = ();
    my @merged = (@A, grep{!exists $seen{$_}} @B);
    my @merged = (@A, @B); # second declaration masks the one above
    @hash{@A} = @B;
    print "$line\n";
}
close taxa_fh;
close match_fh;
Input example:
AB179735.1.1711,AB179735.1.1711,278983,Eucyrtidium hexagonatum,0,1600
AB179736.1.1725,AB179736.1.1725,278986,Pterocorys zancleus,0,1763
AB181888.1.1758,AB181888.1.1758,281609,Protoperidinium crassipes,0,1700
AB181890.1.1709,AB181890.1.1709,281610,Protoperidinium denticulatum,0,1800
AB181892.1.1738,AB181892.1.1738,281611,Protoperidinium divergens,0,1800
AB181894.1.1744,AB181894.1.1744,281612,Protoperidinium leonis,0,1500
AB181899.1.1746,AB181899.1.1746,281613,Protoperidinium pallidum,0,1600
AB181902.1.1741,AB181902.1.1741,261845,Protoperidinium pellucidum,0,1750
AB181904.1.1734,AB181904.1.1734,281614,Protoperidinium punctulatum,0,1599
AB181907.1.1687,AB181907.1.1687,281615,Protoperidinium thorianum,0,1600
AB120001.1.1725,AB120001.1.1725,244960,Gyrodinium spirale,0,1500
AB120002.1.1725,AB120002.1.1725,244961,Gyrodinium fusiforme,0,1800
AB120003.1.1724,AB120003.1.1724,244962,Gyrodinium rubrum,0,1700
AB120004.1.1723,AB120004.1.1723,244963,Gyrodinium helveticum,0,1500
AB120309.1.1800,AB120309.1.1800,4442,Camellia sinensis,0,1700
Wanted output:
AB179735.1.1711,AB179735.1.1711,278983,Eucyrtidium hexagonatum,0,1600
AB179736.1.1725,AB179736.1.1725,278986,Pterocorys zancleus,0,1763
AB181890.1.1709,AB181890.1.1709,281610,Protoperidinium denticulatum,0,1800
AB120002.1.1725,AB120002.1.1725,244961,Gyrodinium fusiforme,0,1800
AB120309.1.1800,AB120309.1.1800,4442,Camellia sinensis,0,1700
use Text::CSV_XS qw( );

my $csv = Text::CSV_XS->new({
    auto_diag   => 2,
    binary      => 1,
    quote_space => 0,
});

# For each genus (first word of the fourth column) remember the row whose
# sixth column is numerically the largest seen so far.
my %by_genus;
while ( my $row = $csv->getline(\*ARGV) ) {
    my ($genus) = split(' ', $row->[3]);
    my $best = $by_genus{$genus};
    if ( !$best || $row->[5] > $best->[5] ) {
        $by_genus{$genus} = $row;
    }
}

# Emit the retained rows on the currently selected output handle.
for my $kept ( values(%by_genus) ) {
    $csv->say(select(), $kept);
}
Properly naming the variables makes the code more readable:
#! /usr/bin/perl
use warnings;
use strict;

# Read CSV lines from the files named on the command line (or STDIN) and,
# for every genus (first word of the fourth column), keep the line whose
# sixth column is numerically the largest.
my %selected;
while (my $line = <>) {
    my ($species, $value) = (split /,/, $line)[3, 5];
    next if !defined $value;    # blank line or too few columns

    my $genus = (split ' ', $species)[0];
    next if !defined $genus;    # empty species column

    # Test for a previously stored entry with exists rather than comparing
    # against a default of 0, so a genus whose values are all 0 or negative
    # is still reported.
    if (!exists $selected{$genus} || $value > $selected{$genus}{max}) {
        $selected{$genus} = { max => $value, line => $line };
    }
}

for my $genus (keys %selected) {
    print $selected{$genus}{line};
}
The order of the output lines is random.
You can use this Perl command line as well
perl -F, -lane ' ($g=$F[3])=~s/(^\S+).*/$1/; if( $mx{$g}<$F[-1])
{ $kv{$g}=$_;$mx{$g}=$F[-1] } END { print $kv{$_} for(keys %kv) } ' file
with the given inputs in cara.txt file, the output is
$ perl -F, -lane ' ($g=$F[3])=~s/(^\S+).*/$1/; if( $mx{$g}<$F[-1])
{ $kv{$g}=$_;$mx{$g}=$F[-1] } END { print $kv{$_} for(keys %kv) } ' cara.txt
AB179736.1.1725,AB179736.1.1725,278986,Pterocorys zancleus,0,1763
AB179735.1.1711,AB179735.1.1711,278983,Eucyrtidium hexagonatum,0,1600
AB120309.1.1800,AB120309.1.1800,4442,Camellia sinensis,0,1700
AB120002.1.1725,AB120002.1.1725,244961,Gyrodinium fusiforme,0,1800
AB181890.1.1709,AB181890.1.1709,281610,Protoperidinium denticulatum,0,1800
$
Not fancy but gets the job done
#!/usr/bin/perl
use strict;
use warnings;

# For every genus (first word of the fourth CSV column) keep the input line
# whose sixth column holds the largest number, then print the kept lines.
my $input = '/var/tmp/test.in';
open(my $in, '<', $input) or die "Can't open $input: $!\n";

my %genuses = ();
while (my $line = <$in>) {
    chomp($line);
    my @splitline = split(',', $line);
    next if @splitline < 6;              # blank or short line

    my ($name) = split(' ', $splitline[3]);
    next if !defined $name;              # empty species column
    my $num = $splitline[5];

    # Keep the entry already stored unless this line has a larger number.
    next if exists $genuses{$name}
         && $genuses{$name}{'num'} >= $num;

    $genuses{$name} = { 'num' => $num, 'line' => $line };
}
close($in);

# Iterate over the keys only: looping over %genuses itself would also visit
# the values (hash references), which are not valid lookup keys.
foreach my $genus ( keys %genuses ) {
    print "$genuses{$genus}{'line'}\n";
}
Output:
[root@localhost tmp]# ./test.pl
AB179736.1.1725,AB179736.1.1725,278986,Pterocorys zancleus,0,1763
AB179735.1.1711,AB179735.1.1711,278983,Eucyrtidium hexagonatum,0,1600
AB120309.1.1800,AB120309.1.1800,4442,Camellia sinensis,0,1700
AB120002.1.1725,AB120002.1.1725,244961,Gyrodinium fusiforme,0,1800
AB181890.1.1709,AB181890.1.1709,281610,Protoperidinium denticulatum,0,1800
Don't see an obvious method that you are sorting your output by

Perl: best way to pass array of hash to a sub

My class looks like the below:
package CSVKeepCols;
use strict;
use warnings;
use Text::CSV;
use Data::Dumper;

# All state lives in file-scoped lexicals, so it is shared by every object
# created through load().
my $text;           # raw CSV text supplied via input()
my $del;            # field delimiter supplied via setOpts()
my @cols;           # column selection supplied via setOpts()
my $output = '';    # filtered text accumulated by process()

# Constructor: returns an empty hash blessed into the current package.
sub load {
    my $class = shift;
    my $self = {};
    bless $self;
    return $self;
}

# Stores the text to be processed; returns the invocant.
sub input {
    my $class = shift;
    $text = shift;
    return $class;
}

# Reads the 'delimeter' and 'columns' entries of the options hash reference.
sub setOpts {
    my ($class, $opts) = @_;
    $del = $opts->{'delimeter'};
    # This is the line the question is about: $opts->{'columns'} is an array
    # reference, so @cols receives that single reference instead of the
    # column numbers it points to.
    @cols = $opts->{'columns'};
}

# Splits the stored text into lines, parses each one as CSV using $del as
# the separator, and appends the filtered line to $output. Dies with
# 'Invalid CSV data' when a line cannot be parsed.
sub process {
    my @lines = split /\n|\r|\n\r|\r\n/, $text;
    my $csv = Text::CSV->new({ sep_char => $del });
    foreach (@lines) {
        die('Invalid CSV data') if !$csv->parse($_);
        $output .= __filterFields($csv->fields()) . "\n";
    }
}

# Returns everything accumulated by process() so far.
sub output {
    return $output;
}

# Joins, with commas, the fields whose indices are listed in @cols.
sub __filterFields {
    my @fields = @_;
    my $line = '';
    foreach (@cols) {
        $line .= ',' if $line;
        $line .= $fields[$_];
    }
    return $line;
}
1;
I am using this class from my code like this:
# Usage of CSVKeepCols: $out is passed in as the CSV text and is then
# overwritten with whatever output() returns after process() has run.
$parser = load CSVKeepCols();
$parser->input($out);
# 'columns' is given as an array reference holding the indices to keep.
$parser->setOpts({'delimeter' => ',', 'columns' => [1,2]});
$parser->process();
$out = $parser->output();
I am expecting that the setOpts subroutine will take the hash {'delimeter' => ',', 'columns' => [1,2]} and from it set the value of $del to "," and @cols to (1,2), so that I can loop through the @cols array.
However, when I try to loop through @cols in the __filterFields subroutine I get the error
Use of reference "ARRAY(0x22e32e0)" as array index at CSVKeepCols.pm line 52.
How do I fix this?
In setOpts, you set @cols = $opts->{columns};
$opts->{columns} contains a reference to an array ( [1,2] ).
So in __filterFields:
for ( @cols ){
    # $_ is an arrayref [1,2]
    # you are using it as an index to retrieve a value from @fields
    $line .= $fields[$_];
    # Thus the error: "use of reference ARRAY"..." as array index"
    # You should be using an integer here.
}
To fix it:
sub setOpts {
    # ...
    # Dereference the array reference so @cols holds the column indices
    # themselves rather than a single reference to them.
    @cols = @{ $opts->{columns} };
}
Edit: removed unnecessary check

Possible to assign count to array? like #content.$i?

my csv input file
Chapter,Content
A,1
B,3
C,1
C,2
C,3
D,5
My current perl script
# Asker's script: sorts the second CSV column into one array per chapter
# letter and prints each array on its own line.
open(INFILE,$input)||die "can't open the file";
# NOTE(review): the read loop is missing from the post as captured; it is
# reconstructed here because the body below reads $_ line by line.
while (<INFILE>) {
    @line = (split/,/,$_);
    @line_last = (split/\n/,$line[1]);
    if ($_ =~ /A/){
        push @con1, $line[1];
    }
    elsif ($_ =~ /B/){
        push @con2, $line[1];
    }
    elsif ($_ =~ /C/){
        push @con3, $line[1];
    }
    elsif ($_ =~ /D/){
        push @con4, $line[1];
    }
}
close INFILE;
chomp @con1, @con2, @con3, @con4;
print "content =", (join ", ", @con1),"\n";
print "content =", (join ", ", @con2),"\n";
print "content =", (join ", ", @con3),"\n";
print "content =", (join ", ", @con4),"\n";
My current code can work but I wanna make my code shorter & more efficient.
How do I make @con more automated? Something like using a for loop and $i to pick the @con array, to make the overall code shorter.
Why it's stupid to `use a variable as a variable name'.
You should use an AoA instead.
use Text::CSV_XS qw( );

# Group the second CSV column by the first, using an array of arrays whose
# slots follow the order of @foos.
# $qfn must hold the path of the CSV file; it is not set in this snippet.
my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 2 });

open(my $fh, "<", $qfn)
    or die("Can't open \"$qfn\": $!\n");

my @foos = qw( A B C D );
my %index_by_foo = map { $foos[$_] => $_ } 0 .. $#foos;
my @cons_by_foo  = map { [] } @foos;

while (my $row = $csv->getline($fh)) {
    my $i = $index_by_foo{ $row->[0] };
    # Test with defined, not truth: the first name maps to index 0, which is
    # a valid slot. Rows with an unlisted first column (such as a header
    # line) have no index and are skipped.
    next if !defined($i);
    push @{ $cons_by_foo[$i] }, $row->[1];
}

for my $i (0 .. $#foos) {
    print("content =", join(", ", @{ $cons_by_foo[$i] }), "\n");
}
That said, a HoA seems a far better fit.
use Text::CSV_XS qw( );

# Group the second CSV column by the first, using a hash of arrays keyed by
# the first column.
# $qfn must hold the path of the CSV file; it is not set in this snippet.
my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 2 });

open(my $fh, "<", $qfn)
    or die("Can't open \"$qfn\": $!\n");

my %cons_by_foo;
while (my $row = $csv->getline($fh)) {
    # Skip the "Chapter,Content" header line so it is not reported as data.
    next if $row->[0] eq 'Chapter';
    push @{ $cons_by_foo{ $row->[0] } }, $row->[1];
}

for my $foo (sort keys %cons_by_foo) {
    print("content $foo =", join(", ", @{ $cons_by_foo{$foo} }), "\n");
}
This is what a hash is great for IMO (hashtable or dictionary in other languages).
note: I haven't tested this code, it's from memory.
# Collect, per chapter (first column), a ", "-separated string of the values
# found in the second column. Expects INFILE to be open already.
my %con;
while (<INFILE>)
{
    chomp;
    my @line = split /,/;
    next if @line < 2;    # blank or malformed line

    # Strings are joined with interpolation/"."; "+" would add them as
    # numbers.
    $con{$line[0]} = defined $con{$line[0]}
        ? "$con{$line[0]}, $line[1]"
        : $line[1];
}
# sort takes the list produced by keys, not the other way round.
foreach my $str (sort keys %con)
{
    print "content $con{$str}\n";
}
edit: much better and tested code here.
# Read input.txt and print, for every chapter, its contents joined by ", ".
open(my $in, '<', 'input.txt') or die "can't open the file: $!";
my %con;
while (my $row = <$in>)
{
    next if $row =~ /^Chapter/;    # header line
    chomp $row;
    my ($chapter, $content) = split ',', $row;
    next if !defined $content;     # blank or malformed line
    push @{ $con{$chapter} }, $content;
}
close($in);
foreach my $str (sort keys %con)
{
    my $tmp = join ', ', @{ $con{$str} };
    print "content $str = $tmp\n";
}
output is:
content A = 1
content B = 3
content C = 1, 2, 3
content D = 5
I know that isn't your output, but it seems more useful than what you had.
my %cc; # chapter => array ref of its contents
open(my $FH, '<', $filename) or die "Cannot open $filename: $!";
while (my $line = <$FH>) {
    chomp $line;
    my ($chapter, $content) = split /,/, $line;
    next if !defined $content;        # blank or malformed line
    next if $chapter eq 'Chapter';    # header line
    push @{ $cc{$chapter} }, $content;
}
close $FH;
print "Chapter = $_ Content = " . join(", ", @{ $cc{$_} }) . "\n" for sort keys %cc;
Output:
Chapter = A Content = 1
Chapter = B Content = 3
Chapter = C Content = 1, 2, 3
Chapter = D Content = 5

Array Manipulation join with out split

# Asker's loop: overwrite each element of @browser with a prefixed copy,
# tracking the position by hand in $count.
@browser = ("NS", "IE", "Opera");
my $add_str = "Browser:";
$count = 0;
foreach (@browser) {
    my $br = $_;
    $browser[$count] = "$add_str:$br";
    $count++ ;
}
is there any other way to do this ? best way ?
You could use map.
my @browser = ("NS", "IE", "Opera");
my $add_str = "Browser";
# map builds a new list with the prefix applied to every element.
@browser = map { "${add_str}:$_" } @browser;
In Perl 5, the for loop aliases each item, so you can simply say
#!/usr/bin/perl
use strict;
use warnings;
my @browsers = qw/NS IE Opera/;
my $add_str = "Browser:";
# The loop variable is an alias for each array element, so assigning to it
# changes @browsers in place.
for my $browser (@browsers) {
    $browser = "$add_str:$browser";
}
print join(", ", @browsers), "\n";

Resources