Perl count repeated strings in array - arrays

I have an array that is like
my #array = ('cookies','balls','cookies','balls','balls');
but the real one is much bigger/longer.
How do I output the count of each repeated string in the array?
like in the example, cookies is 2 and balls is 3.

I think we can use
map {$count{$_}++;} #array;
instead of
foreach(#array)
{
unless(defined($count{$_}))
{
$count{$_} = 1;
}
else {
$count{$_}++;
}
}
to simplify the code.

"How do I output the count of each repeated string in the array?"
#!/usr/bin/perl
use strict;
use warnings;
my #array = ('cookies','balls','cookies','balls','balls', 'orphan');
my %count;
$count{$_}++ foreach #array;
#removing the lonely strings
while (my ($key, $value) = each(%count)) {
if ($value == 1) {
delete($count{$key});
}
}
#output the counts
while (my ($key, $value) = each(%count)) {
print "$key:$value\n";
}
Prints:
cookies:2
balls:3
Mind, that 'orphan' wasn't output.

Using Perl that's a little more idiomatic than some of the other answers...
use strict;
use warnings;
use 5.010;
my #array = ('cookies','balls','cookies','balls','balls');
my %count;
$count{$_}++ foreach #array;
say "$_: $count{$_}" foreach grep { $count{$_} != 1 } keys %count;

Try this more shorter code u will not get any thing shorter than this
my #array = ('cookies','balls','cookies','balls','balls');
my $hashh = {};
foreach (#array){
if(exists $hashh->{$_}){
$hashh->{$_}++;
} else{
$hashh->{$_} = 1;
}
}
print Dumper($hashh);

Related

Create Multi Dimensional Hash Using Array Elements

I need to create multidimensional hashes with varying depth using array elements as keys. Pseudocode attempt:
Example line:
Statement Coverage for instance Hierarchical/path/with/block/and/module ...
if (/Statement Coverage for instance (.&?) /)
{
#array = split /\//, $1;
}
for (my $eye = 1; $eye <= $#array; $eye++)
{
A miracle happens to %hash!
}
$hash{"path"}{"with"}{"block"}{"and"} now has a value of "module". Remember, the number of keys can vary. Any ideas?
That's what Data::Diver does for you:
my #array = split /\//, $1;
DiveVal(\ my %hash, #array[ 0 .. $#array - 1 ]) = $array[-1];
print Dumper(\%hash);
See my code below. It builds the desired hash recursively.
But I think that you are taking a wrong approach. I obviously don't know what exactly you are trying to achieve, but seems to me, that you should use tree data structure instead of the multidimensional hash.
use strict;
use warnings;
use v5.10;
use Data::Dumper;
my #data = (
'some/path/test',
'some/path/deeper/test',
'another/deeper/path/test',
);
my $resultHr = {};
foreach my $path (#data) {
my #elems = split /\//, $path;
buildHash($resultHr, #elems);
}
say Dumper($resultHr);
sub buildValue {
my $n = shift;
if (#_) {
return {$n => buildValue(#_)};
}
else {
return $n;
}
}
sub buildHash {
my $hr = shift;
my $k = shift;
return unless $k;
if (exists $hr->{$k} && ref $hr->{$k}) {
buildHash($hr->{$k}, #_);
}
else {
$hr->{$k} = buildValue(#_);
}
}

ID tracking while swapping and sorting other two arrays in perl

#! /usr/bin/perl
use strict;
my (#data,$data,#data1,#diff,$diff,$tempS,$tempE, #ID,#Seq,#Start,#End, #data2);
#my $file=<>;
open(FILE, "< ./out.txt");
while (<FILE>){
chomp $_;
#next if ($line =~/Measurement count:/ or $line =~/^\s+/) ;
#push #data, [split ("\t", $line)] ;
my #data = split('\t');
push(#ID, $data[0]);
push(#Seq, $data[1]);
push(#Start, $data[2]);
push(#End, $data[3]);
# push #$data, [split ("\t", $line)] ;
}
close(FILE);
my %hash = map { my $key = "$ID[$_]"; $key => [ $Start[$_], $End[$_] ] } (0..$#ID);
for my $key ( %hash ) {
print "Key: $key contains: ";
for my $value ($hash{$key} ) {
print " $hash{$key}[0] ";
}
print "\n";
}
for (my $j=0; $j <=$#Start ; $j++)
{
if ($Start[$j] > $End[$j])
{
$tempS=$Start[$j];
$Start[$j]=$End[$j];
$End[$j]=$tempS;
}
print"$tempS\t$Start[$j]\t$End[$j]\n";
}
my #sortStart = sort { $a <=> $b } #Start;
my #sortEnd = sort { $a <=> $b } #End;
#open(OUT,">>./trial.txt");
for(my $i=1521;$i>=0;$i--)
{
print "hey";
my $diff = $sortStart[$i] - $sortStart[$i-1];
print "$ID[$i]\t$diff\n";
}
I have three arrays of same length, ID with IDs (string), Start and End with integer values (reading from a file).
I want to loop through all these arrays and also want to keep track of IDs. First am swapping elements in Start with End if Start > End, then I have to sort these two arrays for further application (as I am negating Start[0]-Start[1] for each item in that Start). While sorting, the Id values may change, and as my IDs are unique for each Start and End elements, how can I keep track of my IDs while sorting them?
Three arrays, ID, Start and End, are under my consideration.
Here is a small chunk of my input data:
DQ704383 191990066 191990037
DQ698580 191911184 191911214
DQ724878 191905507 191905532
DQ715191 191822657 191822686
DQ722467 191653368 191653339
DQ707634 191622552 191622581
DQ715636 191539187 191539157
DQ692360 191388765 191388796
DQ722377 191083572 191083599
DQ697520 189463214 189463185
DQ709562 187245165 187245192
DQ540163 182491372 182491400
DQ720940 180753033 180753060
DQ707760 178340696 178340726
DQ725442 178286164 178286134
DQ711885 178250090 178250119
DQ718075 171329314 171329344
DQ705091 171062479 171062503
The above ID, Start, End respectively. If Start > End i swapped them only between those two arrays. But after swapping the descending order may change, but i want them in descending order also their corresponding ID for negation as explained above.
Don't use different arrays, use a hash to keep the related pieces of information together.
#!/usr/bin/perl
use warnings;
use strict;
use enum qw( START END );
my %hash;
while (<>) {
my ($id, $start, $end) = split;
$hash{$id} = [ $start < $end ? ($start, $end)
: ($end, $start) ];
}
my #by_start = sort { $hash{$a}[START] <=> $hash{$b}[START] } keys %hash;
my #by_end = sort { $hash{$a}[END] <=> $hash{$b}[END] } keys %hash;
use Test::More;
is_deeply(\#by_start, \#by_end, 'same');
done_testing();
Moreover, in the data sample you provided, the order of id's is the same regardless of by what you sort them.

In Perl, how can I stringify a two dimensional array?

Technically speaking there are no multi-dimensional arrays in Perl, but you can use single dimensional arrays in Perl to act as if they had more than one dimension.
In Perl each element of an array can be a reference to another array, but syntactically they would look like a two-dimensional array.
I want to convert 2-dimensional integer array into string in Perl. I have declared 2-dimensional integer array as follows:
my #array1=[[1,2,3],[1,2,3],[1,2,3]];
OR
my #array2=((1,2,3),(1,2,3),(1,2,3));
now I need to create a subroutine that will return string as "{{1,2,3},{1,2,3},{1,2,3}}". I have tried the following subroutine:
sub TwoDArrayOutputString {
my ($outputs)= #_;
my $finaloutput="{";
foreach my $output ($outputs) {
foreach my $out (#$output) {
$finaloutput.="{"."}";
#$finaloutput.="{".join(',',#output)."}";
}
$finaloutput.=",";
}
$finaloutput.="}";
return $finaloutput;
}
sub TwoDArrayOutputString1 {
my ($outputs)= #_;
if ( ref($outputs) eq "REF" ) {$outputs = ${$outputs};}
my $finaloutput="{";
foreach my $output ($outputs) {
foreach my $out (#$output) {
$finaloutput.="{"."}";
#$finaloutput.="{".join(',',#output)."}";
}
$finaloutput.=",";
}
$finaloutput.="}";
return $finaloutput;
}
sub TwoDArrayOutputString2{
my ($array)= #_;
my $finaloutput="{";
for my $row ( 0..$#array ) {
my #columns = #{ $array[$row] }; # Dereferencing my array reference
$finaloutput.="{";
for my $column ( #columns ) {
$finaloutput.=$column.",";
}
$finaloutput=substr($finaloutput,0,length($finaloutput)-1);
$finaloutput.="}".",";
}
$finaloutput=substr($finaloutput,0,length($finaloutput)-1);
$finaloutput.="}";
return $finaloutput;
}
print TwoDArrayOutputString(#array1)."\n";
print TwoDArrayOutputString1(#array1)."\n";
print TwoDArrayOutputString2(#array1)."\n"."\n"."\n"."\n";
print TwoDArrayOutputString(#array2)."\n";
print TwoDArrayOutputString1(#array2)."\n";
print TwoDArrayOutputString2(#array2)."\n"."\n"."\n"."\n";
Output:
{{}{}{},}
{{}{}{},}
}
{,}
{,}
}
and my expected output is {{1,2,3},{1,2,3},{1,2,3}}.
First off, both of your syntaxes are wrong (compared to what I think you think they do):
my #array1=[[1,2,3],[1,2,3],[1,2,3]];
This results in #array1 holding a single reference to an anonymous array which further holds three references to three anonymous arrays when what I think you want is:
my $array1 = [[1,2,3],[1,2,3],[1,2,3]];
$array1 now is a reference to an array that holds three references to three anonymous arrays.
my #array2=((1,2,3),(1,2,3),(1,2,3));
In this case, you are just fooling yourself with all the extra parentheses: All you have is a single array whose elements are 1, 2, 3, 1, 2, 3, 1, 2, 3.
You say
now I need to create a subroutine that will return string as {{1,2,3},{1,2,3},{1,2,3}}.
That is an odd requirement. Why exactly do you need to create such a subroutine?
If you want to serialize the array as a string, you'd be better off using one of the more standard and interoperable ways of doing it, and pick a format such as JSON, YAML, XML, Data::Dumper, or something else.
For example:
$ perl -MJSON::MaybeXS=encode_json -E '#array1=([1,2,3],[1,2,3],[1,2,3]); say encode_json \#array1'
[[1,2,3],[1,2,3],[1,2,3]]
or
$ perl -MData::Dumper -E '#array1=([1,2,3],[1,2,3],[1,2,3]); say Dumper \#array1'
$VAR1 = [
[
1,
2,
3
],
[
1,
2,
3
],
[
1,
2,
3
]
];
or
$ perl -MYAML::XS -E '#array1=([1,2,3],[1,2,3],[1,2,3]); say Dump \#array1'
---
- - 1
- 2
- 3
- - 1
- 2
- 3
- - 1
- 2
- 3
or
$ perl -MXML::Simple -E '#array1=([1,2,3],[1,2,3],[1,2,3]); say XMLout(\#array1)'
<opt>
<anon>
<anon>1</anon>
<anon>2</anon>
<anon>3</anon>
</anon>
<anon>
<anon>1</anon>
<anon>2</anon>
<anon>3</anon>
</anon>
<anon>
<anon>1</anon>
<anon>2</anon>
<anon>3</anon>
</anon>
</opt>
If your purpose is to learn how to traverse a multi-dimensional structure and print it, doing it correctly requires attention to a few details. You could study the source of YAML::Tiny:
sub _dump_array {
my ($self, $array, $indent, $seen) = #_;
if ( $seen->{refaddr($array)}++ ) {
die \"YAML::Tiny does not support circular references";
}
my #lines = ();
foreach my $el ( #$array ) {
my $line = (' ' x $indent) . '-';
my $type = ref $el;
if ( ! $type ) {
$line .= ' ' . $self->_dump_scalar( $el );
push #lines, $line;
} elsif ( $type eq 'ARRAY' ) {
if ( #$el ) {
push #lines, $line;
push #lines, $self->_dump_array( $el, $indent + 1, $seen );
} else {
$line .= ' []';
push #lines, $line;
}
} elsif ( $type eq 'HASH' ) {
if ( keys %$el ) {
push #lines, $line;
push #lines, $self->_dump_hash( $el, $indent + 1, $seen );
} else {
$line .= ' {}';
push #lines, $line;
}
} else {
die \"YAML::Tiny does not support $type references";
}
}
#lines;
}
Now, for your simple case, you could do something like this:
#!/usr/bin/env perl
use feature 'say';
use strict;
use warnings;
my #array = ([1, 2, 3], [4, 5, 6], [7, 8, 9]);
say arrayref_to_string([ map arrayref_to_string($_), #array]);
sub arrayref_to_string { sprintf '{%s}', join(q{,}, #{$_[0]}) }
Output:
{{1,2,3},{4,5,6},{7,8,9}}
You could do something like below:
#!/usr/bin/perl
use strict;
use warnings;
my #array1=[[1,2,3],[1,2,3],[1,2,3]];
foreach my $aref (#array1){
foreach my $inner (#$aref){
print "{";
foreach my $elem (#$inner){
print "$elem";
print ",";
}
print "}";
}
}
PS: I did not understand second array in your example i.e. my #array2=((1,2,3),(1,2,3),(1,2,3));. It's basically just my #array2=(1,2,3,1,2,3,1,2,3);.
One way could be with Data::Dumper. But correctly pass array or array-refs to Dumper. Your #array2 is one-dimensional array.
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
my #array1=[[1,2,3],[1,2,3],[1,2,3]];
my $string = Dumper(#array1);
$string =~ s/\n|\s+|.*?=|;//g;
$string =~ s/\[/\{/g;
$string =~ s/\]/\}/g;
print $string."\n";
output:
{{1,2,3},{1,2,3},{1,2,3}}

Compare Arrays and Delete Arrays

I have below three sets ( arrays ) I need to perform an operation like this ( (A-B)UC ) on.
Can someone have the logic of this in Perl?
Here is my code I can able check for is B subset of A or not but I could not able to do "A-B":
my #array = (MAJOR,MINOR,MM,DD,YY);
my #exclude = (MM,MINOR,YY);
my #include = (LICENSE,VALID);
foreach (#exclude) {
if ( $_ ~~ #array ) {
print "\n $_ is defined in variables and it will be excluded \n";
#array = grep {!/\$_/} #array;
print "#array \n";
}
else {
print "\n $_ is not defined under variables please check the files \n";
exit 100;
}
}
foreach (#array){
print "$_ \n";
}
I suspect something is wrong in my logic with grep operation i.e. delete operation.
One problem with the grep is that $_ in the outer loop is redefined inside the grep block to each element of #array. You need to have different names. Also, your regex was lacking anchors; however, instead of a regex, just use string inequality. Try this:
my #array = qw(MAJOR MINOR MM DD YY);
my #exclude = qw(MM MINOR YY);
my #include = qw(LICENSE VALID);
foreach my $e (#exclude) {
if ( $e ~~ #array ) {
print "\n $e is defined in variables and it will be excluded \n";
#array = grep {$e ne $_} #array;
print "#array \n";
} else {
print "\n $e is not defined under variables please check the files \n";
exit 100;
}
}
use strict and warnings to alert you to many pitfalls in perl.
A hash is the most natural way to represent a set in perl.
use strict;
use warnings;
my #array = ('MAJOR','MINOR','MM','DD','YY');
my #exclude = ('MM','MINOR','YY');
my #include = ('LICENSE','VALID');
my %set;
# add #array to set
#set{#array} = ();
# remove #exclude
delete #set{#exclude};
# add #include
#set{#include} = ();
# array of elements resulting
my #result = sort keys %set;
You could use a set to do those kind of operations. I used a non-standard module Set::Scalar to help me with it:
#!/usr/bin/env perl
use warnings;
use strict;
use Set::Scalar;
my #array = qw(MAJOR MINOR MM DD YY);
my #exclude = qw(MM MINOR YY);
my #include = qw(LICENSE VALID);
my $array_set = Set::Scalar->new(#array);
my $exclude_set = Set::Scalar->new(#exclude);
my $include_set = Set::Scalar->new(#include);
my $result = $array_set->difference($exclude_set)->union($include_set);
use Data::Dumper;
print Dumper #$result;
Run it like:
perl script.pl
That yields:
$VAR1 = 'VALID';
$VAR2 = 'MAJOR';
$VAR3 = 'DD';
$VAR4 = 'LICENSE';

Perl - Split Array in to Smaller Evenly Distributed Arrays

How would I split a Perl array of arbitrary size in to a variable number of smaller arrays with the number of elements in each smaller array being distibuted as evenly possible? The original array must not be destroyed.
Off the top of my head:
use strict;
use warnings;
use Data::Dumper; # for debugging only
print Dumper(distribute(7, [1..30]));
# takes number+arrayref, returns ref to array of arrays
sub distribute {
my ($n, $array) = #_;
my #parts;
my $i = 0;
foreach my $elem (#$array) {
push #{ $parts[$i++ % $n] }, $elem;
};
return \#parts;
};
This guarantees that number of elements in #parts may only differ by one. There's anonther solution that would count the numbers beforehand and use splicing:
push #parts, [ #$array[$offset..$offset+$chunk] ];
$offset += chunk;
# alter $chunk if needed.
Here's a version using List::MoreUtils:
use strict;
use warnings;
use List::MoreUtils qw(part);
use Data::Dumper;
my #array = 1..9;
my $partitions = 3;
my $i = 0;
print Dumper part {$partitions * $i++ / #array} #array;
If you don't care what winds up in each array:
use strict;
use warnings;
use List::MoreUtils qw(part);
use Data::Dumper;
my $i = 0;
my $numParts = 2;
my #part = part { $i++ % $numParts } 1 .. 30;
print Dumper #part;
#Dallaylaen's answer doesn't quite work because you can't pass an array into a subroutine in Perl. Instead of passing in an array (or a list as Dallaylaen did in his example) you must pass in a reference to an array:
my #arrayIn = (1..30);
my #arrayOfArrays = distribute(7, \#arrayIn);
sub distribute {
my ($n, $array) = #_;
my #parts;
my $i = 0;
foreach my $elem (#$array) {
push #{ $parts[$i++ % $n] }, $elem;
};
return #parts;
};

Resources