I'm parsing an excel spreadsheet and trying to combine data rows by id across tables and files. This is a condensed, simplified version of what I have. With a PHP/JS background, I prefer conceptualizing hashes as objects, so %aoo means array of objects instead of array of hashes...
#!/usr/bin/env perl
use v5.10.0;
use strict;
use warnings;
use Data::Dump;
use Data::Dumper;
# Array of objects
# Each object is a row from a table
# Table "People List": an array ref holding one hash ref per spreadsheet row.
my $aoo1 = [
    { "id" => 1, "name" => "Dan",     "team" => "red" },
    { "id" => 2, "name" => "Arnold",  "team" => "red" },
    { "id" => 3, "name" => "Kristen", "team" => "red" },
];

# Table "Position List": a plain array of row hash refs.
my @aoo2 = (
    { "id" => 1, "position" => "web developer" },
    { "id" => 2, "position" => "CEO" },
    { "id" => 3, "position" => "Secretary" },
);

# Table "Tenure List": a plain array of row hash refs.
my @aoo3 = (
    { "id" => 1, "tenure" => "1yr" },
    { "id" => 2, "tenure" => "25yr" },
    { "id" => 3, "tenure" => "5yr" },
);

# object of arrays
# each property is a table name from spreadsheet
my %ooa = (
    "People List"   => $aoo1,
    "Position List" => \@aoo2,
    "Tenure List"   => \@aoo3,
);

# dd \%ooa;
while ( my ( $list_name, $aoo ) = each %ooa ) {
    # $aoo is an array ref | [ \%object, \%object, \%object ]
    # Do something to look into other objects for same id...
}
I want to be able to create a new object for each unique row in the file, so I can filter the values and then write it to a CSV file.
Ex. of the end result
# Desired end result: one merged hash per id, combining the columns of all tables.
my %complete_row = (
    'id'       => 1,
    'name'     => 'Dan',
    'team'     => 'red',
    'position' => 'Web Dev',
    'tenure'   => '1yr',
);
Put the 2nd and 3rd arrays into hashes mapping ID to the hash. Then loop through the people and use the ID to get the data from position and tenure hashes.
use strict;
use warnings;
use Data::Dumper;
# One hash ref per person row.
my $people = [
    { id => 1, name => "Dan",     team => "red" },
    { id => 2, name => "Arnold",  team => "red" },
    { id => 3, name => "Kristen", team => "red" },
];

# One hash ref per position row, keyed to a person by id.
my $positions = [
    { id => 1, position => "web developer" },
    { id => 2, position => "CEO" },
    { id => 3, position => "Secretary" },
];

# One hash ref per tenure row, keyed to a person by id.
my $tenures = [
    { id => 1, tenure => "1yr" },
    { id => 2, tenure => "25yr" },
    { id => 3, tenure => "5yr" },
];

# hash each by ID so a row can be looked up directly instead of scanned for
my %position_hash = map { ( $_->{id} => $_ ) } @$positions;
my %tenure_hash   = map { ( $_->{id} => $_ ) } @$tenures;

# combine: copy every person row and add the matching position and tenure
my $complete = [];
foreach my $person (@$people) {
    my $id           = $person->{id};
    my $complete_row = {
        %$person,
        position => $position_hash{$id}->{position},
        tenure   => $tenure_hash{$id}->{tenure},
    };
    push @$complete, $complete_row;
}

print "complete = " . Dumper($complete);
This should work:
# Merge every row of every table in %ooa into one hash per id.
my %newHash;
foreach my $arrRef ( values %ooa ) {    # each value of %ooa is an array ref
    foreach my $hashRef (@$arrRef) {    # each array element is a hash ref
        foreach my $key ( keys %{$hashRef} ) {
            # build a hash of hashes with id as key and the merged row as value
            $newHash{ $hashRef->{'id'} }{$key} = $hashRef->{$key};
        }
    }
}

# convert the hash of hashes into an array of hashes
my @newArray = values %newHash;
Related
So I have following array of hash:
my_array = [
{
"date" => "2022-12-01",
"pic" => "Jason",
"guard" => "Steven",
"front_desk" => "Emily"
},
{
"date" => "2022-12-02",
"pic" => "Gilbert",
"guard" => "Johnny",
"front_desk" => "Bella"
},
{
"date" => "2022-12-03",
"pic" => "Steven",
"guard" => "Gilbert",
"front_desk" => "Esmeralda"
}
]
My question is how do I change the structure of my array (grouping) by date in Ruby (Rails 7). Or in other word, I want to change my array into something like this:
# Desired shape: one single-key hash per entry, keyed by the date.
my_array = [
  {
    "2022-12-01" => {
      "pic" => "Jason",
      "guard" => "Steven",
      "front_desk" => "Emily"
    }
  },
  {
    "2022-12-02" => {
      "pic" => "Gilbert",
      "guard" => "Johnny",
      "front_desk" => "Bella"
    }
  },
  {
    "2022-12-03" => {
      "pic" => "Steven",
      "guard" => "Gilbert",
      "front_desk" => "Esmeralda"
    }
  }
]
Anyway, thanks in advance for the answer
I have tried using group_by method to group by its date, but it doesn't give the output I wanted
I've tried this method:
my_array.group_by { |element| element["date"] }.values
If you simply want a 1:1 mapping of your input objects to an output object of a new shape, then you just need to use Array#map:
my_array.map {|entry| {entry["date"] => entry.except("date")} }
(Hash#except comes from ActiveSupport, and is not standard Ruby, but since you're in Rails it should work just fine).
Both solutions assume the key "date" will be unique. If we cannot make this assumption safely, then each date should be mapped to an array of hashes.
my_array.each_with_object({}) do |x, hsh|
date = x["date"]
hsh[date] ||= []
hsh[date] << x.except("date")
end
Result:
{
"2022-12-01" => [
{"pic"=>"Jason", "guard"=>"Steven", "front_desk"=>"Emily"}
],
"2022-12-02" => [
{"pic"=>"Gilbert", "guard"=>"Johnny", "front_desk"=>"Bella"}
],
"2022-12-03" => [
{"pic"=>"Steven", "guard"=>"Gilbert", "front_desk"=>"Esmeralda"}
]
}
Or you may like:
# Group the rows by date (in ascending date order). Each group is an array
# of rows, so the "date" key is removed from every row inside the group
# rather than from the array itself.
my_array
  .sort_by { |x| x["date"] }
  .group_by { |x| x["date"] }
  .transform_values { |rows| rows.map { |row| row.except("date") } }
I have a bunch of Hashes inside of an array. When checking my keys and values I get the expected output except for some special cases as they refer to more Arrays/Hashes.
Think of something like this:
#AoH = ( { 'husband' => "homer", 'wife' => "marge" },
{ 'people' => [{'Bob'=> 24, 'Lukas'=> 37}] },
{ 'vegetables' => { 'tomato' => "red", 'carrot' => "orange"} });
My function iterates through the array and displays my keys and values as in the following:
sub function(...){
print "$key => $value\n";
}
husband => homer
wife => marge
people => ARRAY(0x6b0d80)
Bob => 24
Lukas => 37
vegetables => HASH(0x2570d38)
tomato => red
carrot => orange
Now I want to access my keys and values, but when getting something like ARRAY or HASH as value, I want to disregard that hash and not print it.
Is there some kind of way to only access Values with type scalar?
So far I tried this:
if ($value eq 'ARRAY') {
}
elsif ($value eq ref {}) {
}
else {
print "$key => $value\n";
}
But, it ends up printing exactly the same as above and does not disregard the other data structures.
For an arbitrary data structure like yours, you can use Data::Traverse:
use warnings;
use strict;
use Data::Traverse qw(traverse);
my @AoH = ( { 'husband' => "homer", 'wife' => "marge" },
    { 'people' => [{'Bob'=> 24, 'Lukas'=> 37}] },
    { 'vegetables' => { 'tomato' => "red", 'carrot' => "orange"} });
# NOTE(review): presumably traverse() sets $_ to the container type and
# $a/$b to the key/value of hash entries; confirm against Data::Traverse docs.
traverse { print "$a => $b\n" if /HASH/ } \@AoH;
Output:
wife => marge
husband => homer
Bob => 24
Lukas => 37
carrot => orange
tomato => red
Following demo code does not utilize external modules, provided for educational purpose.
use strict;
use warnings;
use feature 'say';
# Sample data: an array of hashes whose values may be nested arrays/hashes.
my @AoH = ( { 'husband' => "homer", 'wife' => "marge" },
    { 'people' => [{'Bob'=> 24, 'Lukas'=> 37}] },
    { 'vegetables' => { 'tomato' => "red", 'carrot' => "orange"} });
# Walk the whole structure by reference.
drill_in( \@AoH );
# Recursively walk a nested structure of array and hash references and
# print "key => value" for every hash entry whose value is not itself an
# array or hash reference.
#
#   $data - an array ref or hash ref; anything else is silently ignored.
#
# Output goes to the currently selected filehandle. Hash entries are
# visited in each() order, so the line order is not guaranteed.
sub drill_in {
    my $data = shift;
    my $type = ref $data;

    if ( $type eq 'ARRAY' ) {
        drill_in($_) for @$data;
    }
    elsif ( $type eq 'HASH' ) {
        while ( my ( $k, $v ) = each %{$data} ) {
            if ( ref $v eq 'ARRAY' or ref $v eq 'HASH' ) {
                # Nested container: descend instead of printing the reference.
                drill_in($v);
            }
            else {
                say "$k => $v";
            }
        }
    }
    return;
}
Output
husband => homer
wife => marge
Lukas => 37
Bob => 24
tomato => red
carrot => orange
What's the better way to get the highest value from an array of hashes? I want to get highest ID value from each file, content in my array (keys are file name and ID).
My @array contains these values:
[
{ file => "messages0.0", id => "1", },
{ file => "messages0.1", id => "2", },
{ file => "messages0.3", id => "3", },
{ file => "messages1.0", id => "1", },
{ file => "messages1.1", id => "2", },
{ file => "messages2.0", id => "1", },
{ file => "messages2.1", id => "1", }
]
If I use
my @new_array = sort { $b->{id} <=> $a->{id} } @array;    # numeric, highest id first
If I have a value greater than 10, then the sort function doesn't work correctly:
messages0.0.log;1
messages1.0.log;1
messages2.0.log;1
messages2.1.log;1
messages1.0.log;10
messages1.0.log;11
Here is my array content (with fields separated by ; for a better view):
messages1.0.log;12
messages1.0.log;11
messages1.0.log;10
messages1.0.log;9
messages0.0.log;8
messages1.0.log;8
messages0.0.log;7
messages1.0.log;7
messages0.0.log;6
messages1.0.log;6
messages0.0.log;5
messages1.0.log;5
messages2.0.log;5
messages2.1.log;5
messages0.0.log;4
messages1.0.log;4
messages2.0.log;4
messages2.1.log;4
messages2.0.log;3
messages2.1.log;3
messages0.0.log;3
messages0.2.log;3
messages0.3.log;3
messages1.0.log;3
messages2.0.log;3
messages2.1.log;3
messages0.3.log;2
messages0.2.log;2
messages0.0.log;2
messages1.0.log;2
messages2.0.log;2
messages2.1.log;2
messages0.0.log;1
messages0.2.log;1
messages0.3.log;1
messages1.0.log;1
messages1.1.log;1
messages2.0.log;1
messages2.1.log;1
My desired output is
messages1.0.log;12
messages0.0.log;8
messages2.0.log;5
messages2.1.log;5
messages0.2.log;3
messages0.3.log;3
messages1.1.log;1
#!/usr/bin/perl
use strict;
use warnings;
# Write every entry of @array to the statistics file, highest id first,
# one "file;id" record per line.
my $STAT = ".logstatistics";
open( my $stat_fh, '>', $STAT ) or die "Cannot open $STAT for writing: $!";
my @new_array = sort { $b->{id} <=> $a->{id} } @array;
# Print Log statistics
foreach my $entry (@new_array) {
    print {$stat_fh} join( ';', $entry->{file}, $entry->{id} ), "\n";
}
# Buffered write errors only surface at close, so check it.
close($stat_fh) or die "Cannot close $STAT: $!";
To help me with the analysis I've written the following code to load the array from a file
# Load "file;id" records from the statistics file into @array.
open( my $stat_fh, '<', $STAT ) or die "Cannot open $STAT for reading: $!";
while ( my $line = <$stat_fh> ) {
    chomp $line;
    my ( $file, $id ) = $line =~ /\A(.\w.*);(\d.*)/;
    # Skip lines that are not "file;id" records instead of storing undefs.
    next unless defined $file;
    push @array, { file => $file, id => $id, };
}
close($stat_fh);
I've solved my problem by adding an if statement to the code that loads the data into @array.
if the old value of the file name is the same as the current value it is skipped.
In this way, I have only one value for each file.
This seems to do what you want.
#!/usr/bin/perl
use strict;
use warnings;
use feature 'say';
# This seems to be the data structure that you are working with
# This seems to be the data structure that you are working with
my @data = (
    { file => 'messages1.0.log', id => 12 },
    { file => 'messages1.0.log', id => 11 },
    { file => 'messages1.0.log', id => 10 },
    { file => 'messages1.0.log', id => 9 },
    { file => 'messages0.0.log', id => 8 },
    { file => 'messages1.0.log', id => 8 },
    { file => 'messages0.0.log', id => 7 },
    { file => 'messages1.0.log', id => 7 },
    { file => 'messages0.0.log', id => 6 },
    { file => 'messages1.0.log', id => 6 },
    { file => 'messages0.0.log', id => 5 },
    { file => 'messages1.0.log', id => 5 },
    { file => 'messages2.0.log', id => 5 },
    { file => 'messages2.1.log', id => 5 },
    { file => 'messages0.0.log', id => 4 },
    { file => 'messages1.0.log', id => 4 },
    { file => 'messages2.0.log', id => 4 },
    { file => 'messages2.1.log', id => 4 },
    { file => 'messages2.0.log', id => 3 },
    { file => 'messages2.1.log', id => 3 },
    { file => 'messages0.0.log', id => 3 },
    { file => 'messages0.2.log', id => 3 },
    { file => 'messages0.3.log', id => 3 },
    { file => 'messages1.0.log', id => 3 },
    { file => 'messages2.0.log', id => 3 },
    { file => 'messages2.1.log', id => 3 },
    { file => 'messages0.3.log', id => 2 },
    { file => 'messages0.2.log', id => 2 },
    { file => 'messages0.0.log', id => 2 },
    { file => 'messages1.0.log', id => 2 },
    { file => 'messages2.0.log', id => 2 },
    { file => 'messages2.1.log', id => 2 },
    { file => 'messages0.0.log', id => 1 },
    { file => 'messages0.2.log', id => 1 },
    { file => 'messages0.3.log', id => 1 },
    { file => 'messages1.0.log', id => 1 },
    { file => 'messages1.1.log', id => 1 },
    { file => 'messages2.0.log', id => 1 },
    { file => 'messages2.1.log', id => 1 },
);

my %stats;

# Walk your input data, making a note of the highest
# id associated with every file.
for my $entry (@data) {
    my ( $file, $id ) = @{$entry}{qw(file id)};
    $stats{$file} = $id if ( $stats{$file} // 0 ) < $id;
}

# Walk the %stats hash in sorted order, printing
# the file and the maximum associated id.
for my $file ( sort my_clever_sort keys %stats ) {
    say join ';', $file, $stats{$file};
}

# (Slightly) clever sorting algorithm.
# Sort comparator over the keys of %stats: highest id first, ties broken by
# the number embedded in the file name, ascending.
sub my_clever_sort {
    # Extract the floating point numbers from the filenames
    my ($str_num_a) = $a =~ /(\d+\.\d+)/;
    my ($str_num_b) = $b =~ /(\d+\.\d+)/;

    # Sort by id (descending) and then filename (ascending); a name without
    # an embedded number compares as 0 instead of warning about undef.
    return ( $stats{$b} <=> $stats{$a} )
        || ( ( $str_num_a // 0 ) <=> ( $str_num_b // 0 ) );
}
Instead of
my @new_array = sort { $a->{id} cmp $b->{id} } @array;    # string comparison
try this
my @new_array = sort { $a->{id} <=> $b->{id} } @array;    # numeric comparison
The <=> operator treats the fields to compare as numbers instead of strings. It will treat 10 as greater than 3, so it will treat 10 as greater than 03.
The cmp operator treats your values as strings, so it will sort 21 before 3 just as it would sort BA before C.
I have this hash that contains some information:
my %hash = (
key_1 => {
year => 2000,
month => 02,
},
key_2 => {
year => 2000,
month => 02,
},
key_3 => {
year => 2000,
month => 03,
},
key_4 => {
year => 2000,
month => 05,
},
key_5 => {
year => 2000,
month => 01,
}
);
I want to create an array of hashes in which each array element lists every hash key/value pair that has the same year and month.
So basically I want to create something like this:
$VAR1 = [
'key_1' => {
'month' => 2,
'year' => 2000
},
'key_2' => {
'month' => 2,
'year' => 2000
}
], [
'key_3' => {
'month' => 3,
'year' => 2000
}
], [
'key_4' => {
'month' => 3,
'year' => 2000
}
], [
'key_5' => {
'year' => 2000,
'month' => 1
}
];
The real question here is: how can I compare the nested values of one hash entry with the nested values of the other entries and build a grouped structure out of the result?
Thank you for your time! =)
I'm getting slightly different results - key_3 and key_4 should belong to the same group.
# Index the keys of %hash as year -> month -> key. Only the existence of
# the innermost key matters, so its value is left undef.
my %by_year_and_month;
undef $by_year_and_month{ $hash{$_}{year} }{ $hash{$_}{month} }{$_}
    for keys %hash;

# Emit one array ref per (year, month) group, holding key => { month, year }
# pairs for every key that belongs to the group.
my $result;
for my $year ( keys %by_year_and_month ) {
    for my $month ( keys %{ $by_year_and_month{$year} } ) {
        push @$result, [ map { $_ => { month => $month, year => $year } }
                keys %{ $by_year_and_month{$year}{$month} } ];
    }
}
my %hash = (
    key_1 => { year => 2000, month => 02 },
    key_2 => { year => 2000, month => 02 },
    key_3 => { year => 2000, month => 03 },
    key_4 => { year => 2000, month => 03 },
    key_5 => { year => 2000, month => 01 },
);
my @v = qw(year month);    # keys compared
my @k = sort keys %hash;
my ( @a, %i );             # @a: the groups; %i: group signature -> index in @a

for my $key (@k) {
    # Entries with the same values for all compared fields share a signature.
    my $signature = join $;, map { $hash{$key}{$_} } @v;

    # The first time a signature is seen it gets the next free slot in @a.
    $i{$signature} //= scalar @a;

    # Each group is a flat list of key, value, key, value, ...
    push @{ $a[ $i{$signature} ] }, $key => $hash{$key};
}

use Data::Dumper; print Dumper(\@a);
You say you want the following, but it makes no sense.
$VAR1 = [$k,$v,$k,$v...], [...], [...], ...;
The accepted answer assumed you meant you want the following:
$VAR1 = [ [$k,$v,$k,$v,...], [...], [...], ... ];
That can be obtained as follows:
# Build a sortable "YYYY-MM" group key from a hash ref that has year and
# month fields.
sub group_id {
    my ($entry) = @_;
    return sprintf "%04d-%02d", @{$entry}{qw( year month )};
}

# Collect key, value pairs of %hash into one flat list per group.
my %grouped;
for my $k (keys(%hash)) {
    my $v = $hash{$k};
    push @{ $grouped{ group_id($v) } }, $k, $v;
}

# Groups ordered chronologically, thanks to the zero-padded group key.
my $VAR1 = [ map { $grouped{$_} } sort keys %grouped ];
But that's a very weird format. The following would make far more sense:
$VAR1 = [ { $k=>$v,$k=>$v,...}, {...}, {...}, ... ];
That can be obtained as follows:
# Build a sortable "YYYY-MM" group key from a hash ref that has year and
# month fields.
sub group_id {
    my ($entry) = @_;
    return sprintf "%04d-%02d", @{$entry}{qw( year month )};
}

# Collect the entries of %hash into one hash per group.
my %grouped;
for my $k (keys(%hash)) {
    my $v = $hash{$k};
    $grouped{ group_id($v) }{$k} = $v;
}

# Groups ordered chronologically, thanks to the zero-padded group key.
my $VAR1 = [ map { $grouped{$_} } sort keys %grouped ];
In both cases, the key is to use a hash to group similar things.
In both cases, the resulting groups are ordered chronologically.
I am facing an issue with sorting an array of hashes; hashes having alphanumeric values by which I need to sort.
Refer to this question! My question is an extension of this problem.
I tried the solution in the above question but didn't get the required output.
$arr_ref = [
{ brand => "A.1", PO => "1.a", supplier => "X" },
{ brand => "A.2", PO => "2.a", supplier => "X" },
{ brand => "B.1", PO => "1.b", supplier => "X" },
{ brand => "B.2", PO => "2.b", supplier => "X" },
{ brand => "B.3", PO => "1.c", supplier => "Y" },
]
I need to sort by Brand or PO.
#sort the array reference and place the array back into the standard_set
$arr_ref = [ sort by_brand @$arr_ref ];

# Sort comparator: plain string comparison of the brand field.
sub by_brand {
    $a->{brand} cmp $b->{brand}
}
Complexity is the key; can start with numeric or alphabetic character.
The Brand or PO can be of different sizes as well. The delimiter may be a dot or a hyphen.
Can we solve this depending on the input received in $arr_ref?
You were really close. You just need to remove the [ and ] brackets and dereference the array you want to assign to: @$arr_ref = ....
use strict;
use warnings;
my $arr_ref = [
    { brand => "B.3", PO => "1.c", supplier => "Y" },
    { brand => "B.2", PO => "2.b", supplier => "X" },
    { brand => "B.1", PO => "1.b", supplier => "X" },
    { brand => "A.2", PO => "2.a", supplier => "X" },
    { brand => "A.1", PO => "1.a", supplier => "X" },
];

# Dereference the array ref and sort its rows by brand (string comparison).
my @sorted = sort { $a->{brand} cmp $b->{brand} } @$arr_ref;
use Data::Dump;
dd @sorted;    # dump the sorted rows
Output:
(
{ brand => "A.1", PO => "1.a", supplier => "X" },
{ brand => "A.2", PO => "2.a", supplier => "X" },
{ brand => "B.1", PO => "1.b", supplier => "X" },
{ brand => "B.2", PO => "2.b", supplier => "X" },
{ brand => "B.3", PO => "1.c", supplier => "Y" },
)
Try this sort method:
# Sort comparator: order rows by brand, then by PO.
#
# Both fields are split on a dot or hyphen into two parts. Either part may
# be numeric or alphabetic (e.g. brand "A.1", PO "1.a"), so every pair of
# parts is compared numerically when both are numbers and as strings
# otherwise.
sub by_brand_or_po {
    my ( $a_ba, $a_bn ) = split /\.|-/, $a->{brand};
    my ( $b_ba, $b_bn ) = split /\.|-/, $b->{brand};
    my ( $a_pa, $a_pn ) = split /\.|-/, $a->{PO};
    my ( $b_pa, $b_pn ) = split /\.|-/, $b->{PO};

    return _compare_part( $a_ba, $b_ba )
        || _compare_part( $a_bn, $b_bn )
        || _compare_part( $a_pa, $b_pa )
        || _compare_part( $a_pn, $b_pn );
}

# Compare two field parts. Numbers sort before non-numbers, numbers compare
# numerically and everything else compares as strings. A missing part is
# treated as the empty string.
sub _compare_part {
    my ( $left, $right ) = @_;
    $left  = '' unless defined $left;
    $right = '' unless defined $right;

    my $left_is_num  = $left  =~ /\A\d+\z/;
    my $right_is_num = $right =~ /\A\d+\z/;

    return $left <=> $right if $left_is_num && $right_is_num;
    return -1               if $left_is_num;
    return 1                if $right_is_num;
    return $left cmp $right;
}
It prioritizes the brand alpha over numeric over PO alpha over PO numeric, and will split over a dot or hyphen.
Sort it using Schwartzian transform and correct comparator for each field.
use strict;
use warnings;
use Data::Dumper;
my $arr_ref = [
    { brand => "A.1", PO => "1.a", supplier => "X" },
    { brand => "A.2", PO => "2.a", supplier => "X" },
    { brand => "B.1", PO => "1.b", supplier => "X" },
    { brand => "B.2", PO => "2.b", supplier => "X" },
    { brand => "B.3", PO => "1.c", supplier => "Y" },
];

# Schwartzian transform: split brand and PO once per row into the key list
# [ brand-alpha, brand-number, PO-number, PO-alpha ], sort on those keys with
# the comparator that fits each one, then unwrap the original rows.
my @sorted =
    map  { $_->[0] }
    sort {
               $a->[1][0] cmp $b->[1][0]
            or $a->[1][1] <=> $b->[1][1]
            or $a->[1][2] <=> $b->[1][2]
            or $a->[1][3] cmp $b->[1][3]
    }
    map {
        my $row = $_;
        [ $row, [ map { ( split /[.-]/, $_, 2 ) } @{$row}{qw(brand PO)} ] ]
    } @$arr_ref;

print Dumper( \@sorted );
Try using brand "A.10" to spot the difference.
use Sort::Key::Natural!
use Sort::Key::Natural qw(natkeysort);
# Natural sort on the key extracted by each block.
my @sorted_by_brand = natkeysort { $_->{brand} } @data;
# The rows store the purchase order under the upper-case key "PO".
my @sorted_by_po = natkeysort { $_->{PO} } @data;