First off, I have to find the existence of the pass and fail files in the subdirectories. Then, I need to read the first line of the pass/fail file. I thought of separating the $file1 and $file to differentiate it. I'm very new to perl so I know my approach is very bad.
I trying to figure out how to combine my current code to read the files I checked exists.
use strict;
use File::Find 'find';
my $file = 'pass.txt';
my $file1 = 'fail.txt';
my #directory = ('ram1','ram2');
sub check
{
if ( -e $_ && $_ eq $file )
{
print "Found file '$_' in directory '$File::Find::dir'\n";
}
elsif ( -e $_ && $_ eq $file1 )
{
print "Found file '$_' in directory '$File::Find::dir'\n";
}
}
find (\&check,#directory);
Is it possible I use the code below for the first if condition? I know it doesn't work but I'm not sure what to do next as the fail and pass text are inside the directories.
if (open my $File::Find::dir, '<', $file){
my $firstLine = <$File::Find::dir>;
close $firstLine;
Any suggestions would be helpful!
If you just want to look just in ram1 and ram2, there's no point in using File::Find. That is used for recursive searches, meaning if you want to search all the subdirectories of ram1 and ram2. (And for that, I'd use File::Find::Rule over File::Find; it's much cleaner.)
my #dir_qfns = ( 'ram1', 'ram2' );
for my $dir_qfn (#dir_qfns) {
for my $fn ('pass.txt', 'fail.txt') {
my $file_qfn = "$dir_qfn/$fn";
open(my $fh, '<', $file_qfn)
or warn("Can't open \"$file_qfn\": $!\n"), next;
defined( my $first_line = <$fh> )
or warn("\"$file_qfn\" is empty\n"), next;
print("$file_qfn: $first_line");
}
}
If it's ok for a file to be missing, then you can ignore that error (ENOENT).
Similarly, you don't need to output an error message if the file is empty.
my #dir_qfns = ( 'ram1', 'ram2' );
for my $dir_qfn (#dir_qfns) {
for my $fn ('pass.txt', 'fail.txt') {
my $file_qfn = "$dir_qfn/$fn";
my $fh;
if (!open($fh, '<', $file_qfn)) {
warn("Can't open \"$file_qfn\": $!\n") if $!{ENOENT};
next;
}
defined( my $first_line = <$fh> )
or next;
print("$file_qfn: $first_line");
}
}
if (open my $f, '<', 'pass.txt') {
my $firstLine = <$f>;
close $f;
}
OP's code does not make much sense. Perhaps OP is looking for something of next kind
use strict;
use warnings;
use feature 'say';
my $dir = shift || 'some_dir_to_start_from';
my #files = qw/pass.txt fail.txt/;
my $match = join '|', #files;
my $regex = qr/\b($match)\b/;
files_lookup($dir,$regex);
exit 0;
sub files_lookup {
my $dir = shift;
my $re = shift;
for ( glob("$dir/*") ) {
files_lookup($_) if -d;
next unless /$re/;
if( -f ) {
say "File: $_";
open my $fh, '<', $_
or die "Couldn't open $_";
my $line = <$fh>;
say $line;
close $fh;
}
}
Related
I am trying to findy the common lines among two tab separated files based one field.
One line of the first file:
1 52854 s64199.1 A . . . PR GT 0/0
One line of the second file:
chr1 52854 . C T 215.302 . AB=0.692308;ABP=7.18621;AC=1;AF=0.5;AN=2;AO=9;CIGAR=1X;DP=13;DPB=13;DPRA=0;EPP=3.25157;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=17.5429;PAIRED=0;PAIREDR=0.25;PAO=0;PQA=0;PQR=0;PRO=0;QA=318;QR=138;RO=4;RPP=3.25157;RPPR=5.18177;RUN=1;SAF=0;SAP=22.5536;SAR=9;SRF=1;SRP=5.18177;SRR=3;TYPE=snp;technology.illumina=1;BVAR GT:DP:RO:QR:AO:QA:GL 0/1:13:4:138:9:318:-5,0,-5
Based on the second field (52854) in this example I have a much.
Here is my code which finds the common ones, but my files are quite large and takes a lot of time.
Is there any way to speed up the process?
Thank you very much in advance.
#!/app/languages/perl/5.14.2/bin/perl
use strict;
use warnings;
my $map_file = $ARGV[0];
my $vcf_file = $ARGV[1];
open my $map_info, $map_file or die "Could not open $map_file: $!";
my #map_array = ();
my #vcf_array = ();
while( my $mline = <$map_info>) {
chomp $mline;
my #data1 = split('\t', $mline);
my $pos1 = $data1[1];
push (#map_array, $pos1);
}
open my $vcf_info, $vcf_file or die "Could not open $vcf_file: $!";
while( my $line = <$vcf_info>) {
if ($line !~ m/^#/) {
push (#vcf_array, $line);
}
}
foreach my $a (#map_array) {
chomp $a;
foreach my $b (#vcf_array) {
chomp $b;
my #data = split('\t', $b);
my $pos2 = $data[1];
my $ref2 = $data[3];
my $allele = $data[4];
my $genotype = $data[9];
if ($a == $pos2) {
print $pos2 . "\t" . $ref2. "\t".$allele."\t".$genotype. "\n";
#print "$b\n";
}
}
}
Here's a version that should run much faster than your own
It reads the map file and stores each pos field in hash %wanted. Then it reads through the second file and checks whether the record is in the list of wanted values. If so then it splits the record and prints the fields you require
Note that I haven't been able to test this beyond making sure that it compiles
use strict;
use warnings;
use 5.010;
use autodie;
my ( $map_file, $vcf_file ) = #ARGV;
my %wanted;
{
open my $map_fh, '<', $map_file;
while ( <$map_fh> ) {
chomp;
my $pos = ( split /\t/, $_, 3 )[1];
++$wanted{$pos};
}
}
{
open my $vcf_fh, '<', $vcf_file;
while ( <$vcf_fh> ) {
next if /^#/;
chomp;
my $pos = ( split /\t/, $_, 3 )[1];
next unless $wanted{$pos};
my ( $ref, $allele, $genotype ) = ( split /\t/ )[3, 4, 9];
print join("\t", $pos, $ref, $allele, $genotype), "\n";
}
}
Below please find minimal modification of your script for hash based searches
use strict;
use warnings;
my $map_file = $ARGV[0];
my $vcf_file = $ARGV[1];
my %vcf_hash;
open( my $vcf_info, $vcf_file) or die "Could not open $vcf_file: $!";
while( my $line = <$vcf_info>) {
next if $line =~ m/^#/; # Skip comment lines
chomp $line;
my (#data) = split(/\t/, $line);
die unless #data >= 10; # Check number of fields in the input line
my ($pos) = $data[1];
# $. - line number in the file
$vcf_hash{$pos}{$.} = \#data;
}
open( my $map_info, $map_file) or die "Could not open $map_file: $!";
while( my $mline = <$map_info>) {
chomp $mline;
my (#data) = split(/\t/, $mline);
die unless #data >= 2; # Check number of fields in the input line
my ($pos) = $data[1];
if( exists $vcf_hash{$pos}) {
my $hash_ref = $vcf_hash{$pos};
for my $n (sort{$a<=>$b} keys %$hash_ref) {
my $array_ref = $hash_ref->{$n};
my $pos2 = $array_ref->[1];
my $ref2 = $array_ref->[3];
my $allele = $array_ref->[4];
my $genotype = $array_ref->[9];
print $pos2 . "\t" . $ref2. "\t".$allele."\t".$genotype. "\n";
}
}
}
The script may be improved further to reduce memory use if you use huge data files.
There is not a need to keep your map_file in memory but just keys. It is good to make them keys in a hash which you use for existence checking. You don't have to keep your vcf_file in memory as well, but you can just make a decision to output or not.
#!/app/languages/perl/5.14.2/bin/perl
use strict;
use warnings;
use autodie;
use constant KEY => 1;
use constant FIELDS => ( 1, 3, 4, 9 );
my ( $map_file, $vcf_file ) = #ARGV;
my %map;
{
my $fh;
open $fh, '<', $map_file;
while (<$fh>) {
$map{ ( split /\t/, $_, KEY + 2 )[KEY] } = undef;
}
}
{
my $fh;
open $fh, '<', $vcf_file;
while (<$fh>) {
next if /^#/;
chomp;
my #data = split /\t/;
print join "\t", #data[FIELDS] if exists $map{ $data[KEY] };
}
}
I got stuck with logic behind loops (while & foreach) and AoH. I have basic knowledge about loops and arrays of hashes, but I can't quite understand how to combine them into 1 single and simple solution. My task is to check regular user's password age, if it is older than n-days (last part is OK for me, I know how to solve it, using GetOptions etc,.).
To accomplish that I figured out a solution:
1 Load file /etc/passwd into script, preform regex search to find out regular users. Regular users in Linux like systems have IDs from 1000 and above, so I use this regex to find out those:
/(\w+)[:]x[:]1[0-9]{3}/
2 Load results of regex serch in to array:
my (#Usernames, %pwdsettings);
while (my $pwdsettings = <$fh2>) {
if ($pwdsettings =~ /(\w+)[:]x[:]1[0-9]{3}/) {
$pwdsettings{"Username"} = $1;
push (#Usernames, \%pwdsettings);
}
}
3 Preform chage check for every entry in array:
my $pwdsett_dump = "tmp/pwdsett-dump.txt";
...
foreach (#Usernames) {
system("chage -l $_ > $pwdsett_dump")
}
4 Open $pwdsett_dump and then preform second regex search to get date of last password change. After, load results into existing hash inside array (AoH):
open (my $fh3, "<", $pwdsett_dump) or die "Could not open file '$pwdsett_dump': $!";
while (my $array = <$fh3>) {
if ($array =~ /^Last\s+password\s+change\s+:\s(\w{3})\s+(\d{2}),\s+(\d{4})/) {
$pwdsettings{"Month"} = $1;
$pwdsettings{"Day"} = $2;
$pwdsettings{"Year"} = $3;
}
}
But, somewhere it went terribly wrong. My script loads only 1 user in to AoH, second user is never loaded and I get $VAR1->[0].
What I want is to understand how AoH and loops are created in right way.
Full script:
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
my $pwdsett_dump = "tmp/pwdsett-dump.txt";
my $usernames_dump = "tmp/usernames-dump.txt";
system("cat /etc/passwd > $usernames_dump");
open (my $fh2, "<", $usernames_dump) or die "Could not open file '$usernames_dump': $!";
my (#Usernames, %pwdsettings);
while (my $pwdsettings = <$fh2>) {
if ($pwdsettings =~ /(\w+)[:]x[:]1[0-9]{3}/) {
$pwdsettings{"Username"} = $1;
push (#Usernames, \%pwdsettings);
}
}
foreach (#Usernames) {
system("chage -l $_ > $pwdsett_dump")
}
open (my $fh3, "<", $pwdsett_dump) or die "Could not open file '$pwdsett_dump': $!";
while (my $array = <$fh3>) {
if ($array =~ /^Last\s+password\s+change\s+:\s(\w{3})\s+(\d{2}),\s+(\d{4})/) {
$pwdsettings{"Month"} = $1;
$pwdsettings{"Day"} = $2;
$pwdsettings{"Year"} = $3;
}
}
print Dumper \#Usernames;
you need to append the file when you output meaning use ">>" instead of ">" which will overwrite the file.
system("chage -l $_ >> $pwdsett_dump") as you are running it in loop you are overwriting each time the loop executes.
Use:
foreach (#Usernames) {
system("chage -l $_ >> $pwdsett_dump")
}
########sample script
#!/usr/bin/perl
use strict;
use warnings;
my $usernames_dump = "/etc/passwd";
open (my $fh2, "<", $usernames_dump) or die "Could not open file '$usernames_dump': $!";
my #pwdsettings;
my $i =0;
my #pwdsett_dump;
while (<$fh2>) {
if ($_ =~ /(\w+)[:]x[:]1[0-9]{3}/) {
my #user = split(/:/, $_);
$pwdsettings[$i] = $user[0];
$pwdsett_dump[$i] = `chage -l $user[0]|grep Last`;
$pwdsett_dump[$i] =~ s/Last.*://;
$pwdsett_dump[$i] =~ s/,//;
my #m = split(/ /,$pwdsett_dump[$i]);
print "$user[0]\t Date: $m[2] Month: $m[1] Year: $m[3]\n";
$i++;
}
}
Output: testuser Date: 12 Month: May Year: 2015
I have two text files that contain user records. I have to compare these two files and figure out which users are missing from File1. And delete these Orphans from file2.
#!/usr/local/bin/perl -w
use strict;
use warnings;
use autodie;
use Text::Diff;
use List::Compare;
use Data::Dumper;
my $Users1 = "Users1.txt";
my $Users2 ="Users2.txt";
my %hash1;
my %hash2;
my %new_hash;
my #sorted_1;
my #sorted_2;
my #list_keys1;
my #list_keys2;
open(my $fh1, '<:encoding(UTF-8)', $Users1) or die "Colud not open the file!";
while(my $record1 = <$fh1>)
{
chomp $record1;
my #list1 = split( '/', $record1);
foreach my $item(#list1)
{
$new_hash{$list1[1]} = $list1[0];
$hash1{$list1[1]} = $list1[0];
}
while ( my ($key, $value) = each(%hash1) ) {
push (#list_keys1, $key);
#sorted_1 = sort #list_keys1;
}
}
print "\t\tHash values for USERS1:\n";
print Dumper \%hash1;
open(my $fh2, '<:encoding(UTF-8)', $Users2) or die "Colud not open the file!";
while(my $record2 = <$fh2>)
{
chomp $record2;
my #list2 = split( '/', $record2);
foreach my $item(#list2)
{
$hash2{$list2[1]} = $list2[0];
}
while ( my ($key, $value) = each(%hash2) )
{
push (#list_keys2, $key);
#sorted_2 = sort #list_keys2;
}
}
print "\n\n\t\tHash values for Users2:\n";
print Dumper \%hash2;
#hash1{#list_keys1} = 1;
#hash2{#list_keys2} = 1;
foreach(keys %hash2)
{
print "\nThis user does not exist(to be deleted): $_\n" unless exists $hash1{$_};
}
foreach (keys %hash1)
{
print "\nNew User (to be added):$_\n" unless exists $hash2{$_};
}
close ($fh1);
close ($fh2);
Questions:
I am not able to sort the user ID (String) alphabetically(here, USER IDs are random strings of length 7). Is there any limitations when it comes to sorting array/hashes in Perl?
I am not able to compare two hashes and get the differences. What would be the most efficient way to do that?
Are there any additional libraries that I need to install in order to handle this part of code?
Sample records from file:
File1:
ASIA/ASEDF46
INDIA/PSDfT5V
CHINA/FSDfT5V
INDIA/AA44TYB
USA/BBRTT67
File 2:
INDIA/PSDfT5V
CHINA/FSDfT5V
INDIA/AA44TYB
USA/BBRTT67
UK/ZK9EELO
use strict;
use warnings;
use autodie;
open my $in, '<', 'in.txt';
open my $in2, '<', 'in_2.txt';
my (%data1, %data2);
while(<$in>){
chomp;
my #split = split/\//;
$data1{$split[0]} = $split[1];
}
while(<$in2>){
chomp;
my #split = split/\//;
$data2{$split[0]} = $split[1];
}
foreach(sort keys %data1){
print "User: $_ Value: $data1{$_}\n" if $data2{$_};
}
I was checking out this link here: How could I write a Perl script to calculate the MD5 sum of every file in a directory?
It gets the md5 of each file in a specified directory. What i want to do is take those md5's and compare them against an array. This is what i have so far.
use warnings;
use strict;
use Digest::MD5 qw(md5_hex);
my $dirname = "./";
opendir( DIR, $dirname );
my #files = readdir(DIR);
closedir(DIR);
print "#files\n";
foreach my $file (#files) {
if ( -d $file || !-r $file ) { next; }
open( my $FILE, $file );
binmode($FILE);
print Digest::MD5->new->addfile($FILE)->hexdigest, " $file\n";
my #array = ('667fc8db8e5519cacbf8f9f2af2e0b08');
if (#array ~~ $FILE) {
print "matches array", "\n";
} else {
print "doesnt match array", "\n";
}
}
system ( 'pause' )
But with this, i always get doesnt match array no matter if it does match the array perfectly. I can print #array and it will even show the same md5 values of the file. But like i said it just always says "doesnt match array". ive never got it to say "matches array" on any file. Thank you for looking :)
EDIT:
This is what i have now.
use warnings;
use strict;
use Digest::MD5 qw(md5_hex);
my $dirname = "./";
opendir( DIR, $dirname );
my #files = readdir(DIR);
closedir(DIR);
print "#files\n";
foreach my $file (#files) {
next if -d $file || !-r $file;
open( my $FILE, $file );
binmode($FILE);
#print digest::MD5->new->addfile($FILE)->hexdigest, " $file\n";
Sdigest = Digest::MD5->new->addfile($FILE)->hexdigest, " $file\n";
my #array = ('667fc8db8e5519cacbf8f9f2af2e0b08');
if($digest eq $array[0]) {
print "matches array", "\n";
} else {
print "doesnt match array", "\n";
}
}
system ( 'pause' );
Thanks to all for your help. You guys are awesome ;)
Please do not use smartmatch ~~. It was declared experimental in the latest release of Perl, and the semantics are likely to change in the future.
The best solution is to create a hash of the fingerprints you know:
my %fingerprints;
$fingerprints{"667fc8db8e5519cacbf8f9f2af2e0b08"} = undef;
If you want to load a whole array of fingerprints into the hash so that we can easily test for existence, you can use a hash slice:
#fingerprints{#array} = ();
Next, we store the fingerprint of the current file in a variable:
my $digest = Digest::MD5->new->addfile($FILE)->hexdigest;
Then we test if that $digest exists in the hash of fingerprints:
if (exists $fingerprints{$digest}) {
print "$digest for <$file> -- FOUND\n";
}
else {
print "$digest for <$file>\n";
}
Using a hash is usually faster than looping through an array (If you do multiple lookups).
Suggested complete program:
use strict;
use warnings;
use feature qw< say >;
use autodie; # automatic error handling
use Digest::MD5;
my ($dirname, $fingerprint_file) = #ARGV; # takes two command line arguments
length $dirname or die "First argument must be a directory name\n";
length $fingerprint_file or die "Second argument must be a file with fingerprints\n";
# load the fingerprints
my %fingerprints;
open my $fingerprints_fh, "<", $fingerprint_file;
while (<$fingerprints_fh>) {
chomp;
$fingerprints{$_} = undef;
}
close $fingerprints_fh;
opendir my $directory, $dirname;
while(my $file = readdir $directory) {
next if not -f $file;
open my $fh, "<:raw", "$dirname/$file";
my $digest = Digest::MD5->new->addfile($fh)->hexdigest;
close $fh;
if (exists $fingerprints{$digest}) {
say qq($digest "$file" -- FOUND);
}
else {
say qq($digest "$file");
}
}
closedir $directory;
Example invocation
> perl script.pl . digests.txt
Perhaps the following will be helpful:
use warnings;
use strict;
use Digest::MD5 qw(md5_hex);
use File::Basename;
my $dirname = './';
my %MD5s = (
'667fc8db8e5519cacbf8f9f2af2e0b08' => 1,
'8c0452b597bc2c261ded598a65b043b9' => 1
);
for my $file ( grep { !-d and -r } <$dirname*> ) {
open my $FILE, '<', $file or die $!;
binmode $FILE;
my $md5hexdigest = Digest::MD5->new->addfile($FILE)->hexdigest;
close $FILE;
print basename ($file), " md5hexdigest $md5hexdigest ";
if ( $MD5s{$md5hexdigest} ) {
print "matches hash", "\n";
}
else {
print "doesn't match hash", "\n";
}
}
Sample output:
XOR_String_Match.pl md5hexdigest 8c0452b597bc2c261ded598a65b043b9 matches hash
zipped.txt md5hexdigest d41d8cd98f00b204e9800998ecf8427e doesn't match hash
Like this:
my $digest = Digest::MD5->new->addfile($FILE)->hexdigest, " $file\n";
then
if($digest eq $array[0])
By the way, it would maybe be slightly more idiomatic to say (earlier on in your code):
next if -d $file || !-r $file;
I'm relatively new to Perl and I've come across this project that I'm having a bit of a hard time with.
The object of the project is to compare two csv files, one of which would contain:
$name, $model, $version
and the other which would contain:
$name2,$disk,$storage
in the end the RESULT file will contain that matched lines and put together the information like so:
$name, $model, $version, $disk,$storage.
I've managed to do this, but my problem is that when one of the elements in missing the program breaks. When it encounters a line in the file missing an element it stops at that line. How can I fix this problem? any suggestions or a way as to how I can perhaps make it skip that line and continue on?
Here's my code:
open( TESTING, '>testing.csv' ); # Names will be printed to this during testing. only .net ending names should appear
open( MISSING, '>Missing.csv' ); # Lines with missing name feilds will appear here.
#open (FILE,'C:\Users\hp-laptop\Desktop\file.txt');
#my (#array) =<FILE>;
my #hostname; #stores names
#close FILE;
#***** TESTING TO SEE IF ANY OF THE LISTED ITEMS BEGIN WITH A COMMA AND DO NOT HAVE A NAME.
#***** THESE OBJECTS ARE PLACED INTO THE MISSING ARRAY AND THEN PRINTED OUT IN A SEPERATE
#***** FILE.
#open (FILE,'C:\Users\hp-laptop\Desktop\file.txt');
#test
if ( open( FILE, "file.txt" ) ) {
}
else {
die " Cannot open file 1!\n:$!";
}
$count = 0;
$x = 0;
while (<FILE>) {
( $name, $model, $version ) = split(","); #parsing
#print $name;
chomp( $name, $model, $version );
if ( ( $name =~ /^\s*$/ )
&& ( $model =~ /^\s*$/ )
&& ( $version =~ /^\s*$/ ) ) #if all of the fields are blank ( just a blank space)
{
#do nothing at all
}
elsif ( $name =~ /^\s*$/ ) { #if name is a blank
$name =~ s/^\s*/missing/g;
print MISSING "$name,$model,$version\n";
#$hostname[$count]=$name;
#$count++;
}
elsif ( $model =~ /^\s*$/ ) { #if model is blank
$model =~ s/^\s*/missing/g;
print MISSING"$name,$model,$version\n";
}
elsif ( $version =~ /^\s*$/ ) { #if version is blank
$version =~ s/^\s*/missing/g;
print MISSING "$name,$model,$version\n";
}
# Searches for .net to appear in field "$name" if match, it places it into hostname array.
if ( $name =~ /.net/ ) {
$hostname[$count] = $name;
$count++;
}
#searches for a comma in the name feild, puts that into an array and prints the line into the missing file.
#probably won't have to use this, as I've found a better method to test all of the feilds ( $name,$model,$version)
#and put those into the missing file. Hopefully it works.
#foreach $line (#array)
#{
#if($line =~ /^\,+/)
#{
#$line =~s/^\,*/missing,/g;
#$missing[$x]=$line;
#$x++;
#}
#}
}
close FILE;
for my $hostname (#hostname) {
print TESTING $hostname . "\n";
}
#for my $missing(#missing)
#{
# print MISSING $missing;
#}
if ( open( FILE2, "file2.txt" ) ) { #Run this if the open succeeds
#open outfile and print starting header
open( RESULT, '>resultfile.csv' );
print RESULT ("name,Model,version,Disk, storage\n");
}
else {
die " Cannot open file 2!\n:$!";
}
$count = 0;
while ( $hostname[$count] ne "" ) {
while (<FILE>) {
( $name, $model, $version ) = split(","); #parsing
#print $name,"\n";
if ( $name eq $hostname[$count] ) # I think this is the problem area.
{
print $name, "\n", $hostname[$count], "\n";
#print RESULT"$name,$model,$version,";
#open (FILE2,'C:\Users\hp-laptop\Desktop\file2.txt');
#test
if ( open( FILE2, "file2.txt" ) ) {
}
else {
die " Cannot open file 2!\n:$!";
}
while (<FILE2>) {
chomp;
( $name2, $Dcount, $vname ) = split(","); #parsing
if ( $name eq $name2 ) {
chomp($version);
print RESULT"$name,$model,$version,$Dcount,$vname\n";
}
}
}
$count++;
}
#open (FILE,'C:\Users\hp-laptop\Desktop\file.txt');
#test
if ( open( FILE, "file.txt" ) ) {
}
else {
die " Cannot open file 1!\n:$!";
}
}
close FILE;
close RESULT;
close FILE2;
I think you want next, which lets you finish the current iteration immediately and start the next one:
while (<FILE>) {
( $name, $model, $version ) = split(",");
next unless( $name && $model && $version );
...;
}
The condition that you use depends on what values you'll accept. In my examples, I'm assuming that all values need to true. If they need to just not be the empty string, maybe you check the length instead:
while (<FILE>) {
( $name, $model, $version ) = split(",");
next unless( length($name) && length($model) && length($version) );
...;
}
If you know how to validate each field, you might have subroutines for those:
while (<FILE>) {
( $name, $model, $version ) = split(",");
next unless( length($name) && is_valid_model($model) && length($version) );
...;
}
sub is_valid_model { ... }
Now you just need to decide how to integrate that into what you are already doing.
You should start by adding use strict and use warnings to the top of your program, and declaring all variables with my at their point of first use. That will reveal a lot of simple mistakes that are otherwise difficult to spot.
You should also use the three-parameter for of open and lexical filehandles, and the Perl idiom for checking exceptions on opening files is to add or die to an open call. if statements with an empty block for the success path waste space and become unreadable. An open call should look like this
open my $fh, '>', 'myfile' or die "Unable to open file: $!";
Finally, it is much safer to use a Perl module when you are handling CSV files as there are a lot of pitfalls in using a simple split /,/. The Text::CSV module has done all the work for you and is available on CPAN.
You problem is that, having read to the end of the first file, you don't rewind or reopen it before reading from the same handle again in the second nested loop. That means no more data will be read from that file and the program will behave as if it is empty.
It is a bad strategy to read through the same file hundreds of times just to pair up coresponding records. If file is of a reasonable size you should build a data structure in memory to hold the information. A Perl hash is ideal as it allows you to look up the data corresponding to a given name instantly.
I have written a revision of your code that demonstrates these points. It would be awkward for me to test the code as I have no sample data, but if you continue to have problems please let us know.
use strict;
use warnings;
use Text::CSV;
my $csv = Text::CSV->new;
my %data;
# Read the name, model and version from the first file. Write any records
# that don't have the full three fields to the "MISSING" file
#
open my $f1, '<', 'file.txt' or die qq(Cannot open file 1: $!);
open my $missing, '>', 'Missing.csv'
or die qq(Unable to open "MISSING" file for output: $!);
# Lines with missing name fields will appear here.
while ( my $line = csv->getline($f1) ) {
my $name = $line->[0];
if (grep $_, #$line < 3) {
$csv->print($missing, $line);
}
else {
$data{$name} = $line if $name =~ /\.net$/i;
}
}
close $missing;
# Put a list of .net names found into the testing file
#
open my $testing, '>', 'testing.csv'
or die qq(Unable to open "TESTING" file for output: $!);
# Names will be printed to this during testing. Only ".net" ending names should appear
print $testing "$_\n" for sort keys %data;
close $testing;
# Read the name, disk and storage from the second file and check that the line
# contains all three fields. Remove the name field from the start and append
# to the data record with the matching name if it exists.
#
open my $f2, '<', 'file2.txt' or die qq(Cannot open file 2: $!);
while ( my $line = $csv->getline($f2) ) {
next unless grep $_, #$line >= 3;
my $name = shift #$line;
next unless $name =~ /\.net$/i;
my $record = $data{$name};
push #$record, #$line if $record;
}
# Print the completed hash. Send each record to the result output if it
# has the required five fields
#
open my $result, '>', 'resultfile.csv' or die qq(Cannot open results file: $!);
$csv->print($result, qw( name Model version Disk storage ));
for my $name (sort keys %data) {
my $line = $data{$name};
if (grep $_, #$line >= 5) {
$csv->print($result, $data{$name});
}
}