Friday, December 19, 2014

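Perl: compare two files and match rows on their key fields

For each row of file1, the script looks for a row of file2 whose first two columns are exactly equal and appends that row's remaining columns; rows of file1 without an exact match get NA in those columns.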

more file1
798355.2    3188172.8
800105.4    3188210.2
797867.9    3188222.7
798267.9    3188222.7
798667.3    3188222.6

more file2
798355.4    3188172.7   2000.00    1    10
800105.4    3188210.2   3000.00    2    20
797867.9    3188222.7   1684.05    3    30
798267.9    3188222.7   8000.00    4    40
798667.9    3188222.7   1700.46    5    50

output
798355.2    3188172.8    NA    NA    NA
800105.4    3188210.2    3000.00    2    20
797867.9    3188222.7    1684.05    3    30
798267.9    3188222.7    8000.00    4    40
798667.3    3188222.6    NA    NA    NA

#!/usr/bin/perl
use warnings;

# Read a whitespace-delimited text file into a list of rows.
#
# Parameters:
#   $path - name of the file to read.
# Returns:
#   A list of array references, one per non-blank line, each holding the
#   whitespace-separated fields of that line.
# Dies:
#   With "Can't open file '...': <reason>" when the file cannot be opened.
sub read_rows {
    my ($path) = @_;

    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle is scoped to this sub.
    open( my $fh, '<', $path ) or die "Can't open file '$path': $!";

    my @rows;
    while ( my $line = <$fh> ) {
        chomp $line;
        my @fields = split ' ', $line;

        # A blank (or whitespace-only) line carries no key fields; storing it
        # as an empty row would only produce a bogus record downstream.
        next unless @fields;

        push @rows, \@fields;
    }
    close($fh);

    return @rows;
}

# Rows to be looked up: the first two columns of each row are the keys.
my $file1 = 'file1.txt';
my @data1 = read_rows($file1);

# Reference rows: two key columns followed by the value columns.
my $file2 = 'file2.txt';
my @data2 = read_rows($file2);

# For every row of @data1, find the row of @data2 whose first two columns
# are closest (smallest squared Euclidean distance) and remember:
#   @dzt  - that smallest squared distance (0 means both keys are equal),
#   @dzt2 - column 3 of the closest @data2 row,
#   @dzt3 - column 4 of the closest @data2 row,
#   @dzt4 - column 5 of the closest @data2 row.
# All four arrays are indexed like @data1.
my ( @dzt, @dzt2, @dzt3, @dzt4 );

for my $i ( 0 .. $#data1 ) {

    # Squared distance from row $i of @data1 to every row of @data2.
    my @dza;
    for my $j ( 0 .. $#data2 ) {
        my $dx = $data1[$i][0] - $data2[$j][0];    # difference in key 1
        my $dy = $data1[$i][1] - $data2[$j][1];    # difference in key 2
        push @dza, $dx * $dx + $dy * $dy;
    }

    if ( !@dza ) {

        # Nothing to compare against: record an infinite distance so the
        # row can never be mistaken for an exact (distance 0) match.
        push @dzt,  9**9**9;
        push @dzt2, undef;
        push @dzt3, undef;
        push @dzt4, undef;
        next;
    }

    # Look the closest row up once and reuse its index for every column.
    my $nearest = minindex( \@dza );
    push @dzt,  $dza[$nearest];
    push @dzt2, $data2[$nearest][2];
    push @dzt3, $data2[$nearest][3];
    push @dzt4, $data2[$nearest][4];
}

# Print one tab-separated line per row of @data1: its two key columns
# followed by three value columns. The values come from @dzt2/@dzt3/@dzt4
# when the recorded distance in @dzt is exactly 0 (both keys equal);
# otherwise each value column is printed as "NA".
for my $i ( 0 .. $#data1 ) {

    # An undefined distance means no comparison was possible. Numerically
    # undef equals 0, so it must be ruled out before testing for a match.
    my $is_match = defined $dzt[$i] && $dzt[$i] == 0;

    my @values = $is_match ? ( $dzt2[$i], $dzt3[$i], $dzt4[$i] ) : ();

    my @columns = ( $data1[$i][0], $data1[$i][1] );
    for my $k ( 0 .. 2 ) {

        # A value missing from the matched row is reported as NA instead
        # of an empty field, which would be ambiguous in the output.
        push @columns, defined $values[$k] ? $values[$k] : "NA";
    }

    print join( "\t", @columns ), "\n";
}


# Return the index of the smallest element of an array.
#
# Parameters:
#   $values - reference to an array of numbers.
# Returns:
#   The index of the minimum element. When the minimum occurs more than
#   once, the index of its last occurrence is returned. For an empty array
#   the result is 0.
sub minindex {
  my ($values) = @_;
  my $best = 0;

  for my $pos ( 1 .. $#{$values} ) {
    # Keep the current best only while it is strictly smaller; an equal
    # candidate replaces it, which is why ties resolve to the last index.
    next if $values->[$best] < $values->[$pos];
    $best = $pos;
  }

  return $best;
}








No comments: