#!/usr/bin/perl -s

# matchcol - merge two tab-separated tables by matching on a common column
# Steve Kinzler, steve@kinzler.com, Sep 03/Sep 04/Feb 07
# https://kinzler.com/me/home.html#unix

# Usage text.  It is the die message for -h and for every malformed
# invocation, so it goes to standard error and the exit status is non-zero.
$usage = <<"END_USAGE";
usage: $0 [ -l ] [ -x ] [ -X ] [ -a ] [ -m | -M ]
       file1 col1 file2 col2
	-l	abut the last matching line of file2 instead of the first
	-x	exclude the matched col2 in abutted file2 lines
	-X	exclude col1 in file1 lines
	-a	don't abut the file2 line
	-m	don't output file1 lines that don't have a file2 match
	-M	output only file1 lines that don't have a file2 match
Files are interpreted as tab-separated tables.  Input columns are numbered
as integers from zero.  Matching is the string equivalence of the given
column values.  By default, the output is file1 with lines abutted with
any first matching line of file2, possibly reused.
END_USAGE

# The option flags ($h, $m, $M, ...) are package globals filled in by the
# -s switch on the #! line; only the positional arguments remain in @ARGV.
# Each guard below rejects one kind of bad invocation.
die $usage if $h;				# help requested
die $usage unless @ARGV == 4;			# file1 col1 file2 col2
die $usage if $m && $M;				# -m and -M are exclusive
die $usage if grep { !/^\d+$/ } @ARGV[1, 3];	# columns must be digit strings

# Pass 1: index file2 by its key column (col2, numbered from zero).
#
# %file2 maps each distinct key value to the file2 line that will be abutted
# to matching file1 lines: the first line carrying that key, or the last one
# under -l.  Under -x the key column is removed from the stored line.  The
# -l and -x flags are package globals set by the -s switch on the #! line.
my $key_col2 = $ARGV[3];

# Three-argument open takes the file name literally (no whitespace trimming,
# no special meaning for a leading '&').  A name of '-' keeps meaning
# standard input, which the two-argument form also honored.
my $file2_fh;
($ARGV[2] eq '-' ? open($file2_fh, '<&', \*STDIN)
		 : open($file2_fh, '<', $ARGV[2]))
	|| die "$0: cannot open $ARGV[2] ($!)\n";

while (my $row = <$file2_fh>) {
	# chomp, not chop: a final line without a newline must keep its last
	# character.
	chomp $row;

	# Limit -1 keeps trailing empty columns, so re-joining under -x does
	# not shorten the row.
	my @cols = split(/\t/, $row, -1);

	# A row too short to have the key column is filed under the empty
	# string, which is also the key an empty column yields.
	my $key = defined $cols[$key_col2] ? $cols[$key_col2] : '';

	if ($x) {
		splice(@cols, $key_col2, 1) if $key_col2 < @cols;
		$row = join("\t", @cols);
	}

	# First occurrence wins unless -l asks for the last one.
	$file2{$key} = $row if $l || ! exists $file2{$key};
}
close $file2_fh;

# Pass 2: stream file1 and print each line, abutted with its file2 match.
#
# For every file1 line the value in column col1 (numbered from zero) is
# looked up in %file2.  The option flags are package globals set by the -s
# switch on the #! line:
#	-m	skip lines that have no match
#	-M	skip lines that have a match
#	-X	remove the key column from the printed file1 line
#	-a	print the file1 line only, never the matching file2 line
my $key_col1 = $ARGV[1];

# Three-argument open takes the file name literally (no whitespace trimming,
# no special meaning for a leading '&').  A name of '-' keeps meaning
# standard input, which the two-argument form also honored.
my $file1_fh;
($ARGV[0] eq '-' ? open($file1_fh, '<&', \*STDIN)
		 : open($file1_fh, '<', $ARGV[0]))
	|| die "$0: cannot open $ARGV[0] ($!)\n";

while (my $record = <$file1_fh>) {
	# chomp, not chop: a final line without a newline must keep its last
	# character.
	chomp $record;

	# Limit -1 keeps trailing empty columns, so re-joining under -X does
	# not shorten the line.
	my @cells = split(/\t/, $record, -1);

	# A line too short to have the key column is looked up under the empty
	# string, which is also the key an empty column yields.
	my $key = defined $cells[$key_col1] ? $cells[$key_col1] : '';
	my $partner = $file2{$key};

	next if $m && ! defined $partner;
	next if $M &&   defined $partner;

	if ($X) {
		splice(@cells, $key_col1, 1) if $key_col1 < @cells;
		$record = join("\t", @cells);
	}

	print $record;
	print "\t", $partner if ! $a && defined $partner;
	print "\n";
}
close $file1_fh;