#!/usr/bin/perl -s
# The -s switch above makes perl itself parse leading -x command-line
# switches into the package variables $h, $s, $t, $w and $f used below.

# Create files (notably the temporary copy of stdin) readable by the owner only.
umask 077;

# xls2tsv - filter Excel worksheets into tab-separated text
# Steve Kinzler, steve@kinzler.com, Feb 05
# https://kinzler.com/me/home.html#unix

# Note: could enhance for xlsx support with Spreadsheet::XLSX
#	(but doesn't have the whole ::Simple interface)
#	see also ssconvert(1) from Gnumeric, used in vshnurc
use Spreadsheet::ParseExcel::Simple;
use File::Temp qw/tempdir tempfile/;

# Help text.  It is emitted through die (so it goes to stderr and the exit
# status is non-zero) when -h is given or when more than one file operand
# remains on the command line.
$usage = <<"EOF";
usage: $0 [ -s ] [ -t ] [ -w ] [ -f ] [ file.xls ]
	-s	don't delete whitespace from the beginning/end of data strings
	-t	don't drop empty data past last non-empty first-line data col
	-w	don't separate worksheets with a blank line
	-f	only output the first worksheet
EOF

if ($h || @ARGV > 1) {
	die $usage;
}

# Decide which file to parse.  With a file operand, use it directly.
# Otherwise spool stdin into a temporary .xls file, because the parser
# below is handed a file name rather than a handle.
if (@ARGV) {
	$file = $ARGV[0];
} else {
	# CLEANUP => 1 asks File::Temp to remove the directory (and the spool
	# file inside it) when the program exits.
	my $tdir = tempdir(CLEANUP => 1) || die $!;
	(my $fh, $file) = tempfile(DIR => $tdir, SUFFIX => '.xls');

	# A workbook is binary data: disable any newline/encoding translation
	# on both ends and copy in fixed-size chunks instead of "lines".
	binmode STDIN;
	binmode $fh;
	my ($chunk, $got);
	while ($got = read(STDIN, $chunk, 65536)) {
		print $fh $chunk or die "$0: cannot write $file: $!\n";
	}
	die "$0: error reading standard input: $!\n" unless defined $got;

	# Buffered write errors (e.g. a full disk) only surface at close time;
	# without this check a truncated workbook would be parsed silently.
	close $fh or die "$0: cannot write $file: $!\n";
}

# Parse the workbook.  read() yields nothing when the file cannot be parsed,
# so check it before asking for the worksheets instead of failing with
# "Can't call method ... on an undefined value".
my $book = Spreadsheet::ParseExcel::Simple->read($file)
	or die "$0: $file: cannot parse as an Excel workbook\n";
@sheets = $book->sheets;

# -f: keep only the first worksheet.  Guard against a workbook without any
# worksheet, which would otherwise leave a lone undef in @sheets.
@sheets = ($sheets[0]) if $f && @sheets;

# Write every selected worksheet to the currently selected output handle.
print_sheets(@sheets);

# print_sheets(@sheets) - print each worksheet as tab-separated lines.
#
# Each argument must provide has_data() and next_row().  Behaviour is
# controlled by the command-line switch variables:
#	$s	keep leading/trailing whitespace in cell values
#	$t	keep all trailing empty cells
#	$w	do not print a blank line between worksheets
# Returns nothing useful.
sub print_sheets {
	my @pending = @_;

	while (@pending) {
		my $sheet = shift @pending;
		my $row   = 0;
		my $ncol  = 0;	# width of this sheet's first row after trimming

		while ($sheet->has_data) {
			my @data = $sheet->next_row;
			$row++;

			for (@data) {
				# Keep the output one line per row and one field
				# per cell by making embedded separators visible.
				s/\t/<TAB>/g; s/\n/<NL>/g; s/\r/<CR>/g;
				unless ($s) { s/^\s+//; s/\s+$//; }
			}

			unless ($t) {
				# The first row loses all trailing empty cells and
				# fixes the column count; later rows are only
				# trimmed back to that count.  The "@data > $floor"
				# test also stops the loop on an entirely empty
				# first row, where testing the last element alone
				# would never become false.
				my $floor = $row == 1 ? 0 : $ncol;
				pop @data while @data > $floor && $data[-1] eq '';
				$ncol = @data if $row == 1;
			}

			print join("\t", @data), "\n";
		}

		# Blank separator line, but not after the last worksheet.
		print "\n" if ! $w && @pending;
	}
}
