#!/usr/bin/perl -ws
use strict;
$|++;

# stripmime - attempt to strip any HTML alt and attachments from MIME email
# Steve Kinzler, steve@kinzler.com, Jun 09
# https://kinzler.com/me/home.html#other

# derived from a script by Randal L Schwartz, merlyn@stonehenge.com, Jan 01
# http://www.perlmonks.org/index.pl?node_id=53404

# This is safe for all emails, but we suggest restricting to those with
# /boundary/ in the header's Content-Type for efficiency.  This script
# assumes some conventional MIME structures and may not work as desired
# with others, just possibly leaving things unstripped.  It also assumes
# all lines beginning with "From " after a blank line are mailbox envelopes.

my $usage = "usage: $0 [ -t | -a ] [ -r ] [ -b | -f ] [ file ... ]
	-t	strip any HTML alternative in the usual places (default)
	-a	also strip any attachments in the usual places
	-r	don't strip CRs from CR-NL line endings in the plaintext
	-b	take input as a mbox and filter all messages
	-f	take input as a mbox and filter first message\n";
die $usage if $::h; 1 if $::h || $::t || $::t || $::a || $::b || $::r;

use MIME::Parser;
use MIME::Entity;
use MIME::Words qw(:all);

my $parser = MIME::Parser->new;
$parser->output_to_core(1);
$parser->tmp_to_core(1);

my @msgs = ();
if ($::b || $::f) {
	@msgs = split(/\n\nFrom /, join('', <>));
	for (0 .. $#msgs - 1) { $msgs[$_] .= "\n\n"		 }
	for (1 .. $#msgs)     { $msgs[$_]  = 'From ' . $msgs[$_] }
} else {
	@msgs = (join('', <>));
}

my $didone = 0;
foreach (@msgs) {
	print, next if $::f and $didone++;

	my @msg  = split(/\n/, $_ . 'EOMsg'); pop @msg;
	for (0 .. $#msg) { $msg[$_] .= "\n" }
	exit 0		if ! @msg;
	my $envT = (@msg  and $msg[0] =~ /^From /) ? shift @msg : '';
	warn("$0: passing thru (not a mail message)\n"), print($envT, @msg),
		exit 0  if ! @msg or $msg[0] !~ /^([-\w]+:\s|$)/;
	my $envB = ($envT and $msg[$#msg] =~ /^$/) ? pop @msg   : '';
	my $ent  = eval { $parser->parse_data(join('', @msg)) };
	warn($@), print($envT, @msg, $envB), exit 1 if $@;
	@msg	 = ();

	my $did = 0;
	$did += &striphtml($ent);
	$did += &striphtml($ent->parts(0)) and
		    $ent->sync_headers(Nonstandard => 'ERASE')
			if &mixed($ent->effective_type);
	$did += &stripattach($ent) if $::a;

	# 2013-01-07: just to eg convert base64 messages to suggested
	&strip($ent)	       if ! $did && $ent->parts <= 1
			       && $ent->effective_type =~ /^text\//;
	&strip($ent->parts(0)) if ! $did && $ent->parts > 1
		     && $ent->parts(0)->effective_type =~ /^text\//;

	# 2014-02-21:
	&decode($ent->head, 'Subject');

	print $envT;
	$ent->print;
	print $envB;
}

###############################################################################

sub mixed {
	$_[0] eq 'multipart/mixed' or $_[0] eq 'multipart/related';
}

sub striphtml {
	my $ent = $_[0];
	return 0 unless $ent->effective_type eq 'multipart/alternative'
		    and $ent->parts == 2
		    and $ent->parts(0)->effective_type eq 'text/plain'
		    and $ent->parts(1)->effective_type eq 'text/html';
	&strip($ent); 1;
}

sub stripattach {
	my $ent = $_[0];
	return 0 unless &mixed($ent->effective_type)
		    and $ent->parts > 1
		    and $ent->parts(0)->effective_type eq 'text/plain';
	&strip($ent); 1;
}

sub strip {	# just re-encodes to suggested encoding if already singlepart
	my $ent = $_[0];
	$ent->make_multipart if $ent->parts <= 1;
	my $txt = $ent->parts(0)->bodyhandle->as_string;
	$txt =~ s/\r\n/\n/g unless $::r;
	$ent->parts([MIME::Entity->build(
	#	Data => $ent->parts(0)->body_as_string		# bug in orig
	#	. "\n\n[[HTML alternate version deleted]]\n");	# don't want
		Data => $txt, Encoding => -SUGGEST,
		Type => $ent->parts(0)->effective_type)]);
	$ent->make_singlepart;
	$ent->sync_headers(Nonstandard => 'ERASE');
}

sub decode {
	$_[0]->replace($_[1], scalar decode_mimewords($_[0]->get($_[1])));
}
