#!/usr/bin/perl
#
# zip_policy.pl 0.02
#
# Enforces file name and encrypted content policy inside ZIP archives.
# Exits with return value of 1 if banned or encrypted content is
# found. DOES NOT modify the zip file... use this to reject or
# remove an entire ZIP file attachment that has banned content.
#
# For use with Anomy Sanitizer 1.56+ (http://mailtools.anomy.net/)
#
#    Copyright 2004 Advosys Consulting Inc., Ottawa
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
# Use this as the scanner for attachments named "*.zip"
#
# Requires the following Perl modules:
#	- Archive::Zip v0.12 or later
#   - Additional modules required by Archive::Zip (see Required Modules
#     section of Archive::Zip documentation)
#
# Example uses:
#
# USAGE ONE: You use Anomy only to enforce e-mail attachment policies
# and rely on an external downstream SMTP anti-virus product to actually
# scan e-mail for malware:
#
#    file_list_1 = (?i)(\.zip\s*)
#    file_list_1_policy = accept:drop:drop:drop
#    file_list_1_scanner = 0:::/usr/local/bin/zip_policy.pl %FILENAME
#
# Note: policy "accept" tells Anomy to skip all remaining rules.
#
# USAGE TWO: You use Anomy to both enforce e-mail attachment policies
# and also to use a command-line antivirus product to look for malware:
#
#    file_list_1 = (?i)(\.zip\s*)
#    file_list_1_policy = unknown:drop:drop:drop
#    file_list_1_scanner = 0:::/usr/local/bin/zip_policy.pl %FILENAME
#
#    file_list_2 = .*
#    file_list_2_policy = accept:accept:save:save
#    file_list_2_scanner = 0:5:3,4:/usr/local/bin/avp.sh %FILENAME
#
# Note: Policy "unknown" tells Anomy to invoke the next rule, so a zip
# accepted by the policy filter will still be scanned for viruses. DO
# NOT USE "accept" for zip_policy.pl or the virus scanner will never be called!
#
# IMPLEMENTATION NOTES:
#
# Detecting encrypted files:
# 	If the ZIP uses the newer encryption schemes created by Winzip
# 	or PKWare's PKZip, this program will either detect it
# 	or die horribly with exit code 255.
# 	Set up Anomy to reject on every non-zero exit code (not just 1) to be safe.
#
# Multi-volume archives
# 	ZIP files created as disk-spanning (multi-volume) will
# 	be rejected. Exit code will be 255
#
# Changelog:
#   v0.01 Initial release
#   v0.02 Regexp now case insensitive and looks for trailing whitespace.
#         Checks for Archive::Zip > .11 before using eocdOffset.



# USER SETTINGS:

# Regular expression of files not permitted in ZIP.
# Case-insensitive list of file extensions, optionally followed by
# whitespace.
# NOTE(review): the pattern has no trailing '$' anchor, so a listed
# extension appearing in the middle of a name (e.g. ".com" in
# "my.company.doc") presumably counts as a match too -- confirm that this
# strictness is intended before adding an anchor.
my $filelist = '(?i)\.([23]86|vb[se]|jse|cpl|crt|chm|in[fsi]|isp|dll|drv|msi|cmd|sc[rt]|sys|bat';
$filelist .= '|pif|lnk|hlp|ms[cip]|reg|asd|sh[bs]|app|ocx|htt|hta|mht|url|exe|ws[cfh]|ops|com';
$filelist .= '|mim|uue?|b64|b[hq]x|xxe)\s*';

# Forbid encrypted (and therefore UNSCANNABLE) content?
# 1 = a ZIP holding any encrypted member is reported with exit code 1.
# 0 = encrypted members are counted but do not affect the exit code.
my $ForbidEncrypted = 1;

# Maximum depth for zips that contain zips. 
# 5 is usually enough. Do not set too high
# or you could be DoS'd by a zip bomb.
my $MaxZipDepth = 5;

# Maximum file size to attempt to extract (bytes)
# Again, do not set too high or you can be attacked
# by a zip bomb.
my $MaxFileSize = 104857600;

# Directory for temp files
# (Suggestion: use a tmpfs partition if possible)
my $tmpdir = '/var/tmp';

# Set $DEBUG = 1 for verbose output on STDERR, 0 to keep quiet:
my $DEBUG = 1;

# Fill in the following to send STDERR to a file (for debugging only).
# Leave empty to keep STDERR where it is:
my $LOGFILE = '';

# ---------------------------------------
# Few user serviceable parts below
#
# Exit codes produced by this section:
#     0  - nothing objectionable found
#     1  - banned file name found, or encrypted content while
#          $ForbidEncrypted is set
#   255  - usage error, unreadable input, or any fatal error raised
#          while scanning

# Force all variables to be declared and enable warnings:
use strict;
use warnings;

# Redirect STDERR to a file if required.
# Three-argument open keeps characters in $LOGFILE from being read as
# part of the open mode.
if ( $LOGFILE and not open(STDERR, '>>', $LOGFILE) ) {
	print STDERR "Cannot open log file $LOGFILE: $!\n";
	exit 255;
}

# Import the required libraries:
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );

# Declare global variables:
my $curdepth = 0;	# Current recursion depth
my $crypted = 0;	# Total encrypted files seen
my $disallowed = 0;	# Total files matching regular expression

# Get name of zip file from parameter 1 on command line.
# Test for definedness/length rather than truth so that a file
# literally named "0" is still accepted.
my $zipfilename = shift;
unless ( defined $zipfilename and length $zipfilename ) {
	print STDERR "Usage: $0 /path/to/filename.zip\n";
	exit 255;
}

# Do a few basic checks on parameter:
unless ( -f $zipfilename ) {
	print STDERR "Cannot find file '$zipfilename'\n";
	exit 255;
}

# Call the main routine.
# An uncaught die() exits with the current errno when it is non-zero, so
# the exit status could be any value (even 1, which means "banned
# content"). Trap the error and exit with the documented 255 instead.
unless ( eval { checkzip($zipfilename); 1 } ) {
	my $error = $@ || "Unknown error while checking '$zipfilename'\n";
	print STDERR $error;
	exit 255;
}

# Determine results:
print STDERR "Forbidden files = $disallowed, Encrypted = $crypted\n" if $DEBUG;
if ( $disallowed or ($crypted and $ForbidEncrypted) ) {
	exit 1;
}

exit 0;

## Subroutines:

sub checkzip {
# Check the contents of one ZIP file, recursing into embedded ZIP files.
# Looks for:
#   - files matching regular expression
#   - Encrypted files
#   - Multivolume ZIP headers
#
# Parameter:
#   $zipfilename - path of the ZIP file to read
#
# Side effects (file-level counters):
#   $disallowed - increased by the number of members matching $filelist
#   $crypted    - increased by one for every encrypted member
#   $curdepth   - raised on entry, lowered again on normal return
#
# Dies when the archive cannot be read, is a multi-volume archive, nests
# deeper than $MaxZipDepth, or holds an embedded ZIP that is larger than
# $MaxFileSize or cannot be extracted. The temp file created for an
# embedded ZIP is removed before such an error is passed on.

	# Grab file name from parameter 1
	my $zipfilename = shift;

	# Increment recursion counter:
	$curdepth++;

	print STDERR "Opening $zipfilename\n" if $DEBUG;
	my $zip = Archive::Zip->new();
	die "Error reading $zipfilename\n" unless $zip->read( $zipfilename ) == AZ_OK;

	# Abort if this is a multi-disk zip:
	die "Multi-volume zip file. Cannot process.\n" if $zip->diskNumberWithStartOfCentralDirectory();

	# Warn about file weirdness (characters prepended to zip, such as a virus would do): 
	if ( $zip->VERSION > .11 and $zip->eocdOffset() ) {
		warn "Zip directory offset doesn't match file! Possibly altered.\n";
	}

	# Tally total files matching regexp.
	# ("+=" puts membersMatching in scalar context, where it yields a count.)
	$disallowed += $zip->membersMatching( $filelist );

	# Now scan through each ZIP file member looking for
	# embedded ZIP files and encrypted files
	foreach my $element ( $zip->members() ) {
		my $elementname = $element->fileName();
		my $encrypted   = $element->isEncrypted();

		# Increment number of encrypted elements:
		$crypted++ if $encrypted;

		# Print internal filename and "(+)" if encrypted:
		print STDERR " $elementname", $encrypted ? ' (+)': '', "\n" if $DEBUG;

		# Only members whose filename ends in .ZIP are inspected further:
		next unless $elementname =~ /\.zip\s*$/i;

		# Refuse to recurse more than MaxZipDepth levels:
		die "ZIP file exceeded depth of $MaxZipDepth.\n" unless $curdepth < $MaxZipDepth;

		# An encrypted embedded ZIP has already been counted above. It is
		# presumably not extractable without a password, so trying would
		# abort the whole scan instead of letting $ForbidEncrypted decide.
		next if $encrypted;

		# Check uncompressed file size:
		die "Embedded ZIP '$elementname' too large to inspect.\n" if $element->uncompressedSize() > $MaxFileSize;

		# Unzip to a temp file:
		# (Yes, using Archive::Zip::tempName would be better but doesn't seem to
		# exist in Archive::Zip 1.09)
		my ($fh, $tmpname) = Archive::Zip::tempFile( $tmpdir );
		die "Unable to create temp file in $tmpdir\n" unless defined $fh and defined $tmpname;

		# Only the file name is needed; extraction below writes by name.
		$fh->close;

		# Extract and recurse inside eval so the temp file is removed even
		# when either step dies; the error is re-raised afterwards.
		my $ok = eval {
			my $status = $element->extractToFileNamed( $tmpname );
			die "error $status" unless $status == AZ_OK;
			# Recursively call this subroutine to check this embedded zip:
			checkzip($tmpname);
			1;
		};
		my $error = $@;

		# ...and finally try to tidy the temp file:
		unlink($tmpname) or warn "Unable to delete tempfile $tmpname\n";

		die( $error || "Error inspecting embedded ZIP '$elementname'\n" ) unless $ok;
	}
	$curdepth--;
}

