# ----------------------------------------------------------
# TextOCR scanner and image validator SA-plugin v. 3.1
# Written by M. Blapp, ImproWare AG, Switzerland
# ----------------------------------------------------------
# 
# README:
# -------
# 
# textocr.pm is a plugin for spamassassin 3.1+ to detect
# suspect pictures and extract text from them with gocr.
# The OCR dictionary functionality has been replaced with
# regexes. The plugin can also verify the validity of the
# pictures and detects spoofing of the content type.
# 
# 
# HISTORY:
# --------
# 
# 31.03.2006, v. 1.00
# 
# Initial revision
# 
# 01.04.2006, v. 1.01
# 
# Added more words to scanlist
# 02.04.2006, v. 1.02
# 
# Check return values of netpbm utils
# 
# 03.04.2006, v 1.1
# 
# Remove the eval function and replace
# it with parsed_metadata(). Now we can
# track errors, count the words we found.
# The plugin detects now forged content type
# entries.
# 
# 03.04.2006, v 1.11
# 
# Add a check for suspect pictures and add some
# score for it. There are new pics going around
# with obfuscated content so ocr scanners are useless
# again :-(
# 
# 04.04.2006, v. 1.2
# 
# The GIF module from Image::ExifTool doesn't recognize
# GIFs without colortable as valid pics and just skips them.
# The result is a matching SPAMPIC_BROKEN_GIF entry which is
# wrong. You should definitely patch your Image::ExifTool installation
# with the provided patch at http://antispam.imp.ch/patches/patch-GIF-Colortable
# 
# 04.04.2006, v. 1.2.1
# 
# Don't scan small pictures, not even for header parsing, as it
# seems Image::ExifTool again has problems with them. Fix the
# size calculations.
# 
# 04.04.2006, v. 1.2.2
# 
# Count the non standard Image::ExifTool failures as soft errors
# and add NONSTD_ tests for them.
# 
# 08.04.2006, v 1.3
# 
# Much more words to scan for, added a second method to scan jpeg
# pics which helps with pics having white font and a lot of noisy
# distorts. Rename the SUSPECT_ tests and lower the scores for
# them.
# 
# 08.04.2006, v 1.3.1
# 
# Added two other jpeg scanmethods which give a higher match
# possibility.
# 
# 12.04.2006, v 1.4
# 
# Added a timeout (default 10 seconds) and change the scanmethods.
# Now we scan also normalized pnm files, this seems to help a lot
# on some jpegs. Removed some debug statements.
# 
# 12.04.2006 v. 1.4.1
# 
# Add a scanlimit, only scan a limited number of images.
# Fix a logical error, really redirect all error output to
# stderr as I've implemented some time ago, but now it works.
# 
# 12.04.2006 v. 1.4.2
# 
# Rename some vars to make it more logic, add new spamwords.
# Add some perldoc documentation. Change minpixratio_ocr to
# 4000 as there are more and more suspect pics around.
# 
# 14.04.2006 v. 1.5
#
# Important change. Ignore raw pnm files if parsing has failed
# or gocr dumped core (yes this can happen, I'll soon post
# a fix for gocr).
#
# 14.04.2006 v. 1.6
#
# Important change. Alter the whole plugin to use pipes and
# kill stalled pids after we left the 'helper_run_mode'. Added
# three count rules to count alphanumeric chars.
#
# 14.04.2006 v. 1.6.1
#
# Sort out identical chars. Some moirees and patterns are often found in
# pictures and they show after a OCR scan repeated chars of the same
# type. Not really a sign of words. Added some examples about the ALPHA rules.
# 
# 06.06.2006 v. 1.6.2
#
# Fix typo: pngtpnm -> pngtopnm. Now png pictures finally work too.
#
# 09.06.2006 v. 1.7
#
# Add rules against multiple small pictures in HTML mails where
# OCR is almost useless.
#
# 03.09.2006 v. 1.8
#
# Add support for animated gifs. Mostly contributed by Romeo Benzoni.
# Thanks a lot ! Add ~10 new rules.
#
# Important: You need now p5-Imager and libungif support.
#
# 08.09.2006 v. 1.9
#
# Handle broken gif pictures and try to fix them if possible. I've
# fixed some of the regexes and added a lot of new rules to match
# the recent spams.
#
# 21.10.2006 v. 2.0
#
# Catch the recent image spam with combined pictures and transparent
# backgrounds, or images which have different offsets. Try to catch those
# tricks all together.
#
# 22.10.2006 v. 2.1
#
# Composed anims were not really correctly combined. Fix this issue.
#
# 26.10.2006 v. 2.2
#
# Catch recent spampics with underline colors. Reorganize the plugin a bit.
# Fix logic error introduced in v 2.1
#
# 17.11.2006 v. 3.0
#
# Add fuzzy string support, but match full and simple regex matches
# still directly. Add a maximum score to still do OCR to prevent useless
# picture scans. The wordlist is now a simple array at the top of the
# config.
#
# Important: You need now the perl Module String::Approx.
#
# A lot of the new features have been borrowed from the
# Fuzzy OCR Plugin (Thanks Christian !)
#
# 1.12.2006 v. 3.1
#
# Changed ocrtext_minpixels_ocr to need only 20000 pixel pictures.
# Changed priority to 100, allowing metatests which did not work
# previously.
# Added ocrtext_pwords, a list of positive words which give negative
# counts. It's almost left empty since releasing this information would
# give spammers a new opportunity.
#
# DESCRIPTION:
# ------------
# 
# Scan suspect pictures and parse them with gocr. Very big and
# small pictures are skipped. The suspicious word list has to
# be defined in the spamassassin conf.
#
# 
# NOTICE:
# -------
# 
# 'r' and 'n' look very similar, and many OCR programs often
# can't tell them apart. So just use '[rn]' instead
# of a single char.
# 
# 
# INSTALLATION:
# -------------
# 
# You'll need:
# 
# - The perl module Perl-Imager. You need an already installed
#   libungif port. Please make sure gif pictures are really enabled.
#
# - Perl module Image::ExifTool and a patch for GIF pics:
#   http://antispam.imp.ch/patches/patch-GIF-Colortable
#
# - Perl module String::Approx
#
# - Gocr from http://jocr.sourceforge.net and a patch to
#   avoid segfaults with gocr:
#   http://antispam.imp.ch/patches/patch-gocr-segfault
#
# - Netpbm from http://netpbm.sourceforge.net
# 
# - Libungif for fixgif and animated gif support.
#
# You can extract the plugin with 'patch < patch-ocrtext'
#
# Check that you have the necessary tools (giftopnm, jpegtopnm,
# pngtopnm, djpeg, pnminvert) all in the same path.
#
--- /dev/null	Fri Nov 17 10:46:25 2006
+++ ocrtext.cf	Mon Nov  6 11:11:10 2006
@@ -0,0 +1,108 @@
+gocr_path	/usr/local/bin/gocr
+pnmtools_path	/usr/local/bin
+ocrtext_dscore	15
+
+ocrtext_words	realtime;alert;actquick;announce;headline;charts;increase;below;rating;takeoff;resource;ready::0.1;profit;news::0.1;wallstreet;free::0;pick::0.1;breaking;explosive;strong;spotlight;watch;symbol;stock;investor;offer;international;company;money::0;million;thousand;loose;buy;price::0.1;trade;worldtrade;target::0.1;higher;banking;service;recommendation;viagra;soma::0.1;cialis::0.1;xanax;valium;meridia::0.1;zanaflex;levitra;herbal::0.1;medicine;doctor;pills;legal;penis::0;erection::0.1;supplement;medication;weightloss;growth;drugs;pharmacy;prescription;click::0.1;here::0;software;kunde;volksbank;sparkasse;master;degree;bachelor;diploma;removal;visit;browser;readmore;type::0.1;cheap;shipping;quality;sideeffects;size::0.1;focused;replica::0.1;sale::0.1;bags::0.1;development;technology;expect;long-term;quick::0.1;afford;tradeout;compensate
+
+ocrtext_pwords	information
+
+body		OCRTEXT			eval:ocrtext_check()
+priority	OCRTEXT			100
+
+#
+# Validate the GIF/PNG/JPEG pictures
+#
+body		SPAMPIC_FORGED_CT	eval:ocrtext_eval()
+describe        SPAMPIC_FORGED_CT	Forged content-type in mime header
+score           SPAMPIC_FORGED_CT	3.000
+
+body		SPAMPIC_SUSPECT		eval:ocrtext_eval()
+describe        SPAMPIC_SUSPECT		Suspect image found
+score           SPAMPIC_SUSPECT		0.900
+
+body		GIFANIM_SUSPECT		eval:ocrtext_eval()
+describe        GIFANIM_SUSPECT		Suspect animated gif found
+score           GIFANIM_SUSPECT		2.500
+
+body		SPAMPIC_UNKNOWN		eval:ocrtext_eval()
+describe        SPAMPIC_UNKNOWN		Failed to read image header
+score           SPAMPIC_UNKNOWN		2.000
+
+body		SPAMPIC_NONSTD		eval:ocrtext_eval()
+describe        SPAMPIC_NONSTD		Non standard image header
+score           SPAMPIC_NONSTD		0.200
+
+body		SPAMPIC_BROKEN		eval:ocrtext_eval()
+describe        SPAMPIC_BROKEN		Contains damaged image
+score           SPAMPIC_BROKEN		1.500
+
+body		SPAMPIC_ALPHA_1		eval:ocrtext_eval()
+describe	SPAMPIC_ALPHA_1		Image contains many alphanumeric chars
+score		SPAMPIC_ALPHA_1		0.500
+
+body		SPAMPIC_ALPHA_2		eval:ocrtext_eval()
+describe	SPAMPIC_ALPHA_2		Image contains many alphanumeric chars
+score		SPAMPIC_ALPHA_2		1.000
+
+body		SPAMPIC_ALPHA_3		eval:ocrtext_eval()
+describe	SPAMPIC_ALPHA_3		Image contains many alphanumeric chars
+score		SPAMPIC_ALPHA_3		1.500
+
+body		__SPAMPIC_COUNT_2	eval:ocrtext_eval()
+body		__SPAMPIC_COUNT_3	eval:ocrtext_eval()
+body		__SPAMPIC_COUNT_4	eval:ocrtext_eval()
+body		__SPAMPIC_COUNT_5	eval:ocrtext_eval()
+body		__SPAMPIC_COUNT_6	eval:ocrtext_eval()
+body		__SPAMPIC_COUNT_7	eval:ocrtext_eval()
+rawbody         __HAVE_CID              /src=["']?cid:/i
+
+#
+# Multiple inline pics without text are very suspicious
+#
+meta		SPAMPIC_MULTI_1		(__SPAMPIC_COUNT_2 + (HTML_IMAGE_ONLY_04 || HTML_IMAGE_ONLY_08 || HTML_IMAGE_ONLY_12 || HTML_IMAGE_ONLY_16 || HTML_IMAGE_ONLY_20 || HTML_IMAGE_ONLY_24 || HTML_IMAGE_ONLY_28 || HTML_IMAGE_ONLY_32) + __HAVE_CID + (IMPPYZOR_CHECK || SPAMPIC_WORDS_1 || SPAMPIC_ALPHA_1 || SPAMPIC_ALPHA_2 || SPAMPIC_ALPHA_3) == 4)
+score           SPAMPIC_MULTI_1		1.000
+
+meta		SPAMPIC_MULTI_2		(__SPAMPIC_COUNT_3 + (HTML_IMAGE_ONLY_04 || HTML_IMAGE_ONLY_08 || HTML_IMAGE_ONLY_12 || HTML_IMAGE_ONLY_16 || HTML_IMAGE_ONLY_20 || HTML_IMAGE_ONLY_24 || HTML_IMAGE_ONLY_28 || HTML_IMAGE_ONLY_32) + __HAVE_CID + (IMPPYZOR_CHECK || SPAMPIC_WORDS_1 || SPAMPIC_ALPHA_1 || SPAMPIC_ALPHA_2 || SPAMPIC_ALPHA_3) == 4)
+describe        SPAMPIC_MULTI_2		Contains inline pics (3)
+score           SPAMPIC_MULTI_2		2.000
+
+meta		SPAMPIC_MULTI_3		(__SPAMPIC_COUNT_4 + (HTML_IMAGE_ONLY_04 || HTML_IMAGE_ONLY_08 || HTML_IMAGE_ONLY_12 || HTML_IMAGE_ONLY_16 || HTML_IMAGE_ONLY_20 || HTML_IMAGE_ONLY_24 || HTML_IMAGE_ONLY_28 || HTML_IMAGE_ONLY_32) + __HAVE_CID + (IMPPYZOR_CHECK || SPAMPIC_WORDS_1 || SPAMPIC_ALPHA_1 || SPAMPIC_ALPHA_2 || SPAMPIC_ALPHA_3) == 4)
+describe        SPAMPIC_MULTI_3		Contains inline pics (4)
+score           SPAMPIC_MULTI_3		2.500
+
+meta		SPAMPIC_MULTI_4		(__SPAMPIC_COUNT_5 + (HTML_IMAGE_ONLY_04 || HTML_IMAGE_ONLY_08 || HTML_IMAGE_ONLY_12 || HTML_IMAGE_ONLY_16 || HTML_IMAGE_ONLY_20 || HTML_IMAGE_ONLY_24 || HTML_IMAGE_ONLY_28 || HTML_IMAGE_ONLY_32) + __HAVE_CID + (IMPPYZOR_CHECK || SPAMPIC_WORDS_1 || SPAMPIC_ALPHA_1 || SPAMPIC_ALPHA_2 || SPAMPIC_ALPHA_3) == 4)
+describe        SPAMPIC_MULTI_4		Contains inline pics (5)
+score           SPAMPIC_MULTI_4		3.000
+
+meta		SPAMPIC_MULTI_5		(__SPAMPIC_COUNT_6 + (HTML_IMAGE_ONLY_04 || HTML_IMAGE_ONLY_08 || HTML_IMAGE_ONLY_12 || HTML_IMAGE_ONLY_16 || HTML_IMAGE_ONLY_20 || HTML_IMAGE_ONLY_24 || HTML_IMAGE_ONLY_28 || HTML_IMAGE_ONLY_32) + __HAVE_CID + (IMPPYZOR_CHECK || SPAMPIC_WORDS_1 || SPAMPIC_ALPHA_1 || SPAMPIC_ALPHA_2 || SPAMPIC_ALPHA_3) == 4)
+describe        SPAMPIC_MULTI_5		Contains inline pics (6)
+score           SPAMPIC_MULTI_5		4.000
+
+meta		SPAMPIC_MULTI_6		(__SPAMPIC_COUNT_7 + (HTML_IMAGE_ONLY_04 || HTML_IMAGE_ONLY_08 || HTML_IMAGE_ONLY_12 || HTML_IMAGE_ONLY_16 || HTML_IMAGE_ONLY_20 || HTML_IMAGE_ONLY_24 || HTML_IMAGE_ONLY_28 || HTML_IMAGE_ONLY_32) + __HAVE_CID + (IMPPYZOR_CHECK || SPAMPIC_WORDS_1 || SPAMPIC_ALPHA_1 || SPAMPIC_ALPHA_2 || SPAMPIC_ALPHA_3) == 4)
+describe        SPAMPIC_MULTI_6		Contains inline pics (7+)
+score           SPAMPIC_MULTI_6		5.000
+
+
+
+#
+# Summarize the OCR scan results
+#
+body		SPAMPIC_WORDS_1		eval:ocrtext_eval()
+describe        SPAMPIC_WORDS_1		Contains inline spam picture (1)
+score           SPAMPIC_WORDS_1		1.500
+
+body		SPAMPIC_WORDS_2		eval:ocrtext_eval()
+describe        SPAMPIC_WORDS_2		Contains inline spam picture (2)
+score           SPAMPIC_WORDS_2		4.000
+
+body		SPAMPIC_WORDS_3		eval:ocrtext_eval()
+describe        SPAMPIC_WORDS_3		Contains inline spam picture (3)
+score           SPAMPIC_WORDS_3		6.000
+
+body		SPAMPIC_WORDS_4		eval:ocrtext_eval()
+describe        SPAMPIC_WORDS_4		Contains inline spam picture (4)
+score           SPAMPIC_WORDS_4		9.000
+
+body		SPAMPIC_WORDS_5		eval:ocrtext_eval()
+describe        SPAMPIC_WORDS_5		Contains inline spam picture (5+)
+score           SPAMPIC_WORDS_5		12.000
--- /dev/null	Sat Dec  2 09:49:19 2006
+++ ocrtext.pm	Sat Dec  2 09:47:19 2006
@@ -0,0 +1,1170 @@
+=head1 NAME
+
+Mail::SpamAssassin::Plugin::ocrtext - Check for specific keywords in gif/jpg/png attachments, using gocr.
+
+=head1 SYNOPSIS
+
+ loadplugin    ocrtext /path/to/ocrtext.pm
+
+ # Words to scan for
+ ocrtext_words				stock;alert;etc ...
+
+ # Positive words to scan for
+ ocrtext_pwords
+
+ # Max pics to scan
+ ocrtext_maxscans                       3
+
+ # Scan timeout per pic (in seconds)
+ ocrtext_timeout                        8
+
+ # Maximum score to still do OCR
+ ocrtext_dscore				10
+
+ # Min pixels per kb to flag a picture as suspect
+ ocrtext_minpixratio_suspect            10000
+
+ # Min pixel per kb to do OCR
+ ocrtext_minpixratio_ocr                2000
+
+ # Min pixels to do OCR
+ ocrtext_minpixels_ocr                  20000
+
+ # Max size of pic in kb to do OCR
+ ocrtext_maxsize_ocr                    100
+
+ # Min size of pic in kb to do OCR
+ ocrtext_minsize_ocr                    4
+
+ # Min size of pic in kb to do anything at all
+ ocrtext_minsize                        1
+
+ # Limit 1 of chars an OCR scan can have to match
+ ocrtext_alpha1				32
+
+ # Limit 2 of chars an OCR scan can have to match
+ ocrtext_alpha2				100
+
+ # Limit 3 of chars an OCR scan can have to match
+ ocrtext_alpha3				400
+
+ # Path of the gocr binary
+ gocr_path                              /usr/local/bin/gocr
+
+ # Path of the pnmtools binaries
+ pnmtools_path                          /usr/local/bin
+
+=head1 DESCRIPTION
+
+Checks for specific keywords in gif/jpg/png attachments, using gocr.
+This can be used to detect spam that puts all the real content in an
+attached image, accompanied with random text and html (no URLs, etc).
+There are also various rules to validate attached images and to detect
+forged content types or broken images.
+
+=head1 AUTHOR
+
+Martin Blapp, mb -at- imp -dot- ch
+
+=head1 COPYRIGHT
+
+Copyright (C) 2004-2006 ImproWare AG. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY IMPROWARE INC. AND ITS CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=cut
+
+package ocrtext;
+use strict;
+use Mail::SpamAssassin;
+use Mail::SpamAssassin::Plugin;
+use String::Approx 'adistr';
+use Image::ExifTool;
+use Imager;
+
+our @ISA = qw(Mail::SpamAssassin::Plugin);
+sub dbg { return Mail::SpamAssassin::dbg(@_); }	# Thin wrapper: hand all arguments to SpamAssassin's debug logger.
+
+our $threshold = "0.15";	# NOTE(review): not referenced in this part of the file; presumably the default fuzzy-match distance for String::Approx - confirm.
+
+sub new {	# Constructor: build the plugin object, register its config settings and its eval rules.
+	my ($proto, $mailsa, $server) = @_;	# $server is accepted but not used
+	my $class = ref($proto) || $proto;	# works when invoked on an instance as well as on a class name
+	my $plugin = bless($class->SUPER::new($mailsa), $class);
+	$plugin->set_config($mailsa->{conf});
+	foreach my $rule (qw(ocrtext_check ocrtext_eval)) {
+		$plugin->register_eval_rule($rule);
+	}
+	return $plugin;
+}
+
+sub set_config {	# Register every configuration setting of this plugin with the config parser found in $conf->{parser}.
+	my ($self, $conf) = @_;
+	my ($numeric, $string) = ($Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC, $Mail::SpamAssassin::Conf::CONF_TYPE_STRING);
+	my @settings = (
+		{	# Max pics to scan
+			setting  => 'ocrtext_maxscans',
+			type     => $numeric,
+			default  => 3,
+			is_admin => 1,
+		},
+
+		{	# Scan timeout per pic, in seconds
+			setting  => 'ocrtext_timeout',
+			type     => $numeric,
+			default  => 8,
+			is_admin => 1,
+		},
+
+		{	# Maximum message score at which OCR is still attempted
+			setting  => 'ocrtext_dscore',
+			type     => $numeric,
+			default  => 10,
+			is_admin => 1,
+		},
+
+		{	# Min size of pic in kb to do OCR
+			setting  => 'ocrtext_minsize_ocr',
+			type     => $numeric,
+			default  => 4,
+			is_admin => 1,
+		},
+
+		{	# Max size of pic in kb to do OCR
+			setting  => 'ocrtext_maxsize_ocr',
+			type     => $numeric,
+			default  => 100,
+			is_admin => 1,
+		},
+
+		{	# Min pixels to do OCR
+			setting  => 'ocrtext_minpixels_ocr',
+			type     => $numeric,
+			default  => 20000,
+			is_admin => 1,
+		},
+
+		{	# Min pixels per kb to do OCR
+			setting  => 'ocrtext_minpixratio_ocr',
+			type     => $numeric,
+			default  => 2000,
+			is_admin => 1,
+		},
+
+		{	# Pixels per kb above which a picture is reported as suspect
+			setting  => 'ocrtext_minpixratio_suspect',
+			type     => $numeric,
+			default  => 5000,
+			is_admin => 1,
+		},
+
+		{	# Min size of pic in kb to do anything at all
+			setting  => 'ocrtext_minsize',
+			type     => $numeric,
+			default  => 1,
+			is_admin => 1,
+		},
+
+		{	# Limit 1 of chars an OCR scan can have to match
+			setting  => 'ocrtext_alpha1',
+			type     => $numeric,
+			default  => 32,
+			is_admin => 1,
+		},
+
+		{	# Limit 2 of chars an OCR scan can have to match
+			setting  => 'ocrtext_alpha2',
+			type     => $numeric,
+			default  => 100,
+			is_admin => 1,
+		},
+
+		{	# Limit 3 of chars an OCR scan can have to match
+			setting  => 'ocrtext_alpha3',
+			type     => $numeric,
+			default  => 400,
+			is_admin => 1,
+		},
+
+		{	# Words to scan for
+			setting  => 'ocrtext_words',
+			type     => $string,
+			default  => undef,
+			is_admin => 1,
+		},
+
+		{	# Positive words to scan for
+			setting  => 'ocrtext_pwords',
+			type     => $string,
+			default  => undef,
+			is_admin => 1,
+		},
+
+		{	# Path of the pnmtools binaries; the value must be an existing directory
+			setting  => 'pnmtools_path',
+			default  => undef,
+			is_admin => 1,
+			code => sub {
+				my ($target, $key, $value, $line) = @_;
+				return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE
+					unless defined $value && length $value;
+
+				$value = Mail::SpamAssassin::Util::untaint_file_path($value);
+				unless (-d $value) {
+					dbg("config: pnmtools_path \"$value\" isn't an directory");
+					return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+				}
+				$target->{pnmtools_path} = $value;
+			},
+		},
+
+		{	# Path of the gocr binary; the value must be an executable file
+			setting  => 'gocr_path',
+			default  => undef,
+			is_admin => 1,
+			code => sub {
+				my ($target, $key, $value, $line) = @_;
+				return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE
+					unless defined $value && length $value;
+
+				$value = Mail::SpamAssassin::Util::untaint_file_path($value);
+				unless (-x $value) {
+					dbg("config: gocr_path \"$value\" isn't an executable");
+					return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+				}
+				$target->{gocr_path} = $value;
+			},
+		},
+	);
+	$conf->{parser}->register_commands(\@settings);
+}
+
+sub ocrtext_eval {	# Placeholder eval function for the rules declared with eval:ocrtext_eval() in ocrtext.cf.
+	return 0;	# Never hits by itself; ocrtext_check() raises such rules directly via $pms->_handle_hit() (e.g. SPAMPIC_BROKEN).
+}
+
+sub ocrtext_check {
+	my ($self, $pms) = @_;
+	my $partcount = 0;
+	my @ocrtext;
+	my $pnmtools_path = $self->{main}->{conf}->{pnmtools_path};
+	my $giffix = "$pnmtools_path/giffix";
+	my $pid0;
+
+	my $maxscans = $pms->{main}->{conf}->{ocrtext_maxscans};
+	my $dscore = $pms->{main}->{conf}->{ocrtext_dscore};
+	my $imagecount = 0;
+	my $imagetcount = 0;
+
+	my $cscore = $pms->get_score();
+	dbg("ocrtext: score is $cscore");
+	if ( $cscore > $dscore ) {
+		dbg("ocrtext: Skip OCR scan, message has already $cscore points of needed $dscore points.");
+		return 0;
+	}
+
+	foreach my $p ( $pms->{msg}->find_parts("image") ) {
+
+		$imagetcount++;
+
+		#
+		# Only scan images up to $maxscans images.
+		#
+		if ($imagecount >= $maxscans) {
+			next;
+		}
+
+		my ( $ctype, $boundary, $charset, $name ) =
+			Mail::SpamAssassin::Util::parse_content_type(
+			$p->get_header('content-type'));
+
+		dbg("ocrtext: findparts() found possible $ctype image");
+
+		my ($tmpfpath, $tmpf) = Mail::SpamAssassin::Util::secure_tmpfile();
+		dbg("ocrtext: created tempfile $tmpfpath");
+
+		my $picture_header = "";
+		my $gotheader = 0;
+		foreach my $out ($p->decode()) {
+			if ($gotheader < 10) {
+				$picture_header .= $out;
+				$gotheader ++;
+			}
+			print $tmpf $out;
+		}
+		close ($tmpf);
+		dbg("ocrtext: saved image as $tmpfpath");
+
+		my $filesize = (stat($tmpfpath))[7];
+		my $minsize = 1024 * $pms->{main}->{conf}->{ocrtext_minsize};
+		if ($filesize <= $minsize) {
+			dbg("ocrtext: Skip pic, size $filesize is smaller than $minsize KB");
+			unlink $tmpfpath;
+			next;
+		}
+
+		my $exifTool = new Image::ExifTool;
+		my %opts;
+
+		$exifTool->Options(Unknown => 1);
+		$exifTool->Options(Verbose => 0);
+		my $success = $exifTool->ExtractInfo($tmpfpath, %opts);
+
+		my $info = $exifTool->GetInfo('FileType', 'FileSize', 'ImageWidth', 'ImageHeight');
+		if (!$success) {
+			if (! $$info{'FileType'} && ! $$info{'ImageHeight'} && ! $$info{'ImageWidth'}) {
+				my $success = 0;
+
+				if ($ctype eq "image/png" || $ctype eq "image/jpeg") {
+					my $hitdesc = "SPAMPIC_BROKEN";
+					$pms->_handle_hit($hitdesc,
+						$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+						$pms->{conf}->{descriptions}->{$hitdesc}
+					);
+					$pms->{tests_already_hit}->{$hitdesc} = 1;
+					dbg("ocrtext: broken pic found, exifTool->ExtractInfo failed");
+				} elsif ($ctype eq "image/gif" || substr($picture_header,0,3) eq "\x47\x49\x46") {
+					if ($pnmtools_path ne "" && -x $giffix) {
+						#
+						# If we got a broken gif, try to fix it.
+						#
+						dbg("ocrtext: broken pic found, try to fix it");
+						my $tmpoutput = "";
+						my $tmpfpathfixed = $tmpfpath . ".fixed";
+
+						$pid0 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID0,
+							$tmpfpath, 1, "$giffix -q > $tmpfpathfixed");
+						my @response = <PID0>;
+						close PID0;
+						while (my $v = shift @response) {
+							$tmpoutput .= $v;
+						}
+						quit_stale_helper(*PID0, $pid0, 0);
+						unlink $tmpfpath;
+						$tmpfpath = $tmpfpathfixed;
+						# Try again ...
+						$success = $exifTool->ExtractInfo($tmpfpath, %opts);
+					}
+				}
+				if (!$success) {
+					my $hitdesc = "SPAMPIC_BROKEN";
+					$pms->_handle_hit($hitdesc,
+						$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+						$pms->{conf}->{descriptions}->{$hitdesc}
+					);
+					$pms->{tests_already_hit}->{$hitdesc} = 1;
+					dbg("ocrtext: broken pic found, exifTool->ExtractInfo failed");
+					dbg("ocrtext: could not extract picture info");
+					unlink $tmpfpath;
+					next;
+				}
+			} else {
+				my $rtype = $$info{'FileType'};
+				if ($rtype eq "GIF" || $rtype eq "PNG" || $rtype eq "JPEG") {
+					my $hitdesc = "SPAMPIC_NONSTD";
+					$pms->_handle_hit($hitdesc,
+						$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+						$pms->{conf}->{descriptions}->{$hitdesc}
+					);
+					$pms->{tests_already_hit}->{$hitdesc} = 1;
+					dbg("ocrtext: non standard $rtype ($ctype) pic found, exifTool->ExtractInfo partly failed");
+				} else {
+					dbg("ocrtext: non standard $rtype ($ctype) pic found, exifTool->ExtractInfo partly failed");
+				}
+			}
+		}
+
+		my $rtype = $$info{'FileType'};
+
+		if (($ctype eq "image/gif" && $rtype ne "GIF") || ($ctype eq "image/png" && $rtype ne "PNG") || ($ctype eq "image/jpeg" && $rtype ne "JPEG")) {
+			dbg("ocrtext: wrong content type, picture is not a $ctype picture");
+			my $hitdesc = "SPAMPIC_FORGED_CT";
+			$pms->_handle_hit($hitdesc,
+				$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+				$pms->{conf}->{descriptions}->{$hitdesc}
+			);
+			$pms->{tests_already_hit}->{$hitdesc} = 1;
+		}
+
+		if ($rtype eq "GIF" || $rtype eq "PNG" || $rtype eq "JPEG") {
+			#
+			# Deal with Kb and bytes
+			#
+			my $size = $$info{'FileSize'};
+			my $bytes;
+			if ($size =~ /kB/i) {
+				$size =~ s/[a-zA-Z]+//g;
+				$bytes = $size * 1024;
+			} elsif ($size =~ /bytes/i) {
+				$size =~ s/[a-zA-Z]+//g;
+				$bytes = $size;
+			} else {
+				$bytes = $size;
+			}
+			my $height = $$info{'ImageHeight'};
+			my $width = $$info{'ImageWidth'};
+			my $pixels = $height * $width;
+			my $pixratio = ($height * $width) / ($bytes / 1024) ;
+
+			dbg("ocrtext: found $rtype image: size=$bytes, height=$height, width=$width");
+
+			my $minpix = $pms->{main}->{conf}->{ocrtext_minpixels_ocr};
+			my $minsize = 1024 * $pms->{main}->{conf}->{ocrtext_minsize_ocr};
+			my $maxsize = 1024 * $pms->{main}->{conf}->{ocrtext_maxsize_ocr};
+			my $pixratio_ocr = $pms->{main}->{conf}->{ocrtext_minpixratio_ocr};
+			my $pixratio_suspect = $pms->{main}->{conf}->{ocrtext_minpixratio_suspect};
+
+			if ($bytes < $minsize) {
+				dbg("ocrtext: skip picture, size $bytes too small, needed $minsize");
+			} elsif ($bytes > $maxsize) {
+				dbg("ocrtext: skip picture, size $bytes too big, needed $maxsize");
+			} elsif ($pixels < $minpix ) {
+				dbg("ocrtext: skip picture, to few pixels: $pixels, needed $minpix");
+			} elsif ($pixratio < $pixratio_ocr) {
+				dbg("ocrtext: skip picture, pixel/size ratio $pixratio too small, needed $pixratio_ocr");
+			} else {
+				#
+				# First check if the pixel/size ratio is suspect. This
+				# should give a small amount of SA hits, even if no
+				# suspect words are detected.
+				#
+				if ($pixratio > $pixratio_suspect) {
+					my $hitdesc = "SPAMPIC_SUSPECT";
+					dbg("ocrtext: SUSPECT $ctype ($rtype) PICTURE FOUND");
+					$pms->_handle_hit($hitdesc,
+						$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+						$pms->{conf}->{descriptions}->{$hitdesc} . "\n$ctype ($rtype)"
+					);
+					$pms->{tests_already_hit}->{$hitdesc} = 1;
+				}
+
+				#
+				# Our picture matches the size requirements, now do text checks.
+				# Redirect any gocr errors to /dev/null
+				#
+				my $gocr_path = $self->{main}->{conf}->{gocr_path};
+				my $gocr = "$gocr_path -v 0 -e /dev/null -i -";
+				my $output = "";
+				my $exitval = 0;
+
+				my $pnminvert = "$pnmtools_path/pnminvert -quiet 2>&1";
+				my $pnmnorm = "$pnmtools_path/pnmnorm -quiet 2>&1";
+				my $pnmgamma = "$pnmtools_path/pnmgamma -quiet 2>&1";
+				my $giftopnm = "$pnmtools_path/giftopnm -quiet";
+				my $giftopnmall = "$pnmtools_path/giftopnm -image=all";
+				my $jpegtopnm = "$pnmtools_path/jpegtopnm -quiet";
+				my $pngtopnm = "$pnmtools_path/pngtopnm -quiet";
+				my $djpeg = "$pnmtools_path/djpeg";
+
+				my $giftopnmexe = "$pnmtools_path/giftopnm";
+				my $jpegtopnmexe = "$pnmtools_path/jpegtopnm";
+				my $pngtopnmexe = "$pnmtools_path/pngtopnm";
+
+				my $pid1;
+				my $pid2;
+				my $pid3;
+				my $pid4;
+				my $pid5;
+				my $pid6;
+				my $pid7;
+				my $pid8;
+				my $pid9;
+				my $pid10;
+				my $pid11;
+				my $pid12;
+
+				#
+				# Limit the scantime
+				#
+				$pms->enter_helper_run_mode();
+				my $timer = Mail::SpamAssassin::Timeout->new({ secs => $self->{main}->{conf}->{ocrtext_timeout} });
+				my $err = $timer->run_and_catch(sub {
+
+				local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
+
+				if ($rtype eq "GIF") {
+					$imagecount++;
+					if ($pnmtools_path ne "" && -x $giftopnmexe) {
+						my $tmpoutput = "";
+						#
+						# Now check how many frames we got
+						#
+						my $frame = 0;
+						my $img_num = 0;
+						my $img_delay = 0;
+						my @imgs = Imager->new;
+						my @imgs_back;
+						@imgs= Imager->read_multi(file => $tmpfpath, type=>'gif');
+						my $img_loc_count = 0;
+						my $has_img_loop = 0;
+						my $has_img_disposal = 0;
+						my $need_cons = 0;
+						my $number = $#imgs + 1;
+						if ($#imgs > 0) {
+							#
+							# We've got an animated gif
+							#
+
+							my $img_top_prev;
+							for(my $i = 0; $i <= $#imgs; $i++) {
+								my $img_delay_new = $imgs[$i]->tags(name => "gif_delay");
+								my $img_top = $imgs[$i]->tags(name => "gif_top");
+								my $img_left = $imgs[$i]->tags(name => "gif_left");
+								my $img_loop = $imgs[$i]->tags(name => "gif_loop");
+								my $img_loop_count = $imgs[$i]->tags(name => "gif_loop_count");
+								my $colors = $imgs[$i]->getcolorcount();
+								my $framecount = $i + 1;
+								my $img_disposal = $imgs[$i]->tags(name => "gif_disposal");
+								if ($img_top && $img_top != $img_top_prev && !$img_left && $img_disposal) {
+									$img_loc_count++;
+								}
+								$img_top_prev = $img_top;
+								if ($img_delay_new > $img_delay && $img_delay <= 1000) {
+									$img_num = $i;
+									$img_delay = $img_delay_new;
+								}
+
+								if (!$img_loop) {
+									$img_loop = "NA";
+								}
+								if ($img_loop eq "0" || $img_loop ne "NA" && $img_loop > 0) {
+									$has_img_loop = 1;
+								} else {
+									$img_loop = "NA";
+								}
+								if ($img_disposal) {
+									$has_img_disposal = 1;
+								}
+								dbg("ocrtext: GIF ANIM frame $i has $img_delay delay, disp=$img_disposal, top=$img_top, left=$img_left, loop=$img_loop");
+
+								if (!$need_cons) {
+									dbg("ocrtext: push frame $i into \@imgs_back");
+										push(@imgs_back, $imgs[$i]);
+								}
+								if (!$need_cons && $img_delay > 1000) {
+									$need_cons = 1;
+								}
+							}
+							#
+							# off by one between Imager and netpbm
+							#
+							$frame = $img_num;
+							$frame++;
+							dbg("ocrtext: Imager thinks spam frame may be nr. $frame");
+						} else {
+							dbg("ocrtext: Only one frame found, skip imager part.");
+							$frame = 1;
+						}
+						if ($number > 1 && $frame == 1 || $number > 1 && $number == $frame) {
+							my $dbgtext = "with $number frames";
+							dbg("ocrtext: SUSPECT GIF ANIM $dbgtext");
+							my $hitdesc = "GIFANIM_SUSPECT";
+							$pms->_handle_hit($hitdesc,
+								$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+								$pms->{conf}->{descriptions}->{$hitdesc}
+							);
+							$pms->{tests_already_hit}->{$hitdesc} = 1;
+							$need_cons = 2;
+						} elsif ($img_loc_count && $frame > 1 || (!$has_img_loop && $number > 1 && $has_img_disposal)) {
+							my $dbgtext = "with $number moving frames";
+							dbg("ocrtext: SUSPECT GIF ANIM $dbgtext");
+							my $hitdesc = "GIFANIM_SUSPECT";
+							$pms->_handle_hit($hitdesc,
+								$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+								$pms->{conf}->{descriptions}->{$hitdesc}
+							);
+							$pms->{tests_already_hit}->{$hitdesc} = 1;
+							$need_cons = 1;
+						}
+						my @gifpics;
+						push(@gifpics, $tmpfpath);
+
+						#
+						# Create a consolidated pic if neccessary. this tries to workaround various tricks
+						# spammers use :-)
+						#
+						if ($need_cons) {
+							my $mypath = $tmpfpath . ".cons";
+							my $img = Imager->new;
+
+							dbg("ocrtext: Consolidate picture");
+							if ($need_cons == 1) {
+								dbg("ocrtext: Save pics up to the offending frame");
+								my @imgs_temp = Imager->new;
+								Imager->write_multi({ file=> $mypath, makemap=>'webmap', type => 'gif'}, @imgs_back);
+
+								dbg("ocrtext: Write pic with limited frames");
+								$img->read(file=>$mypath, gif_consolidate=>1)
+									or die $img->errstr;
+							} else {
+								$img->read(file=>$tmpfpath, gif_consolidate=>1)
+									or die $img->errstr;
+							}
+
+							dbg("ocrtext: Write flat pic with consolidated frames");
+							$mypath = $tmpfpath . ".cons";
+							$img->write(file=>$mypath, type => 'gif')
+								or die $img->errstr;
+
+							push(@gifpics, $mypath);
+						}
+
+						#
+						# Now proceed with the gif(s)
+						#
+
+						my $y = 0;
+						foreach $tmpfpath (@gifpics) {
+							{
+
+								if ($y) {
+									$frame = 1;
+								}
+								dbg("ocrtext: Scan $frame of $tmpfpath");
+
+								$tmpoutput = "";
+								$exitval = 0;
+								$y++;
+
+								$pid1 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID1,
+								$tmpfpath, 1, "$giffix | $giftopnm -image=$frame 2>&1 | $gocr");
+								my @response = <PID1>;
+								close PID1;
+								while (my $v = shift @response) {
+									$tmpoutput .= $v;
+								}
+								$exitval = quit_stale_helper(*PID1, $pid1, $exitval);
+							}
+
+							if ($exitval != 0) {
+								#
+								# Only return a bad value if giftopnm or giffix have failed
+								#
+								$tmpoutput = "";
+								$exitval = 0;
+								{
+									$pid2 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID2,
+									$tmpfpath, 1, "$giffix | $giftopnm -image=$frame 2>&1");
+									my @response = <PID2>;
+									close PID2;
+									$exitval = quit_stale_helper(*PID2, $pid2, $exitval);
+								}
+								if ($exitval != 0) {
+									dbg("ocrtext: broken gif pic found");
+									my $hitdesc = "SPAMPIC_BROKEN";
+									$pms->_handle_hit($hitdesc,
+										$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+										$pms->{conf}->{descriptions}->{$hitdesc}
+									);
+									$pms->{tests_already_hit}->{$hitdesc} = 1;
+								}
+							} else {
+								$output .= $tmpoutput;
+								#
+								# Second try, work on a normalized image.
+								#
+								$tmpoutput = "";
+								$exitval = 0;
+								{
+									$pid2 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID2,
+									$tmpfpath, 1, "$giffix | $giftopnm -image=$frame 2>&1 | $pnmnorm | $gocr");
+									my @response = <PID2>;
+									close PID2;
+									while (my $v = shift @response) {
+										$tmpoutput .= $v;
+									}
+									$exitval = quit_stale_helper(*PID2, $pid2, $exitval);
+								}
+								if (! $exitval) {
+									$output .= "---new-page---";
+									$output .= $tmpoutput;
+								}
+
+								#
+								# Next step: use gamma correction
+								#
+								$tmpoutput = "";
+								$exitval = 0;
+								{
+									$pid3 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID3,
+									$tmpfpath, 1, "$giffix | $giftopnm -image=$frame 2>&1 | $pnmgamma | $gocr");
+									my @response = <PID3>;
+									close PID3;
+									while (my $v = shift @response) {
+										$tmpoutput .= $v;
+									}
+									$exitval = quit_stale_helper(*PID3, $pid3, $exitval);
+								}
+								if (! $exitval) {
+									$output .= "---new-page---";
+									$output .= $tmpoutput;
+								}
+
+
+								#
+								# Last try, invert the picture and normalize it.
+								#
+								$tmpoutput = "";
+								$exitval = 0;
+								{
+									$pid4 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID4,
+									$tmpfpath, 1, "$giffix | $giftopnm -image=$frame 2>&1 | $pnminvert | $pnmnorm | $gocr");
+									my @response = <PID4>;
+									close PID4;
+									while (my $v = shift @response) {
+										$tmpoutput .= $v;
+									}
+									$exitval = quit_stale_helper(*PID4, $pid4, $exitval);
+								}
+								if (! $exitval) {
+									$output .= "---new-page---";
+									$output .= $tmpoutput;
+								}
+							}
+						}
+					}
+				} elsif ($rtype eq "JPEG") {
+					$imagecount++;
+					if ($pnmtools_path ne "" && -x $jpegtopnmexe) {
+						my $tmpoutput = "";
+						$exitval = 0;
+						#
+						# First try, just scan the normalized pic
+						#
+						{
+							my $pid5 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID5,
+							$tmpfpath, 1, "$jpegtopnm 2>&1 | $gocr");
+							my @response = <PID5>;
+							close PID5;
+							while (my $v = shift @response) {
+								$tmpoutput .= $v;
+							}
+							$exitval = quit_stale_helper(*PID5, $pid5, $exitval);
+						}
+						if ($exitval != 0) {
+							#
+							# Only return a bad value if jpegtopnm failed
+							#
+							$tmpoutput = "";
+							$exitval = 0;
+							{
+								$pid6 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID6,
+								$tmpfpath, 1, "$jpegtopnm 2>&1");
+								my @response = <PID6>;
+								close PID6;
+								while (my $v = shift @response) {
+									$tmpoutput .= $v;
+								}
+								$exitval = quit_stale_helper(*PID6, $pid6, $exitval);
+							}
+							if ($exitval != 0 && $tmpoutput !~ /End-of-file/) {
+								dbg("ocrtext: broken jpeg pic found");
+								my $hitdesc = "SPAMPIC_BROKEN";
+								$pms->_handle_hit($hitdesc,
+									$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+									$pms->{conf}->{descriptions}->{$hitdesc}
+								);
+								$pms->{tests_already_hit}->{$hitdesc} = 1;
+							}
+						} else {
+							$output .= $tmpoutput;
+							#
+							# Second try, normalize the pic
+							#
+							$tmpoutput = "";
+							$exitval = 0;
+							{
+								my $pid6 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID6,
+								$tmpfpath, 1, "$jpegtopnm 2>&1 | $pnmnorm | $gocr");
+								my @response = <PID6>;
+								close PID6;
+								while (my $v = shift @response) {
+									$tmpoutput .= $v;
+								}
+								$exitval = quit_stale_helper(*PID6, $pid6, $exitval);
+							}
+							if (! $exitval) {
+								$output .= "---new-page---";
+								$output .= $tmpoutput;
+							}
+
+							#
+							# Third try, limit the colors, disable dither
+							# and use a grayscale pic only.
+							#
+							$tmpoutput = "";
+							$exitval = 0;
+							{
+								my $pid7 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID7,
+								$tmpfpath, 1, "$djpeg -gray -colors 8 -dither none -pnm 2>&1 | $gocr");
+								my @response = <PID7>;
+								close PID7;
+								while (my $v = shift @response) {
+									$tmpoutput .= $v;
+								}
+								$exitval = quit_stale_helper(*PID7, $pid7, $exitval);
+							}
+							if (! $exitval) {
+								$output .= "---new-page---";
+								$output .= $tmpoutput;
+							}
+
+							#
+							# Forth try, limit the colors to 8 and invert the pic.
+							#
+							$tmpoutput = "";
+							$exitval = 0;
+							{
+								my $pid8 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID8,
+								$tmpfpath, 1, "$djpeg -colors 8 -pnm 2>&1 | $pnminvert | $gocr");
+								my @response = <PID8>;
+								close PID8;
+								while (my $v = shift @response) {
+									$tmpoutput .= $v;
+								}
+								$exitval = quit_stale_helper(*PID8, $pid8, $exitval);
+							}
+							if (! $exitval) {
+								$output .= "---new-page---";
+								$output .= $tmpoutput;
+							}
+
+							#
+							# Last try, invert the picture and normalize it.
+							#
+							$tmpoutput = "";
+							$exitval = 0;
+							{
+								my $pid9 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID9,
+								$tmpfpath, 1, "$jpegtopnm 2>&1 | $pnminvert | $pnmnorm | $gocr");
+								my @response = <PID9>;
+								close PID9;
+								while (my $v = shift @response) {
+									$tmpoutput .= $v;
+								}
+								$exitval = quit_stale_helper(*PID9, $pid9, $exitval);
+							}
+							if (! $exitval) {
+								$output .= "---new-page---";
+								$output .= $tmpoutput;
+							}
+						}
+					}
+				} elsif ($rtype eq "PNG") {
+					$imagecount++;
+					if ($pnmtools_path ne "" && -x $pngtopnmexe) {
+						my $tmpoutput = "";
+						{
+							my $pid10 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID10,
+							$tmpfpath, 1, "$pngtopnm 2>&1 | $gocr");
+							my @response = <PID10>;
+							close PID10;
+							while (my $v = shift @response) {
+								$tmpoutput .= $v;
+							}
+							$exitval = quit_stale_helper(*PID10, $pid10, $exitval);
+						}
+						if ($exitval != 0) {
+							#
+							# Only return a bad value if pngtopnm failed
+							#
+							$tmpoutput = "";
+							$exitval = 0;
+							{
+								$pid11 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID11,
+								$tmpfpath, 1, "$pngtopnm 2>&1");
+								my @response = <PID11>;
+								close PID11;
+								$exitval = quit_stale_helper(*PID11, $pid11, $exitval);
+							}
+							if ($exitval != 0) {
+								dbg("ocrtext: broken png pic found");
+								my $hitdesc = "SPAMPIC_BROKEN";
+								$pms->_handle_hit($hitdesc,
+									$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+									$pms->{conf}->{descriptions}->{$hitdesc}
+								);
+								$pms->{tests_already_hit}->{$hitdesc} = 1;
+							}
+						} else {
+							$output .= $tmpoutput;
+							#
+							# Second try, work on a normalized image.
+							#
+							$tmpoutput = "";
+							$exitval = 0;
+							{
+								my $pid11 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID11,
+								$tmpfpath, 1, "$giffix | $pngtopnm 2>&1 | $pnmnorm | $gocr");
+								my @response = <PID11>;
+								close PID11;
+								while (my $v = shift @response) {
+									$tmpoutput .= $v;
+								}
+								$exitval = quit_stale_helper(*PID11, $pid11, $exitval);
+							}
+							if (! $exitval) {
+								$output .= "---new-page---";
+								$output .= $tmpoutput;
+							}
+
+							#
+							# Last try, invert the picture and normalize it.
+							#
+							$tmpoutput = "";
+							$exitval = 0;
+							{
+								my $pid12 = Mail::SpamAssassin::Util::helper_app_pipe_open(*PID12,
+								$tmpfpath, 1, "$giffix | $giftopnm 2>&1 | pnminvert | $pnmnorm | $gocr");
+								my @response = <PID12>;
+								close PID12;
+								while (my $v = shift @response) {
+									$tmpoutput .= $v;
+								}
+								$exitval = quit_stale_helper(*PID12, $pid12, $exitval);
+							}
+							if (! $exitval) {
+								$output .= "---new-page---";
+								$output .= $tmpoutput;
+							}
+						}
+					}
+				}
+
+				});
+				$pms->leave_helper_run_mode();
+
+				#
+				# Kill and close any open helpers.
+				#
+				quit_stale_helper(*PID0, $pid0, 0);
+				if ($rtype eq "GIF") {
+					quit_stale_helper(*PID1, $pid1, 0);
+					quit_stale_helper(*PID2, $pid2, 0);
+					quit_stale_helper(*PID3, $pid3, 0);
+					quit_stale_helper(*PID4, $pid4, 0);
+				} elsif ($rtype eq "JPEG") {
+					quit_stale_helper(*PID5, $pid5, 0);
+					quit_stale_helper(*PID6, $pid6, 0);
+					quit_stale_helper(*PID7, $pid7, 0);
+					quit_stale_helper(*PID8, $pid8, 0);
+					quit_stale_helper(*PID9, $pid9, 0);
+				} elsif ($rtype eq "PNG") {
+					quit_stale_helper(*PID10, $pid10, 0);
+					quit_stale_helper(*PID11, $pid11, 0);
+					quit_stale_helper(*PID12, $pid12, 0);
+				}
+
+				my @words = split(/;/,$pms->{main}->{conf}->{ocrtext_words});
+				my @pwords = split(/;/,$pms->{main}->{conf}->{ocrtext_pwords});
+
+				my $cnt = 0;
+				my $tmpoutput = $output;
+				$tmpoutput =~ s/---new-page---//g;
+				$tmpoutput =~ tr/!;|081/iiioal/;
+				$tmpoutput =~ s/[^a-zA-Z0-9\:]//g;
+				$tmpoutput = lc $tmpoutput;
+				#
+				# Negative words, each of them gives +1 to cnt
+				#
+				foreach my $w (@words) {
+					my $wthreshold;
+					if ($w =~ /^(.*?)::(0(\.\d+){0,1})/) {
+						($w, $wthreshold) = ($1, $2);
+					} else {
+						$wthreshold = $threshold;
+					}
+					$w =~ s/[^a-zA-Z0-9]//g;
+					$w = lc $w;
+
+					my $rw = $w;
+					$rw =~ s/[\:il1]/\[il1\:\]/;
+					$rw =~ s/[a8\@]/\[a8\@\]/;
+					$rw =~ s/[o0]/\[o0\]/;
+					$rw =~ s/[rn]/\[rn\]/;
+					$rw =~ s/[mw]/\[mw\]/;
+					$rw =~ s/[s5]/\[s5\]/;
+					if ($tmpoutput =~ /($rw)/) {
+						dbg("ocrtext: found word \"$w\" as regex match in string \"$1\"");
+						$cnt++;
+					} else {
+						if ($tmpoutput && $tmpoutput ne "") {
+							$_ = lc;
+							my $matched = adistr( $w, $tmpoutput);
+							if ($matched && $matched ne "" && abs($matched) < $wthreshold ) {
+								$cnt++;
+								dbg("ocrtext: found word \"$w\" with fuzz of " . abs($matched));
+							}
+						}
+					}
+				}
+				#
+				# Positive words, each of them gives +1 to cnt
+				#
+				foreach my $w (@pwords) {
+					my $wthreshold;
+					if ($w =~ /^(.*?)::(0(\.\d+){0,1})/) {
+						($w, $wthreshold) = ($1, $2);
+					} else {
+						$wthreshold = $threshold;
+					}
+					$w =~ s/[^a-zA-Z0-9]//g;
+					$w = lc $w;
+
+					my $rw = $w;
+					$rw =~ s/[\:il1]/\[il1\:\]/;
+					$rw =~ s/[a8\@]/\[a8\@\]/;
+					$rw =~ s/[o0]/\[o0\]/;
+					$rw =~ s/[rn]/\[rn\]/;
+					$rw =~ s/[mw]/\[mw\]/;
+					$rw =~ s/[s5]/\[s5\]/;
+					if ($tmpoutput =~ /($rw)/) {
+						dbg("ocrtext: found word \"$w\" as regex match in string \"$1\"");
+						$cnt--;
+					} else {
+						if ($tmpoutput && $tmpoutput ne "") {
+							$_ = lc;
+							my $matched = adistr( $w, $tmpoutput);
+							if ($matched && $matched ne "" && abs($matched) < $wthreshold ) {
+								$cnt--;
+								dbg("ocrtext: found word \"$w\" with fuzz of " . abs($matched));
+							}
+						}
+					}
+				}
+				if ($cnt >= 1) {
+					dbg("ocrtext: found $cnt words in picture");
+					my $hitdesc;
+					if ($cnt == 1) {
+						$hitdesc = "SPAMPIC_WORDS_1";
+					} elsif ($cnt == 2) {
+						$hitdesc = "SPAMPIC_WORDS_2";
+					} elsif ($cnt == 3) {
+						$hitdesc = "SPAMPIC_WORDS_3";
+					} elsif ($cnt == 4) {
+						$hitdesc = "SPAMPIC_WORDS_4";
+					} elsif ($cnt > 4) {
+						$hitdesc = "SPAMPIC_WORDS_5";
+					}
+					$pms->_handle_hit($hitdesc,
+						$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+						$pms->{conf}->{descriptions}->{$hitdesc}
+					);
+					$pms->{tests_already_hit}->{$hitdesc} = 1;
+				}
+
+				my $cnt_pages = 1;
+				while ($output =~ /---new-page---/g) { $cnt_pages++ }
+				if ($output ne "" && $output !~ /^P6/) {
+					#
+					# Remove spaces and points.
+					#
+					my $tmpoutput = $output;
+					$output =~ s/[\s\t ]+//g;
+					$output =~ s/\.//g;
+					dbg("ocrtext: : found output $output");
+					push @ocrtext, $output;
+
+					#
+					# Try to sort out patterns and equal chars. Numbers
+					# are currently not counted at all. S and s are skipped.
+					#
+					$tmpoutput =~ s/---new-page---//;
+					$tmpoutput =~ s/[^a-z0-9]//;
+					my @chars = ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','t','u','v','w','x','y','z');
+					foreach my $char (@chars) {
+						$tmpoutput =~ s/$char{2,}/$char/ig;
+					}
+					my $cnt_alpha = $tmpoutput =~ tr/a-zA-Z//;
+					my $cnt_digit = $tmpoutput =~ tr/0-9//;
+					if ($cnt_alpha) { $cnt_alpha = $cnt_alpha / $cnt_pages };
+					if ($cnt_digit) { $cnt_digit = $cnt_digit / $cnt_pages };
+
+					dbg("ocrtext: found $cnt_alpha chars and $cnt_digit digits from ocr output");
+
+					my $ocrtext_alpha1 = $pms->{main}->{conf}->{ocrtext_alpha1};
+					my $ocrtext_alpha2 = $pms->{main}->{conf}->{ocrtext_alpha2};
+					my $ocrtext_alpha3 = $pms->{main}->{conf}->{ocrtext_alpha3};
+
+					if ($cnt_alpha > $ocrtext_alpha1) {
+						my $hitdesc;
+						if ($cnt_alpha < $ocrtext_alpha2) {
+							$hitdesc = "SPAMPIC_ALPHA_1";
+						} elsif ($cnt_alpha >= $ocrtext_alpha2 && $cnt_alpha < $ocrtext_alpha3) {
+							$hitdesc = "SPAMPIC_ALPHA_2";
+						} elsif ($cnt_alpha >= $ocrtext_alpha3) {
+							$hitdesc = "SPAMPIC_ALPHA_3";
+						}
+						$pms->_handle_hit($hitdesc,
+							$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+							$pms->{conf}->{descriptions}->{$hitdesc}
+						);
+						$pms->{tests_already_hit}->{$hitdesc} = 1;
+					}
+				}
+			}
+		}
+		unlink $tmpfpath;
+		unlink $tmpfpath . ".cons";
+	}
+	dbg("ocrtext: Imagecount is $imagetcount");
+	push @ocrtext, "OCRTEXT: Imagecount is $imagetcount";
+	if ($imagetcount > 1) {
+		my $hitdesc;
+		if ($imagetcount == 2) {
+			$hitdesc = "__SPAMPIC_COUNT_2";
+		} elsif ($imagetcount == 3) {
+			$hitdesc = "__SPAMPIC_COUNT_3";
+		} elsif ($imagetcount == 4) {
+			$hitdesc = "__SPAMPIC_COUNT_4";
+		} elsif ($imagetcount == 5) {
+			$hitdesc = "__SPAMPIC_COUNT_5";
+		} elsif ($imagetcount == 6) {
+			$hitdesc = "__SPAMPIC_COUNT_6";
+		} elsif ($imagetcount >= 7) {
+			$hitdesc = "__SPAMPIC_COUNT_7";
+		}
+		$pms->_handle_hit($hitdesc,
+			$pms->{conf}->{scores}->{$hitdesc}, "BODY: ",
+			$pms->{conf}->{descriptions}->{$hitdesc}
+		);
+		$pms->{tests_already_hit}->{$hitdesc} = 1;
+	}
+	
+	return 0;
+}
+
+sub quit_stale_helper {	# kill and close a helper handle that is still open, return its status
+        my ($PID, $pid, $exitval) = @_;	# handle glob, helper pid (may be unset), fallback status
+        # Use the glob that was passed in; the bareword PID names a different handle.
+        if (defined($PID) && defined(fileno($PID))) {
+                if ($pid) {
+                        kill('KILL',$pid);	# kill first so the close() below cannot block on a hung helper
+                        dbg("ocrtext: killed stale helper [$pid]");
+                }
+                close $PID;	# close() on a pipe handle waits for the child and sets $?
+		$exitval = $?;
+		dbg("ocrtext: [$pid] returned $exitval");
+        }
+	return $exitval;	# unchanged when the handle was not open (e.g. already closed by the caller)
+}
+
+1;
