#! /bin/sh

# Feed one article at a time from news.admin.net-abuse.sightings to the DCC
#   This script works with the BSD Bourne shell, GNU grep, and other tools.
#   It might not work on other systems without changes.
#
#   A newsfeeds line like the following works:
#    dcc!:!*,news.admin.net-abuse.sightings,@alt.*\
#	:Tp:/var/lib/dcc/libexec/na-spam %s
#   provided you use a cron job to empty the log directory.
#   It is also often necessary to get dccproc into $PATH for such programs
#   with something like a symbolic link of /usr/bin/dccproc to
#   /usr/var/news/bin.

# Copyright (c) 2003 by Rhyolite Software
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE DISCLAIMS ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE
# BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
# OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.
#	Rhyolite Software DCC 1.2.16-1.13 $Revision$


# Nothing in this script reads standard input, so detach from it.
exec </dev/null

DCC_HOMEDIR=/var/lib/dcc
DEBUG=
VERBOSE=
# Remember the starting directory; relative -l and -s arguments are
# resolved against it.
CWD=$(pwd)
# Check the args once to get the home directory (-h) and to turn on
# tracing (-x) before dcc_conf is read.  All other options are ignored
# here.  The leading ':' in the option string makes getopts silent in this
# pass, so that a bad option is not reported both here and again by the
# later pass that really handles the options.
while getopts ":xnvh:l:o:s:" c; do
    case $c in
        x) DEBUG=yes; set -x;;
        h) DCC_HOMEDIR="$OPTARG";;
        *) ;;
    esac
done
# Quoted so that a home directory containing blanks still works.
. "$DCC_HOMEDIR/dcc_conf"

# na_abspath BASE PATH
#   Print PATH unchanged when it is empty or starts with '/', and BASE/PATH
#   otherwise.  Done with the shell instead of sed so that characters such
#   as '@', '&' or '\' in BASE cannot corrupt the result.
na_abspath() {
    case $2 in
        ''|/*) printf '%s\n' "$2";;
        *) printf '%s\n' "$1/$2";;
    esac
}

# news spool directory; article names that are not absolute are relative to it
SDIR=/var/spool/news
# Arguments for every dccproc call, and the extra arguments used when an
# article is reported.  Both are later expanded unquoted so that they
# split into separate words.
DCCARGS="-C -h $DCC_HOMEDIR -E"
DCCCNT="-t many"
# a relative DCCM_LOGDIR (set by dcc_conf) is relative to the home directory
LDIR=$(na_abspath "$DCC_HOMEDIR" "$DCCM_LOGDIR")

USAGE="$(basename "$0"): [-xnv] [-h homedir] [-l logdir] [-o dccargs] [-s spooldir] article1 article2 ..."
# Second pass over the arguments, now that dcc_conf has supplied defaults.
OPTIND=1
while getopts "xnvh:l:o:s:" c; do
    case $c in
        x) ;;                       # already handled in the first pass
        n) DCCCNT=-Q;;              # do not report the articles
        v) VERBOSE=yes;;
        h) ;;                       # already handled in the first pass
        l) LDIR=$(na_abspath "$CWD" "$OPTARG");;
        o) DCCARGS="$DCCARGS $OPTARG";;
        s) SDIR=$(na_abspath "$CWD" "$OPTARG");;
        *) echo 1>&2 "$USAGE"; exit 1;;
    esac
done
shift $((OPTIND - 1))
# nothing to do without article names
if test "$#" -eq 0; then
    exit 0
fi

# Work from the DCC home directory and give up if it cannot be entered.
cd "$DCC_HOMEDIR" || {
    echo 1>&2 "cannot change to $DCC_HOMEDIR"
    exit 1
}
# The log directory must already exist.  $LDIR is quoted so that an empty
# value is reported here; unquoted, the test degenerates to "test ! -d",
# which is false and lets the script continue without a log directory.
if test ! -d "$LDIR"; then
    echo 1>&2 "$LDIR does not exist"
    exit 1
fi

# na_note MESSAGE
#   Replace the per-article log file $BNM with one line made of the current
#   time, the article path $FNM and MESSAGE.  The time is formatted on its
#   own so that a '%' in the path is not taken as a date(1) conversion.
na_note() {
    printf '%s %s %s\n' "$(date '+%m/%d %H:%M:%S')" "$FNM" "$1" >"$BNM"
}

# na_cleanup
#   Remove the temporary files of the current article, unless -x asked for
#   them to be kept for debugging.
na_cleanup() {
    if test "$DEBUG" != yes; then
        rm -f "$TNM"*
    fi
}

# na_process_article ARTICLE
#   Extract the forwarded mail message from one sighting and hand it to
#   dccproc.  ARTICLE is an article file name, relative to $SDIR unless it
#   is absolute.  Reads the globals SDIR, LDIR, DCCARGS, DCCCNT, DEBUG and
#   VERBOSE; sets the globals NM, FNM, BNM, CNM, TNM and some scratch
#   variables (plain sh has no local variables).  An article that cannot be
#   used is described in $LDIR/msg.<name> and skipped.
na_process_article() {
    NM=$1
    # compute path to the article
    case $NM in
        ''|/*) FNM=$NM;;
        *) FNM=$SDIR/$NM;;
    esac
    # basic log file
    BNM=$LDIR/msg.$(basename "$NM")
    # current spam file name starts as the article
    CNM=$FNM
    # next temporary file
    TNM=$BNM.tmp

    if test ! -s "$CNM"; then
        na_note "is not a valid article file"
        return 0
    fi

    # only articles with an "[Email]" subject in the netnews header are used
    if ! sed -e '/^$/,$d' "$CNM" | grep -i '^Subject: *\[Email]' >/dev/null; then
        if test "$DCCCNT" = "-Q"; then
            printf '%s %s is not an email sighting\n' \
                "$(date '+%m/%d %H:%M:%S')" "$FNM"
        fi
        return 0
    fi

    # The trailer is trimmed below with the sed address "NLINES-8,$", which
    # needs at least 9 lines: with exactly 8 the address would be the
    # invalid line number 0.  Blanks that some wc versions emit are removed.
    NLINES=$(wc -l <"$CNM" | tr -d ' ')
    if test "$NLINES" -lt 9; then
        na_note "had only $NLINES lines"
        return 0
    fi

    # we can handle only some multipart MIME sightings
    sed -e '/^$/,$d' "$CNM" >"${TNM}1"
    MIMEBOUND=
    if grep -E -i '^Content-Type: +multipart' "${TNM}1" >/dev/null; then
        # use only the first boundary so that the value stays a single line
        MIMEBOUND=$(sed -n -e 's@.*boundary="\([^"/]*\)".*@\1@p' "${TNM}1" \
            | sed -e 1q)
        if test -z "$MIMEBOUND"; then
            na_note "cannot handle this multipart MIME"
            na_cleanup
            return 0
        fi
    fi

    # trim netnews header and NANAS trailer and some common trailing junk
    sed -e '1,/^$/d' -e "$((NLINES - 8)),\$d" \
        -e '/^-\{1,\}BEGIN PGP SIGNATURE-\{1,\}/,$d' \
        -e '/^ \{0,1\}-[- ]* [Ee]nd of [Ff]orwarded [Mm]essage/,$d' \
        -e '/^### end of message body ########################/,$d' \
        "$CNM" >"${TNM}1"
    CNM=${TNM}1

    # Discard articles that don't contain a block starting
    #   with common mail headers by trimming junk before known headers.
    #
    #   First look for common quote leaders.  QL becomes a regular
    #   expression for the leader found in front of the first Received
    #   line, with the blank after the leader character made optional.
    QL=$(sed -n -e '/^[>{}|] \{0,1\}[Rr]eceived: /{' \
        -e 's/\(^[>{}|] \{0,1\}\).*/\1/' \
        -e 's/. /&\\{0,1\\}/' \
        -e p -e q -e '}' "$CNM")
    if test -z "$QL"; then
        # allow a blank quote leader
        QL=$(sed -n -e '/^ \{1,4\}[Rr]eceived: /{' \
            -e 's/\(^ \{1,4\}\).*/\1/' \
            -e p -e q -e '}' "$CNM")
        if test -z "$QL"; then
            PAT="-e :hit -e p"
        else
            PAT="-e :hit -e 's/^$QL//p'"
        fi
    else
        # restore broken lines while removing non-blank quote leader
        PAT="-e :hit -e 's/^$QL//' -e N \
            -e '/\n[[:blank:]]/bquit' -e '/\n\n/bquit' \
            -e '/\n$QL/{' -e P -e D -e bhit -e '}' -e 's/\n//' -e bhit \
            -e :quit -e P -e q"
    fi
    # $PAT holds quoted sed arguments and so must be parsed by eval.  It is
    # passed to eval as ONE quoted word: expanded unquoted it is first split
    # at white space and rejoined with single blanks, which shortens a quote
    # leader of several blanks to one and drops characters from the bracket
    # expression.
    GPAT=': /,$bhit'
    eval sed -n -e '"/^${QL}[Rr]eturn-[Pp]ath${GPAT}"' \
        -e '"/^${QL}[Rr]eceived${GPAT}"' \
        -e '"/^${QL}[Mm]essage-[Ii][Dd]${GPAT}"' \
        -e '"/^${QL}[Ss]ender${GPAT}"' \
        -e '"/^${QL}[Rr]eply-[Tt]o${GPAT}"' \
        -e '"/^${QL}From${GPAT}"' \
        -e '"/^${QL}To${GPAT}"' \
        -e '"/^${QL}Subject${GPAT}"' \
        -e d "$PAT" '"$CNM"' >"${TNM}2"
    # Ensure that the supposed headers contain at least a Received line
    if test -z "$(sed -n -e '/^$/,$d' -e '/^Received: /p' "${TNM}2")"; then
        na_note "lacks recognizable SMTP headers"
        na_cleanup
        return 0
    fi
    CNM=${TNM}2

    # Ensure there is a body: at least 3 characters other than white space
    # and '=' after the headers.
    BLEN=$(sed -n -e '1,/^$/d' -e '/./,$p' "$CNM" \
        | tr -d '=\n\r\t ' | wc -c | tr -d ' ')
    if test "$BLEN" -lt 3; then
        na_note "lacks a body"
        na_cleanup
        return 0
    fi

    # Ignore truncated DCC log files
    if grep '^### log truncated ###' "$CNM" >/dev/null; then
        na_note "was truncated"
        na_cleanup
        return 0
    fi

    # discard MIME junk after the message
    if test -n "$MIMEBOUND"; then
        # the boundary comes from the article, so quote what is special in
        # a basic regular expression before using it as a sed address
        MIMEPAT=$(printf '%s\n' "$MIMEBOUND" | sed -e 's/[][\.*^$]/\\&/g')
        sed -e "/$MIMEPAT/,\$d" "$CNM" >"${TNM}3"
    else
        sed -e '/##^-END-RAW-SPAM/,${' \
            -e '/..*##^-END-RAW-SPAM/s/##^-END-RAW-SPAM//p' \
            -e d -e '}' "$CNM" >"${TNM}3"
    fi
    CNM=${TNM}3

    # the extracted message replaces whatever was in the log file
    ln -f "$CNM" "$BNM"
    # $DCCARGS and $DCCCNT are deliberately unquoted: each holds several
    # words.
    if test "$VERBOSE" = yes; then
        printf '%s report %s\n' "$(date '+%m/%d %H:%M:%S')" "$FNM"
        dccproc $DCCARGS -QC -i "$BNM" -o "$BNM.query"
    fi
    if test "$DCCCNT" != "-Q"; then
        dccproc $DCCARGS $DCCCNT -l "$LDIR" -i "$BNM" -o/dev/null
    fi
    na_cleanup
    # the extracted message is kept only with -v
    if test "$VERBOSE" != yes; then
        rm "$BNM"
    fi
}

# "$@" keeps each article name as given, without word splitting or globbing
for NM in "$@"; do
    na_process_article "$NM"
done
