#!/bin/bash
#
# dvdtguess - try to guess the title of a DVD from its MPEG filename
#
# Version   0.1 written by Wolfgang Wershofen (mailto: itconsult at wershofen.de)
#

# ------------------------------
# Function specification
#
# Print the command-line synopsis to stdout and terminate the script.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   never returns; exits the script with status 1
usage()
{
	# ${0##*/} strips the directory part without word-splitting, so the
	# script name is printed correctly even if its path contains spaces.
	cat <<EOF
Usage: ${0##*/} filename
Guessed Titlestring is returned via /dev/stdout

EOF
	exit 1
}

# Join all command-line arguments into a single filename string, so an
# unquoted name containing spaces still arrives as one value.
tfile="$*"
# A lone "-h" asks for the help text; usage terminates the script.
if [[ "$tfile" == "-h" ]]; then
	usage
fi
# The guessed title is accumulated here; empty means "not started yet".
tstring=""

#
# Title-Guessing:
#----------------
# This is a terrible hack and the results may be poor. If you don't like it, either supply
# a title with the -t or -T option or rename your files to give better results. Sorry
#
# Most of the time, we're working on dBox streams from udrec.
# The names of these files start with the TV station (in uppercase) and end
# with date and time. In addition, whitespace and special characters have been replaced with underscores.
# First turn underscores into spaces, then ignore everything before the first word that is neither
# entirely uppercase nor a single digit.
# Finally, drop all purely numeric words with six or more digits (e.g. 8-digit dates and 6-digit times).
#
# Guess a title from a file name.
#
# The directory part and the last extension are removed, underscores are
# treated as word separators, and the remaining words are filtered:
#   - leading words that are entirely uppercase (station names) or that
#     consist of a single digit (e.g. PREMIERE 1, SAT 1, PRO 7) are skipped;
#   - once the title has started, purely numeric words with six or more
#     digits (date/time stamps) are dropped.
#
# Arguments: $1 - file name, with or without directory and extension
# Outputs:   the guessed title (possibly empty) followed by a newline
# Returns:   0
guess_title()
{
	local file=$1
	local ext base spaced word title=""
	local -a words=()
	# Explicit character lists keep the matching independent of the locale.
	local -r upper='ABCDEFGHIJKLMNOPQRSTUVWXYZ' digits='0123456789'

	ext=".${file##*.}"
	base=$(basename -- "$file" "$ext")
	spaced=${base//_/ }

	# Let the shell split on spaces: leading, trailing and repeated
	# separators yield no empty words, so a name starting with an
	# underscore no longer shifts the word list and loses its last word.
	IFS=' ' read -r -a words <<<"$spaced"

	for word in "${words[@]}"; do
		if [[ -z "$title" ]]; then
			# Title not started yet: skip station-like words.
			if [[ -z "${word//[$upper]/}" ]]; then
				continue		# completely uppercase
			fi
			if [[ -z "${word//[$digits]/}" && ${#word} -le 1 ]]; then
				continue		# a single digit
			fi
			title=$word
		elif [[ -n "${word//[$digits]/}" || ${#word} -lt 6 ]]; then
			# Keep words that are not purely numeric or have fewer than 6 digits.
			title+=" $word"
		fi
	done

	printf '%s\n' "$title"
}

tstring=$(guess_title "$tfile")

# Write the guessed title (possibly empty) to stdout and report success.
# printf is used instead of echo so that a title which happens to look
# like an echo option (e.g. "-n" or "-e") is still printed literally.
printf '%s\n' "$tstring"
exit 0
